diff --git a/aagenerator.py b/aagenerator.py
index a9704472559ec0caefb96814d9db0eaf126491b1..b870d5998f040cf372e45f9db00260bc283370d8 100644
--- a/aagenerator.py
+++ b/aagenerator.py
@@ -126,10 +126,14 @@ def parse_args():
     ################################### newly added args ###################################
     parser.add_argument("--ref_path", type=str, default="/data_laion/alvin/Dataset/evaluation/debug/42361.png")
     parser.add_argument("--prompt", type=str, default="A person riding skis down a snow covered slope.")
+    # parser.add_argument("--t2mn_path", type=str,
+    #                     default="/data_laion/alvin/sd4human/ckpts/a-ranstart-body-sdv20-v-nd-flaw-avg-copy1-glc-resume288k-512-ft1024/checkpoint-388000")
     parser.add_argument("--t2mn_path", type=str,
-                        default="/data_laion/alvin/sd4human/ckpts/a-ranstart-body-sdv20-v-nd-flaw-avg-copy1-glc-resume288k-512-ft1024/checkpoint-388000")
+                        default="./ckpts/checkpoint-388000")
+    # parser.add_argument("--controlnet_model_name_or_path", type=str,
+    #                     default="/data_laion/alvin/sd4human/ckpts/ctrl-sdxl10-eps-glc-composer-bmn-sum-1024/checkpoint-91000")
     parser.add_argument("--controlnet_model_name_or_path", type=str,
-                        default="/data_laion/alvin/sd4human/ckpts/ctrl-sdxl10-eps-glc-composer-bmn-sum-1024/checkpoint-91000")
+                        default="./ckpts/checkpoint-91000")
     parser.add_argument('--step_num1', default=50, type=int)
     parser.add_argument('--step_num2', default=50, type=int)
     parser.add_argument('--size', default=2048, type=int)
@@ -717,8 +721,8 @@ class Generator:
         )
 
         self.body_inferencer = MMPoseInferencer(
-            pose2d='/fsx_laion/alvin/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py',
-            pose2d_weights='/fsx_laion/alvin/pretrain/ViTPose/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth',
+            pose2d='./mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py',
+            pose2d_weights='./pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth',
             scope="mmpose"
             # det_model='/fsx_laion/alvin/mmpose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py',
             # det_weights="/fsx_laion/alvin/pretrain/ViTPose/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
diff --git a/ckpts/checkpoint-388000/optimizer.bin b/ckpts/checkpoint-388000/optimizer.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ed8af55f367fdf7ce99b84ccf562442505655b5
--- /dev/null
+++ b/ckpts/checkpoint-388000/optimizer.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf8f25e968c1a2a363da273d9ca7ee2d2d4d3d7b84a82a80c4764152795cfa21
+size 7440199099
diff --git a/ckpts/checkpoint-388000/random_states_0.pkl b/ckpts/checkpoint-388000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f8f572d2aaf426947ae85c7aecc4e3b655c0e2c0
--- /dev/null
+++ b/ckpts/checkpoint-388000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:054b28928bcd597ce093db874082bede1c8313c722b6ce54d8360549e68737a9
+size 21795
diff --git a/ckpts/checkpoint-388000/scheduler.bin b/ckpts/checkpoint-388000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b607da0606003b7e06064712566a964c77885af6
--- /dev/null
+++ b/ckpts/checkpoint-388000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c92acef4f084138c2708bb2f7fc53d9ad31be999ccf6cb61dc4f614bc370baf
+size 563
diff --git a/ckpts/checkpoint-388000/unet/config.json b/ckpts/checkpoint-388000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c9e9ba2ffbd24c97f56d93b7c4fc92d0a1b0befc
--- /dev/null
+++ b/ckpts/checkpoint-388000/unet/config.json
@@ -0,0 +1,76 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.19.0.dev0",
+  "_name_or_path": "/fsx_laion/alvin/sd4human/a-ranstart-body-sdv20-v-nd-flaw-avg-copy1-glc-resume282k-512/checkpoint-288000",
+  "act_fn": "silu",
+  "addition_embed_type": "time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "branch_num": 2,
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "copy_first_n_block": 1,
+  "copy_last_n_block": 1,
+  "cross_attention_dim": 1024,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "fusion": "avg",
+  "in_channels": 8,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "off_wa": true,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": null,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "sample_size": 64,
+  "size_cond": true,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": 1,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "upcast_attention": false,
+  "use_linear_projection": true
+}
diff --git a/ckpts/checkpoint-388000/unet/diffusion_pytorch_model.bin b/ckpts/checkpoint-388000/unet/diffusion_pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..567ea1ac7a9cf46d04bcbcef3c71b727acb7b60f
--- /dev/null
+++ b/ckpts/checkpoint-388000/unet/diffusion_pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2a87ea43737713b524ec399a3b92f97e267829757c2286aeb1145d00d1d4c3d
+size 3720102563
diff --git a/ckpts/checkpoint-388000/unet_ema/config.json b/ckpts/checkpoint-388000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..61b1fddc4755377ebacfee39101ac74baeb26454
--- /dev/null
+++ b/ckpts/checkpoint-388000/unet_ema/config.json
@@ -0,0 +1,83 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.19.0.dev0",
+  "_name_or_path": "/home/suraj_huggingface_co/.cache/huggingface/diffusers/models--fusing--stable-diffusion-v2/snapshots/3282d2bdc378f4afd43edbbb90803779a5249116/unet",
+  "act_fn": "silu",
+  "addition_embed_type": "time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "branch_num": 2,
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+ "conv_in_kernel": 3, + "conv_out_kernel": 3, + "copy_first_n_block": 1, + "copy_last_n_block": 1, + "cross_attention_dim": 1024, + "cross_attention_norm": null, + "decay": 0.9999, + "down_block_types": [ + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", + "DownBlock2D" + ], + "downsample_padding": 1, + "dual_cross_attention": false, + "encoder_hid_dim": null, + "encoder_hid_dim_type": null, + "flip_sin_to_cos": true, + "freq_shift": 0, + "fusion": "avg", + "in_channels": 8, + "inv_gamma": 1.0, + "layers_per_block": 2, + "mid_block_only_cross_attention": null, + "mid_block_scale_factor": 1, + "mid_block_type": "UNetMidBlock2DCrossAttn", + "min_decay": 0.0, + "norm_eps": 1e-05, + "norm_num_groups": 32, + "num_attention_heads": null, + "num_class_embeds": null, + "off_wa": true, + "only_cross_attention": false, + "optimization_step": 100000, + "out_channels": 4, + "power": 0.6666666666666666, + "projection_class_embeddings_input_dim": null, + "resnet_out_scale_factor": 1.0, + "resnet_skip_time_act": false, + "resnet_time_scale_shift": "default", + "sample_size": 64, + "size_cond": true, + "time_cond_proj_dim": null, + "time_embedding_act_fn": null, + "time_embedding_dim": null, + "time_embedding_type": "positional", + "timestep_post_act": null, + "transformer_layers_per_block": 1, + "up_block_types": [ + "UpBlock2D", + "CrossAttnUpBlock2D", + "CrossAttnUpBlock2D", + "CrossAttnUpBlock2D" + ], + "upcast_attention": false, + "update_after_step": 0, + "use_ema_warmup": false, + "use_linear_projection": true +} diff --git a/ckpts/checkpoint-388000/unet_ema/diffusion_pytorch_model.bin b/ckpts/checkpoint-388000/unet_ema/diffusion_pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f8f92dff32a412680e43102be510a76f529d3be3 --- /dev/null +++ b/ckpts/checkpoint-388000/unet_ema/diffusion_pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb2d5bc8f8bd2cf1487f9cdbb955ba78e3fbb91570ea137710be142d2e11f55c +size 3720087523 diff --git a/ckpts/checkpoint-91000/controlnet/config.json b/ckpts/checkpoint-91000/controlnet/config.json new file mode 100644 index 0000000000000000000000000000000000000000..23b82c48615d2ec27fcdbf831e88293273a7fa5e --- /dev/null +++ b/ckpts/checkpoint-91000/controlnet/config.json @@ -0,0 +1,60 @@ +{ + "_class_name": "ControlNetModel", + "_diffusers_version": "0.19.0.dev0", + "_name_or_path": "ctrl-sdxl10-eps-glc-composer-bmn-sum-1024/checkpoint-9000", + "act_fn": "silu", + "addition_embed_type": "text_time", + "addition_embed_type_num_heads": 64, + "addition_time_embed_dim": 256, + "attention_head_dim": [ + 5, + 10, + 20 + ], + "block_out_channels": [ + 320, + 640, + 1280 + ], + "class_embed_type": null, + "cond_num": 3, + "conditioning_channels": 3, + "conditioning_embedding_out_channels": [ + 16, + 32, + 96, + 256 + ], + "controlnet_conditioning_channel_order": "rgb", + "cross_attention_dim": 2048, + "down_block_types": [ + "DownBlock2D", + "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D" + ], + "downsample_padding": 1, + "encoder_hid_dim": null, + "encoder_hid_dim_type": null, + "flip_sin_to_cos": true, + "freq_shift": 0, + "fusion": "sum", + "global_pool_conditions": false, + "in_channels": 4, + "layers_per_block": 2, + "mid_block_scale_factor": 1, + "norm_eps": 1e-05, + "norm_num_groups": 32, + "normalize_to_0_1": true, + "num_attention_heads": null, + "num_class_embeds": null, + "only_cross_attention": false, + "projection_class_embeddings_input_dim": 2816, + 
"resnet_time_scale_shift": "default", + "transformer_layers_per_block": [ + 1, + 2, + 10 + ], + "upcast_attention": null, + "use_linear_projection": true +} diff --git a/ckpts/checkpoint-91000/controlnet/diffusion_pytorch_model.bin b/ckpts/checkpoint-91000/controlnet/diffusion_pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d6ad8cac3e55fec349fb9703b087fdc0fd7137f --- /dev/null +++ b/ckpts/checkpoint-91000/controlnet/diffusion_pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f823a8a0f5bf7568d10450ea2b8c3423758f024ec6fed7a267f36a58eaef5015 +size 5013143113 diff --git a/ckpts/checkpoint-91000/optimizer.bin b/ckpts/checkpoint-91000/optimizer.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3b93d998e26b956abbe79f0a7734a666033ee23 --- /dev/null +++ b/ckpts/checkpoint-91000/optimizer.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18e068e83f582f440b7391fd5dab22acb9e1fa400f6014130d42edab42d4888 +size 10026266209 diff --git a/ckpts/checkpoint-91000/random_states_0.pkl b/ckpts/checkpoint-91000/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8dc9b08ed3d813934eb9551b28bfe2afb4948e7c --- /dev/null +++ b/ckpts/checkpoint-91000/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5e8ac6a3e6c740570fd852f0fbcd8783d0f8a9dc14fdf3a0b2d81c5432fc14 +size 21795 diff --git a/ckpts/checkpoint-91000/scheduler.bin b/ckpts/checkpoint-91000/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..00b1b2881b9090b277a66d93add63c9d06ba584e --- /dev/null +++ b/ckpts/checkpoint-91000/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d37caf0a25b77ffa731e73953cef4e9361e94796681e3c7506f2a0ec3b44538 +size 563 diff --git a/mmpose/configs/_base_/datasets/300w.py b/mmpose/configs/_base_/datasets/300w.py new file mode 100644 index 0000000000000000000000000000000000000000..2c3728da1d1555c3526ccbfca182385961e8b667 --- /dev/null +++ b/mmpose/configs/_base_/datasets/300w.py @@ -0,0 +1,134 @@ +dataset_info = dict( + dataset_name='300w', + paper_info=dict( + author='Sagonas, Christos and Antonakos, Epameinondas ' + 'and Tzimiropoulos, Georgios and Zafeiriou, Stefanos ' + 'and Pantic, Maja', + title='300 faces in-the-wild challenge: ' + 'Database and results', + container='Image and vision computing', + year='2016', + homepage='https://ibug.doc.ic.ac.uk/resources/300-W/', + ), + keypoint_info={ + 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-16'), + 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-15'), + 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-14'), + 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-13'), + 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-12'), + 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-11'), + 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-10'), + 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-9'), + 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap=''), + 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-7'), + 10: + dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-6'), + 11: + dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-5'), + 12: + dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-4'), + 13: + 
+        dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-3'),
+        14:
+        dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-2'),
+        15:
+        dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-1'),
+        16:
+        dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap='kpt-0'),
+        17:
+        dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-26'),
+        18:
+        dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-25'),
+        19:
+        dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-24'),
+        20:
+        dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap='kpt-23'),
+        21:
+        dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap='kpt-22'),
+        22:
+        dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-21'),
+        23:
+        dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-20'),
+        24:
+        dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap='kpt-19'),
+        25:
+        dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap='kpt-18'),
+        26:
+        dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap='kpt-17'),
+        27: dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap=''),
+        28: dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap=''),
+        29: dict(name='kpt-29', id=29, color=[255, 0, 0], type='', swap=''),
+        30: dict(name='kpt-30', id=30, color=[255, 0, 0], type='', swap=''),
+        31:
+        dict(name='kpt-31', id=31, color=[255, 0, 0], type='', swap='kpt-35'),
+        32:
+        dict(name='kpt-32', id=32, color=[255, 0, 0], type='', swap='kpt-34'),
+        33: dict(name='kpt-33', id=33, color=[255, 0, 0], type='', swap=''),
+        34:
+        dict(name='kpt-34', id=34, color=[255, 0, 0], type='', swap='kpt-32'),
+        35:
+        dict(name='kpt-35', id=35, color=[255, 0, 0], type='', swap='kpt-31'),
+        36:
+        dict(name='kpt-36', id=36, color=[255, 0, 0], type='', swap='kpt-45'),
+        37:
+        dict(name='kpt-37', id=37, color=[255, 0, 0], type='', swap='kpt-44'),
+        38:
+        dict(name='kpt-38', id=38, color=[255, 0, 0], type='', swap='kpt-43'),
+        39:
+        dict(name='kpt-39', id=39, color=[255, 0, 0], type='', swap='kpt-42'),
+        40:
+        dict(name='kpt-40', id=40, color=[255, 0, 0], type='', swap='kpt-47'),
+        41: dict(
+            name='kpt-41', id=41, color=[255, 0, 0], type='', swap='kpt-46'),
+        42: dict(
+            name='kpt-42', id=42, color=[255, 0, 0], type='', swap='kpt-39'),
+        43: dict(
+            name='kpt-43', id=43, color=[255, 0, 0], type='', swap='kpt-38'),
+        44: dict(
+            name='kpt-44', id=44, color=[255, 0, 0], type='', swap='kpt-37'),
+        45: dict(
+            name='kpt-45', id=45, color=[255, 0, 0], type='', swap='kpt-36'),
+        46: dict(
+            name='kpt-46', id=46, color=[255, 0, 0], type='', swap='kpt-41'),
+        47: dict(
+            name='kpt-47', id=47, color=[255, 0, 0], type='', swap='kpt-40'),
+        48: dict(
+            name='kpt-48', id=48, color=[255, 0, 0], type='', swap='kpt-54'),
+        49: dict(
+            name='kpt-49', id=49, color=[255, 0, 0], type='', swap='kpt-53'),
+        50: dict(
+            name='kpt-50', id=50, color=[255, 0, 0], type='', swap='kpt-52'),
+        51: dict(name='kpt-51', id=51, color=[255, 0, 0], type='', swap=''),
+        52: dict(
+            name='kpt-52', id=52, color=[255, 0, 0], type='', swap='kpt-50'),
+        53: dict(
+            name='kpt-53', id=53, color=[255, 0, 0], type='', swap='kpt-49'),
+        54: dict(
+            name='kpt-54', id=54, color=[255, 0, 0], type='', swap='kpt-48'),
+        55: dict(
+            name='kpt-55', id=55, color=[255, 0, 0], type='', swap='kpt-59'),
+        56: dict(
+            name='kpt-56', id=56, color=[255, 0, 0], type='', swap='kpt-58'),
+        57: dict(name='kpt-57', id=57, color=[255, 0, 0], type='', swap=''),
+        58: dict(
+            name='kpt-58', id=58, color=[255, 0, 0], type='', swap='kpt-56'),
+        59: dict(
+            name='kpt-59', id=59, color=[255, 0, 0], type='', swap='kpt-55'),
+        60: dict(
+            name='kpt-60', id=60, color=[255, 0, 0], type='', swap='kpt-64'),
+        61: dict(
+            name='kpt-61', id=61, color=[255, 0, 0], type='', swap='kpt-63'),
+        62: dict(name='kpt-62', id=62, color=[255, 0, 0], type='', swap=''),
+        63: dict(
+            name='kpt-63', id=63, color=[255, 0, 0], type='', swap='kpt-61'),
+        64: dict(
+            name='kpt-64', id=64, color=[255, 0, 0], type='', swap='kpt-60'),
+        65: dict(
+            name='kpt-65', id=65, color=[255, 0, 0], type='', swap='kpt-67'),
+        66: dict(name='kpt-66', id=66, color=[255, 0, 0], type='', swap=''),
+        67: dict(
+            name='kpt-67', id=67, color=[255, 0, 0], type='', swap='kpt-65'),
+    },
+    skeleton_info={},
+    joint_weights=[1.] * 68,
+    sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/aflw.py b/mmpose/configs/_base_/datasets/aflw.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf5e10964da700415f3613ca43a0755f5015d8f0
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/aflw.py
@@ -0,0 +1,44 @@
+dataset_info = dict(
+    dataset_name='aflw',
+    paper_info=dict(
+        author='Koestinger, Martin and Wohlhart, Paul and '
+        'Roth, Peter M and Bischof, Horst',
+        title='Annotated facial landmarks in the wild: '
+        'A large-scale, real-world database for facial '
+        'landmark localization',
+        container='2011 IEEE international conference on computer '
+        'vision workshops (ICCV workshops)',
+        year='2011',
+        homepage='https://www.tugraz.at/institute/icg/research/'
+        'team-bischof/lrs/downloads/aflw/',
+    ),
+    keypoint_info={
+        0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-5'),
+        1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-4'),
+        2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-3'),
+        3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-2'),
+        4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-1'),
+        5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-0'),
+        6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-11'),
+        7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-10'),
+        8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-9'),
+        9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-8'),
+        10:
+        dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-7'),
+        11:
+        dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-6'),
+        12:
+        dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-14'),
+        13: dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap=''),
+        14:
+        dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-12'),
+        15:
+        dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-17'),
+        16: dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap=''),
+        17:
+        dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-15'),
+        18: dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='')
+    },
+    skeleton_info={},
+    joint_weights=[1.] * 19,
+    sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/aic.py b/mmpose/configs/_base_/datasets/aic.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ecdbe3f0afeb19dbb7aed42653ce5efd85cfda3
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/aic.py
@@ -0,0 +1,140 @@
+dataset_info = dict(
+    dataset_name='aic',
+    paper_info=dict(
+        author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+        'Li, Yixin and Yan, Baoming and Liang, Rui and '
+        'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+        'Fu, Yanwei and others',
+        title='Ai challenger: A large-scale dataset for going '
+        'deeper in image understanding',
+        container='arXiv',
+        year='2017',
+        homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+    ),
+    keypoint_info={
+        0:
+        dict(
+            name='right_shoulder',
+            id=0,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_shoulder'),
+        1:
+        dict(
+            name='right_elbow',
+            id=1,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_elbow'),
+        2:
+        dict(
+            name='right_wrist',
+            id=2,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_wrist'),
+        3:
+        dict(
+            name='left_shoulder',
+            id=3,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        4:
+        dict(
+            name='left_elbow',
+            id=4,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_elbow'),
+        5:
+        dict(
+            name='left_wrist',
+            id=5,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_wrist'),
+        6:
+        dict(
+            name='right_hip',
+            id=6,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_hip'),
+        7:
+        dict(
+            name='right_knee',
+            id=7,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_knee'),
+        8:
+        dict(
+            name='right_ankle',
+            id=8,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_ankle'),
+        9:
+        dict(
+            name='left_hip',
+            id=9,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        10:
+        dict(
+            name='left_knee',
+            id=10,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_knee'),
+        11:
+        dict(
+            name='left_ankle',
+            id=11,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_ankle'),
+        12:
+        dict(
+            name='head_top',
+            id=12,
+            color=[51, 153, 255],
+            type='upper',
+            swap=''),
+        13:
+        dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap='')
+    },
+    skeleton_info={
+        0:
+        dict(link=('right_wrist', 'right_elbow'), id=0, color=[255, 128, 0]),
+        1: dict(
+            link=('right_elbow', 'right_shoulder'), id=1, color=[255, 128, 0]),
+        2: dict(link=('right_shoulder', 'neck'), id=2, color=[51, 153, 255]),
+        3: dict(link=('neck', 'left_shoulder'), id=3, color=[51, 153, 255]),
+        4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+        5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+        6: dict(link=('right_ankle', 'right_knee'), id=6, color=[255, 128, 0]),
+        7: dict(link=('right_knee', 'right_hip'), id=7, color=[255, 128, 0]),
+        8: dict(link=('right_hip', 'left_hip'), id=8, color=[51, 153, 255]),
+        9: dict(link=('left_hip', 'left_knee'), id=9, color=[0, 255, 0]),
+        10: dict(link=('left_knee', 'left_ankle'), id=10, color=[0, 255, 0]),
+        11: dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
+        12: dict(
+            link=('right_shoulder', 'right_hip'), id=12, color=[51, 153, 255]),
+        13:
+        dict(link=('left_shoulder', 'left_hip'), id=13, color=[51, 153, 255])
+    },
+    joint_weights=[
+        1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.
+    ],
+
+    # 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
+    # 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
+    # delta = 2 x sigma
+    sigmas=[
+        0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
+        0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
+        0.01291456, 0.01236173
+    ])
diff --git a/mmpose/configs/_base_/datasets/animalpose.py b/mmpose/configs/_base_/datasets/animalpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5bb62d951b71da25e679bd755fe566216dc3f6f
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/animalpose.py
@@ -0,0 +1,166 @@
+dataset_info = dict(
+    dataset_name='animalpose',
+    paper_info=dict(
+        author='Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and '
+        'Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing',
+        title='Cross-Domain Adaptation for Animal Pose Estimation',
+        container='The IEEE International Conference on '
+        'Computer Vision (ICCV)',
+        year='2019',
+        homepage='https://sites.google.com/view/animal-pose/',
+    ),
+    keypoint_info={
+        0:
+        dict(
+            name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+        1:
+        dict(
+            name='R_Eye',
+            id=1,
+            color=[255, 128, 0],
+            type='upper',
+            swap='L_Eye'),
+        2:
+        dict(
+            name='L_EarBase',
+            id=2,
+            color=[0, 255, 0],
+            type='upper',
+            swap='R_EarBase'),
+        3:
+        dict(
+            name='R_EarBase',
+            id=3,
+            color=[255, 128, 0],
+            type='upper',
+            swap='L_EarBase'),
+        4:
+        dict(name='Nose', id=4, color=[51, 153, 255], type='upper', swap=''),
+        5:
+        dict(name='Throat', id=5, color=[51, 153, 255], type='upper', swap=''),
+        6:
+        dict(
+            name='TailBase', id=6, color=[51, 153, 255], type='lower',
+            swap=''),
+        7:
+        dict(
+            name='Withers', id=7, color=[51, 153, 255], type='upper', swap=''),
+        8:
+        dict(
+            name='L_F_Elbow',
+            id=8,
+            color=[0, 255, 0],
+            type='upper',
+            swap='R_F_Elbow'),
+        9:
+        dict(
+            name='R_F_Elbow',
+            id=9,
+            color=[255, 128, 0],
+            type='upper',
+            swap='L_F_Elbow'),
+        10:
+        dict(
+            name='L_B_Elbow',
+            id=10,
+            color=[0, 255, 0],
+            type='lower',
+            swap='R_B_Elbow'),
+        11:
+        dict(
+            name='R_B_Elbow',
+            id=11,
+            color=[255, 128, 0],
+            type='lower',
+            swap='L_B_Elbow'),
+        12:
+        dict(
+            name='L_F_Knee',
+            id=12,
+            color=[0, 255, 0],
+            type='upper',
+            swap='R_F_Knee'),
+        13:
+        dict(
+            name='R_F_Knee',
+            id=13,
+            color=[255, 128, 0],
+            type='upper',
+            swap='L_F_Knee'),
+        14:
+        dict(
+            name='L_B_Knee',
+            id=14,
+            color=[0, 255, 0],
+            type='lower',
+            swap='R_B_Knee'),
+        15:
+        dict(
+            name='R_B_Knee',
+            id=15,
+            color=[255, 128, 0],
+            type='lower',
+            swap='L_B_Knee'),
+        16:
+        dict(
+            name='L_F_Paw',
+            id=16,
+            color=[0, 255, 0],
+            type='upper',
+            swap='R_F_Paw'),
+        17:
+        dict(
+            name='R_F_Paw',
+            id=17,
+            color=[255, 128, 0],
+            type='upper',
+            swap='L_F_Paw'),
+        18:
+        dict(
+            name='L_B_Paw',
+            id=18,
+            color=[0, 255, 0],
+            type='lower',
+            swap='R_B_Paw'),
+        19:
+        dict(
+            name='R_B_Paw',
+            id=19,
+            color=[255, 128, 0],
+            type='lower',
+            swap='L_B_Paw')
+    },
+    skeleton_info={
+        0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[51, 153, 255]),
+        1: dict(link=('L_Eye', 'L_EarBase'), id=1, color=[0, 255, 0]),
+        2: dict(link=('R_Eye', 'R_EarBase'), id=2, color=[255, 128, 0]),
+        3: dict(link=('L_Eye', 'Nose'), id=3, color=[0, 255, 0]),
+        4: dict(link=('R_Eye', 'Nose'), id=4, color=[255, 128, 0]),
+        5: dict(link=('Nose', 'Throat'), id=5, color=[51, 153, 255]),
+        6: dict(link=('Throat', 'Withers'), id=6, color=[51, 153, 255]),
+        7: dict(link=('TailBase', 'Withers'), id=7, color=[51, 153, 255]),
+        8: dict(link=('Throat', 'L_F_Elbow'), id=8, color=[0, 255, 0]),
+        9: dict(link=('L_F_Elbow', 'L_F_Knee'), id=9, color=[0, 255, 0]),
+        10: dict(link=('L_F_Knee', 'L_F_Paw'), id=10, color=[0, 255, 0]),
+        11: dict(link=('Throat', 'R_F_Elbow'), id=11, color=[255, 128, 0]),
+        12: dict(link=('R_F_Elbow', 'R_F_Knee'), id=12, color=[255, 128, 0]),
+        13: dict(link=('R_F_Knee', 'R_F_Paw'), id=13, color=[255, 128, 0]),
+        14: dict(link=('TailBase', 'L_B_Elbow'), id=14, color=[0, 255, 0]),
+        15: dict(link=('L_B_Elbow', 'L_B_Knee'), id=15, color=[0, 255, 0]),
+        16: dict(link=('L_B_Knee', 'L_B_Paw'), id=16, color=[0, 255, 0]),
+        17: dict(link=('TailBase', 'R_B_Elbow'), id=17, color=[255, 128, 0]),
+        18: dict(link=('R_B_Elbow', 'R_B_Knee'), id=18, color=[255, 128, 0]),
+        19: dict(link=('R_B_Knee', 'R_B_Paw'), id=19, color=[255, 128, 0])
+    },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2,
+        1.5, 1.5, 1.5, 1.5
+    ],
+
+    # Note: The original paper did not provide enough information about
+    # the sigmas. We modified from 'https://github.com/cocodataset/'
+    # 'cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L523'
+    sigmas=[
+        0.025, 0.025, 0.026, 0.035, 0.035, 0.10, 0.10, 0.10, 0.107, 0.107,
+        0.107, 0.107, 0.087, 0.087, 0.087, 0.087, 0.089, 0.089, 0.089, 0.089
+    ])
diff --git a/mmpose/configs/_base_/datasets/ap10k.py b/mmpose/configs/_base_/datasets/ap10k.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0df579acbb8cf0de1ef62412ba865ee8710f0aa
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/ap10k.py
@@ -0,0 +1,142 @@
+dataset_info = dict(
+    dataset_name='ap10k',
+    paper_info=dict(
+        author='Yu, Hang and Xu, Yufei and Zhang, Jing and '
+        'Zhao, Wei and Guan, Ziyu and Tao, Dacheng',
+        title='AP-10K: A Benchmark for Animal Pose Estimation in the Wild',
+        container='35th Conference on Neural Information Processing Systems '
+        '(NeurIPS 2021) Track on Datasets and Bench-marks.',
+        year='2021',
+        homepage='https://github.com/AlexTheBad/AP-10K',
+    ),
+    keypoint_info={
+        0:
+        dict(
+            name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+        1:
+        dict(
+            name='R_Eye',
+            id=1,
+            color=[255, 128, 0],
+            type='upper',
+            swap='L_Eye'),
+        2:
+        dict(name='Nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+        3:
+        dict(name='Neck', id=3, color=[51, 153, 255], type='upper', swap=''),
+        4:
+        dict(
+            name='Root of tail',
+            id=4,
+            color=[51, 153, 255],
+            type='lower',
+            swap=''),
+        5:
+        dict(
+            name='L_Shoulder',
+            id=5,
+            color=[51, 153, 255],
+            type='upper',
+            swap='R_Shoulder'),
+        6:
+        dict(
+            name='L_Elbow',
+            id=6,
+            color=[51, 153, 255],
+            type='upper',
+            swap='R_Elbow'),
+        7:
+        dict(
+            name='L_F_Paw',
+            id=7,
+            color=[0, 255, 0],
+            type='upper',
+            swap='R_F_Paw'),
+        8:
+        dict(
+            name='R_Shoulder',
+            id=8,
+            color=[0, 255, 0],
+            type='upper',
+            swap='L_Shoulder'),
+        9:
+        dict(
+            name='R_Elbow',
+            id=9,
+            color=[255, 128, 0],
+            type='upper',
+            swap='L_Elbow'),
+        10:
+        dict(
+            name='R_F_Paw',
+            id=10,
+            color=[0, 255, 0],
+            type='lower',
+            swap='L_F_Paw'),
+        11:
+        dict(
+            name='L_Hip',
+            id=11,
+            color=[255, 128, 0],
+            type='lower',
+            swap='R_Hip'),
+        12:
+        dict(
+            name='L_Knee',
+            id=12,
+            color=[255, 128, 0],
+            type='lower',
+            swap='R_Knee'),
+        13:
+        dict(
+            name='L_B_Paw',
+            id=13,
+            color=[0, 255, 0],
+            type='lower',
+            swap='R_B_Paw'),
+        14:
+        dict(
+            name='R_Hip', id=14, color=[0, 255, 0], type='lower',
+            swap='L_Hip'),
+        15:
+        dict(
+            name='R_Knee',
+            id=15,
+            color=[0, 255, 0],
+            type='lower',
+            swap='L_Knee'),
+        16:
+        dict(
+            name='R_B_Paw',
+            id=16,
+            color=[0, 255, 0],
+            type='lower',
+            swap='L_B_Paw'),
+    },
+    skeleton_info={
+        0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[0, 0, 255]),
+        1: dict(link=('L_Eye', 'Nose'), id=1, color=[0, 0, 255]),
+        2: dict(link=('R_Eye', 'Nose'), id=2, color=[0, 0, 255]),
+        3: dict(link=('Nose', 'Neck'), id=3, color=[0, 255, 0]),
+        4: dict(link=('Neck', 'Root of tail'), id=4, color=[0, 255, 0]),
+        5: dict(link=('Neck', 'L_Shoulder'), id=5, color=[0, 255, 255]),
+        6: dict(link=('L_Shoulder', 'L_Elbow'), id=6, color=[0, 255, 255]),
+        7: dict(link=('L_Elbow', 'L_F_Paw'), id=6, color=[0, 255, 255]),
+        8: dict(link=('Neck', 'R_Shoulder'), id=7, color=[6, 156, 250]),
+        9: dict(link=('R_Shoulder', 'R_Elbow'), id=8, color=[6, 156, 250]),
+        10: dict(link=('R_Elbow', 'R_F_Paw'), id=9, color=[6, 156, 250]),
+        11: dict(link=('Root of tail', 'L_Hip'), id=10, color=[0, 255, 255]),
+        12: dict(link=('L_Hip', 'L_Knee'), id=11, color=[0, 255, 255]),
+        13: dict(link=('L_Knee', 'L_B_Paw'), id=12, color=[0, 255, 255]),
+        14: dict(link=('Root of tail', 'R_Hip'), id=13, color=[6, 156, 250]),
+        15: dict(link=('R_Hip', 'R_Knee'), id=14, color=[6, 156, 250]),
+        16: dict(link=('R_Knee', 'R_B_Paw'), id=15, color=[6, 156, 250]),
+    },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+        1.5
+    ],
+    sigmas=[
+        0.025, 0.025, 0.026, 0.035, 0.035, 0.079, 0.072, 0.062, 0.079, 0.072,
+        0.062, 0.107, 0.087, 0.089, 0.107, 0.087, 0.089
+    ])
diff --git a/mmpose/configs/_base_/datasets/atrw.py b/mmpose/configs/_base_/datasets/atrw.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ec71c8c508a0340139371a651ca2dd56eeae3cf
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/atrw.py
@@ -0,0 +1,144 @@
+dataset_info = dict(
+    dataset_name='atrw',
+    paper_info=dict(
+        author='Li, Shuyuan and Li, Jianguo and Tang, Hanlin '
+        'and Qian, Rui and Lin, Weiyao',
+        title='ATRW: A Benchmark for Amur Tiger '
+        'Re-identification in the Wild',
+        container='Proceedings of the 28th ACM '
+        'International Conference on Multimedia',
+        year='2020',
+        homepage='https://cvwc2019.github.io/challenge.html',
+    ),
+    keypoint_info={
+        0:
+        dict(
+            name='left_ear',
+            id=0,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_ear'),
+        1:
+        dict(
+            name='right_ear',
+            id=1,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_ear'),
+        2:
+        dict(name='nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+        3:
+        dict(
+            name='right_shoulder',
+            id=3,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_shoulder'),
+        4:
+        dict(
+            name='right_front_paw',
+            id=4,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_front_paw'),
+        5:
+        dict(
+            name='left_shoulder',
+            id=5,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        6:
+        dict(
+            name='left_front_paw',
+            id=6,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_front_paw'),
+        7:
+        dict(
+            name='right_hip',
+            id=7,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_hip'),
+        8:
+        dict(
+            name='right_knee',
+            id=8,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_knee'),
+        9:
+        dict(
+            name='right_back_paw',
+            id=9,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_back_paw'),
+        10:
+        dict(
+            name='left_hip',
+            id=10,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        11:
+        dict(
+            name='left_knee',
+            id=11,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_knee'),
+        12:
+        dict(
+            name='left_back_paw',
+            id=12,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_back_paw'),
+        13:
+        dict(name='tail', id=13, color=[51, 153, 255], type='lower', swap=''),
+        14:
+        dict(
+            name='center', id=14, color=[51, 153, 255], type='lower', swap=''),
+    },
+    skeleton_info={
+        0:
+        dict(link=('left_ear', 'nose'), id=0, color=[51, 153, 255]),
+        1:
+        dict(link=('right_ear', 'nose'), id=1, color=[51, 153, 255]),
+        2:
+        dict(link=('nose', 'center'), id=2, color=[51, 153, 255]),
+        3:
+        dict(
+            link=('left_shoulder', 'left_front_paw'), id=3, color=[0, 255, 0]),
+        4:
+        dict(link=('left_shoulder', 'center'), id=4, color=[0, 255, 0]),
+        5:
+        dict(
+            link=('right_shoulder', 'right_front_paw'),
+            id=5,
+            color=[255, 128, 0]),
+        6:
+        dict(link=('right_shoulder', 'center'), id=6, color=[255, 128, 0]),
+        7:
+        dict(link=('tail', 'center'), id=7, color=[51, 153, 255]),
+        8:
+        dict(link=('right_back_paw', 'right_knee'), id=8, color=[255, 128, 0]),
+        9:
+        dict(link=('right_knee', 'right_hip'), id=9, color=[255, 128, 0]),
+        10:
+        dict(link=('right_hip', 'tail'), id=10, color=[255, 128, 0]),
+        11:
+        dict(link=('left_back_paw', 'left_knee'), id=11, color=[0, 255, 0]),
+        12:
+        dict(link=('left_knee', 'left_hip'), id=12, color=[0, 255, 0]),
+        13:
+        dict(link=('left_hip', 'tail'), id=13, color=[0, 255, 0]),
+    },
+    joint_weights=[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
+    sigmas=[
+        0.0277, 0.0823, 0.0831, 0.0202, 0.0716, 0.0263, 0.0646, 0.0302, 0.0440,
+        0.0316, 0.0333, 0.0547, 0.0263, 0.0683, 0.0539
+    ])
diff --git a/mmpose/configs/_base_/datasets/campus.py b/mmpose/configs/_base_/datasets/campus.py
new file mode 100644
index 0000000000000000000000000000000000000000..334316e9c25282508767158d3fae30578ab3949d
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/campus.py
@@ -0,0 +1,151 @@
+dataset_info = dict(
+    dataset_name='campus',
+    paper_info=dict(
+        author='Belagiannis, Vasileios and Amin, Sikandar and Andriluka, '
+        'Mykhaylo and Schiele, Bernt and Navab, Nassir and Ilic, Slobodan',
+        title='3D Pictorial Structures for Multiple Human Pose Estimation',
+        container='IEEE Computer Society Conference on Computer Vision and '
+        'Pattern Recognition (CVPR)',
+        year='2014',
+        homepage='http://campar.in.tum.de/Chair/MultiHumanPose',
+    ),
+    keypoint_info={
+        0:
+        dict(
+            name='right_ankle',
+            id=0,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_ankle'),
+        1:
+        dict(
+            name='right_knee',
+            id=1,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_knee'),
+        2:
+        dict(
+            name='right_hip',
+            id=2,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_hip'),
+        3:
+        dict(
+            name='left_hip',
+            id=3,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        4:
+        dict(
+            name='left_knee',
+            id=4,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_knee'),
+        5:
+        dict(
+            name='left_ankle',
+            id=5,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_ankle'),
+        6:
+        dict(
+            name='right_wrist',
+            id=6,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_wrist'),
+        7:
+        dict(
+            name='right_elbow',
+            id=7,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_elbow'),
+        8:
+        dict(
+            name='right_shoulder',
+            id=8,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_shoulder'),
+        9:
+        dict(
+            name='left_shoulder',
+            id=9,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        10:
+        dict(
+            name='left_elbow',
+            id=10,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_elbow'),
+        11:
+        dict(
+            name='left_wrist',
+            id=11,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_wrist'),
+        12:
+        dict(
+            name='bottom_head',
+            id=12,
+            color=[51, 153, 255],
+            type='upper',
+            swap=''),
+        13:
+        dict(
+            name='top_head',
+            id=13,
+            color=[51, 153, 255],
+            type='upper',
+            swap=''),
+    },
+    skeleton_info={
+        0:
+        dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+        1:
+        dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+        2:
+        dict(link=('left_hip', 'left_knee'), id=2, color=[0, 255, 0]),
+        3:
+        dict(link=('left_knee', 'left_ankle'), id=3, color=[0, 255, 0]),
+        4:
+        dict(link=('right_hip', 'left_hip'), id=4, color=[51, 153, 255]),
+        5:
+        dict(link=('right_wrist', 'right_elbow'), id=5, color=[255, 128, 0]),
+        6:
+        dict(
+            link=('right_elbow', 'right_shoulder'), id=6, color=[255, 128, 0]),
+        7:
+        dict(link=('left_shoulder', 'left_elbow'), id=7, color=[0, 255, 0]),
+        8:
+        dict(link=('left_elbow', 'left_wrist'), id=8, color=[0, 255, 0]),
+        9:
+        dict(link=('right_hip', 'right_shoulder'), id=9, color=[255, 128, 0]),
+        10:
+        dict(link=('left_hip', 'left_shoulder'), id=10, color=[0, 255, 0]),
+        11:
+        dict(
+            link=('right_shoulder', 'bottom_head'), id=11, color=[255, 128,
+                                                                  0]),
+        12:
+        dict(link=('left_shoulder', 'bottom_head'), id=12, color=[0, 255, 0]),
+        13:
+        dict(link=('bottom_head', 'top_head'), id=13, color=[51, 153, 255]),
+    },
+    joint_weights=[
+        1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.0, 1.0
+    ],
+    sigmas=[
+        0.089, 0.087, 0.107, 0.107, 0.087, 0.089, 0.062, 0.072, 0.079, 0.079,
+        0.072, 0.062, 0.026, 0.026
+    ])
diff --git a/mmpose/configs/_base_/datasets/coco.py b/mmpose/configs/_base_/datasets/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..865a95bc02fedd318f32d2e7aa8397147d78fdb5
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco.py
@@ -0,0 +1,181 @@
+dataset_info = dict(
+    dataset_name='coco',
+    paper_info=dict(
+        author='Lin, Tsung-Yi and Maire, Michael and '
+        'Belongie, Serge and Hays, James and '
+        'Perona, Pietro and Ramanan, Deva and '
+        r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+        title='Microsoft coco: Common objects in context',
+        container='European conference on computer vision',
+        year='2014',
+        homepage='http://cocodataset.org/',
+    ),
+    keypoint_info={
+        0:
+        dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+        1:
+        dict(
+            name='left_eye',
+            id=1,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_eye'),
+        2:
+        dict(
+            name='right_eye',
+            id=2,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_eye'),
+        3:
+        dict(
+            name='left_ear',
+            id=3,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_ear'),
+        4:
+        dict(
+            name='right_ear',
+            id=4,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_ear'),
+        5:
+        dict(
+            name='left_shoulder',
+            id=5,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        6:
+        dict(
+            name='right_shoulder',
+            id=6,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_shoulder'),
+        7:
+        dict(
+            name='left_elbow',
+            id=7,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_elbow'),
+        8:
+        dict(
+            name='right_elbow',
+            id=8,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_elbow'),
+        9:
+        dict(
+            name='left_wrist',
+            id=9,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_wrist'),
+        10:
+        dict(
+            name='right_wrist',
+            id=10,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_wrist'),
+        11:
+        dict(
+            name='left_hip',
+            id=11,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        12:
+        dict(
+            name='right_hip',
+            id=12,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_hip'),
+        13:
+        dict(
+            name='left_knee',
+            id=13,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_knee'),
+        14:
+        dict(
+            name='right_knee',
+            id=14,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_knee'),
+        15:
+        dict(
+            name='left_ankle',
+            id=15,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_ankle'),
+        16:
+        dict(
+            name='right_ankle',
+            id=16,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_ankle')
+    },
+    skeleton_info={
+        0:
+        dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+        1:
+        dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+        2:
+        dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+        3:
+        dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+        4:
+        dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+        5:
+        dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+        6:
+        dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+        7:
+        dict(
+            link=('left_shoulder', 'right_shoulder'),
+            id=7,
+            color=[51, 153, 255]),
+        8:
+        dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+        9:
+        dict(
+            link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+        10:
+        dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+        11:
+        dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+        12:
+        dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+        13:
+        dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+        14:
+        dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+        15:
+        dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+        16:
+        dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+        17:
+        dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+        18:
+        dict(
+            link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+    },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+        1.5
+    ],
+    sigmas=[
+        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+    ])
diff --git a/mmpose/configs/_base_/datasets/coco_aic.py b/mmpose/configs/_base_/datasets/coco_aic.py
new file mode 100644
index 0000000000000000000000000000000000000000..a084247468dac1b766cbcf756b750aa3d3680b9d
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_aic.py
@@ -0,0 +1,205 @@
+dataset_info = dict(
+    dataset_name='coco',
+    paper_info=[
+        dict(
+            author='Lin, Tsung-Yi and Maire, Michael and '
+            'Belongie, Serge and Hays, James and '
+            'Perona, Pietro and Ramanan, Deva and '
+            r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+            title='Microsoft coco: Common objects in context',
+            container='European conference on computer vision',
+            year='2014',
+            homepage='http://cocodataset.org/',
+        ),
+        dict(
+            author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+            'Li, Yixin and Yan, Baoming and Liang, Rui and '
+            'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+            'Fu, Yanwei and others',
+            title='Ai challenger: A large-scale dataset for going '
+            'deeper in image understanding',
+            container='arXiv',
+            year='2017',
+            homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+        ),
+    ],
+    keypoint_info={
+        0:
+        dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+        1:
+        dict(
+            name='left_eye',
+            id=1,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_eye'),
+        2:
+        dict(
+            name='right_eye',
+            id=2,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_eye'),
+        3:
+        dict(
+            name='left_ear',
+            id=3,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_ear'),
+        4:
+        dict(
+            name='right_ear',
+            id=4,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_ear'),
+        5:
+        dict(
+            name='left_shoulder',
+            id=5,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        6:
+        dict(
+            name='right_shoulder',
+            id=6,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_shoulder'),
+        7:
+        dict(
+            name='left_elbow',
+            id=7,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_elbow'),
+        8:
+        dict(
+            name='right_elbow',
+            id=8,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_elbow'),
+        9:
+        dict(
+            name='left_wrist',
+            id=9,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_wrist'),
+        10:
+        dict(
+            name='right_wrist',
+            id=10,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_wrist'),
+        11:
+        dict(
+            name='left_hip',
+            id=11,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        12:
+        dict(
+            name='right_hip',
+            id=12,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_hip'),
+        13:
+        dict(
+            name='left_knee',
+            id=13,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_knee'),
+        14:
+        dict(
+            name='right_knee',
+            id=14,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_knee'),
+        15:
+        dict(
+            name='left_ankle',
+            id=15,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_ankle'),
+        16:
+        dict(
+            name='right_ankle',
+            id=16,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_ankle'),
+        17:
+        dict(
+            name='head_top',
+            id=17,
+            color=[51, 153, 255],
+            type='upper',
+            swap=''),
+        18:
+        dict(name='neck', id=18, color=[51, 153, 255], type='upper', swap='')
+    },
+    skeleton_info={
+        0:
+        dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+        1:
+        dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+        2:
+        dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+        3:
+        dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+        4:
+        dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+        5:
+        dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+        6:
+        dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+        7:
+        dict(
+            link=('left_shoulder', 'right_shoulder'),
+            id=7,
+            color=[51, 153, 255]),
+        8:
+        dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+        9:
+        dict(
+            link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+        10:
+        dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+        11:
+        dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+        12:
+        dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+        13:
+        dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+        14:
+        dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+        15:
+        dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+        16:
+        dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+        17:
+        dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+        18:
+        dict(
+            link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]),
+        19:
+        dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
+    },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+        1.5, 1.5
+    ],
+    sigmas=[
+        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.026, 0.026
+    ])
diff --git a/mmpose/configs/_base_/datasets/coco_openpose.py b/mmpose/configs/_base_/datasets/coco_openpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..9aedd9f0e42e41d92893b139eeab59c41f38d814
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_openpose.py
@@ -0,0 +1,157 @@
+dataset_info = dict(
+    dataset_name='coco_openpose',
+    paper_info=dict(
+        author='Zhe, Cao and Tomas, Simon and '
+        'Shih-En, Wei and Yaser, Sheikh',
+        title='OpenPose: Realtime Multi-Person 2D Pose '
+        'Estimation using Part Affinity Fields',
+        container='IEEE Transactions on Pattern Analysis '
+        'and Machine Intelligence',
+        year='2019',
+        homepage='https://github.com/CMU-Perceptual-Computing-Lab/openpose/',
+    ),
+    keypoint_info={
+        0:
+        dict(name='nose', id=0, color=[255, 0, 85], type='upper', swap=''),
+        1:
+        dict(name='neck', id=1, color=[255, 0, 0], type='upper', swap=''),
+        2:
+        dict(
+            name='right_shoulder',
+            id=2,
+            color=[255, 85, 0],
+            type='upper',
+            swap='left_shoulder'),
+        3:
+        dict(
+            name='right_elbow',
+            id=3,
+            color=[255, 170, 0],
+            type='upper',
+            swap='left_elbow'),
+        4:
+        dict(
+            name='right_wrist',
+            id=4,
+            color=[255, 255, 0],
+            type='upper',
+            swap='left_wrist'),
+        5:
+        dict(
+            name='left_shoulder',
+            id=5,
+            color=[170, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        6:
+        dict(
+            name='left_elbow',
+            id=6,
+            color=[85, 255, 0],
+            type='upper',
+            swap='right_elbow'),
+        7:
+        dict(
+            name='left_wrist',
+            id=7,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_wrist'),
+        8:
+        dict(
+            name='right_hip',
+            id=8,
+            color=[255, 0, 170],
+            type='lower',
+            swap='left_hip'),
+        9:
+        dict(
+            name='right_knee',
+            id=9,
+            color=[255, 0, 255],
+            type='lower',
+            swap='left_knee'),
+        10:
+        dict(
+            name='right_ankle',
+            id=10,
+            color=[170, 0, 255],
+            type='lower',
+            swap='left_ankle'),
+        11:
+        dict(
+            name='left_hip',
+            id=11,
+            color=[85, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        12:
+        dict(
+            name='left_knee',
+            id=12,
+            color=[0, 0, 255],
+            type='lower',
+            swap='right_knee'),
+        13:
+        dict(
+            name='left_ankle',
+            id=13,
+            color=[0, 85, 255],
+            type='lower',
+            swap='right_ankle'),
+        14:
+        dict(
+            name='right_eye',
+            id=14,
+            color=[0, 255, 170],
+            type='upper',
+            swap='left_eye'),
+        15:
+        dict(
+            name='left_eye',
+            id=15,
+            color=[0, 255, 255],
+            type='upper',
+            swap='right_eye'),
+        16:
+        dict(
+            name='right_ear',
+            id=16,
+            color=[0, 170, 255],
+            type='upper',
+            swap='left_ear'),
+        17:
+        dict(
+            name='left_ear',
+            id=17,
+            color=[0, 170, 255],
+            type='upper',
+            swap='right_ear'),
+    },
+    skeleton_info={
+        0: dict(link=('neck', 'right_shoulder'), id=0, color=[255, 0, 85]),
+        1: dict(link=('neck', 'left_shoulder'), id=1, color=[255, 0, 0]),
+        2:
+        dict(link=('right_shoulder', 'right_elbow'), id=2, color=[255, 85, 0]),
+        3:
+        dict(link=('right_elbow', 'right_wrist'), id=3, color=[255, 170, 0]),
+        4:
+        dict(link=('left_shoulder', 'left_elbow'), id=4, color=[255, 255, 0]),
+        5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[170, 255, 0]),
+        6: dict(link=('neck', 'right_hip'), id=6, color=[85, 255, 0]),
+        7: dict(link=('right_hip', 'right_knee'), id=7, color=[0, 255, 0]),
+        8: dict(link=('right_knee', 'right_ankle'), id=8, color=[0, 255, 85]),
+        9: dict(link=('neck', 'left_hip'), id=9, color=[0, 255, 170]),
+        10: dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 225]),
+        11: dict(link=('left_knee', 'left_ankle'), id=11, color=[0, 170, 255]),
+        12: dict(link=('neck', 'nose'), id=12, color=[0, 85, 255]),
+        13: dict(link=('nose', 'right_eye'), id=13, color=[0, 0, 255]),
+        14: dict(link=('right_eye', 'right_ear'), id=14, color=[255, 0, 170]),
+        15: dict(link=('nose', 'left_eye'), id=15, color=[170, 0, 255]),
+        16: dict(link=('left_eye', 'left_ear'), id=16, color=[255, 0, 255]),
+    },
+    joint_weights=[1.] * 18,
+    sigmas=[
+        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.082
+    ])
diff --git a/mmpose/configs/_base_/datasets/coco_wholebody.py b/mmpose/configs/_base_/datasets/coco_wholebody.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef9b707017a24a1a133bb28566d212c618fee694
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_wholebody.py
@@ -0,0 +1,1154 @@
+dataset_info = dict(
+    dataset_name='coco_wholebody',
+    paper_info=dict(
+        author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+        'Wang, Can and Liu, Wentao and '
+        'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+        title='Whole-Body Human Pose Estimation in the Wild',
+        container='Proceedings of the European '
+        'Conference on Computer Vision (ECCV)',
+        year='2020',
+        homepage='https://github.com/jin-s13/COCO-WholeBody/',
+    ),
+    keypoint_info={
+        0:
+        dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+        1:
+        dict(
+            name='left_eye',
+            id=1,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_eye'),
+        2:
+        dict(
+            name='right_eye',
+            id=2,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_eye'),
+        3:
+        dict(
+            name='left_ear',
+            id=3,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_ear'),
+        4:
+        dict(
+            name='right_ear',
+            id=4,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_ear'),
+        5:
+        dict(
+            name='left_shoulder',
+            id=5,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_shoulder'),
+        6:
+        dict(
+            name='right_shoulder',
+            id=6,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_shoulder'),
+        7:
+        dict(
+            name='left_elbow',
+            id=7,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_elbow'),
+        8:
+        dict(
+            name='right_elbow',
+            id=8,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_elbow'),
+        9:
+        dict(
+            name='left_wrist',
+            id=9,
+            color=[0, 255, 0],
+            type='upper',
+            swap='right_wrist'),
+        10:
+        dict(
+            name='right_wrist',
+            id=10,
+            color=[255, 128, 0],
+            type='upper',
+            swap='left_wrist'),
+        11:
+        dict(
+            name='left_hip',
+            id=11,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_hip'),
+        12:
+        dict(
+            name='right_hip',
+            id=12,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_hip'),
+        13:
+        dict(
+            name='left_knee',
+            id=13,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_knee'),
+        14:
+        dict(
+            name='right_knee',
+            id=14,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_knee'),
+        15:
+        dict(
+            name='left_ankle',
+            id=15,
+            color=[0, 255, 0],
+            type='lower',
+            swap='right_ankle'),
+        16:
+        dict(
+            name='right_ankle',
+            id=16,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_ankle'),
+        17:
+        dict(
+            name='left_big_toe',
+            id=17,
+            color=[255, 128, 0],
+            type='lower',
+            swap='right_big_toe'),
+        18:
+        dict(
+            name='left_small_toe',
+            id=18,
+            color=[255, 128, 0],
+            type='lower',
+            swap='right_small_toe'),
+        19:
+        dict(
+            name='left_heel',
+            id=19,
+            color=[255, 128, 0],
+            type='lower',
+            swap='right_heel'),
+        20:
+        dict(
+            name='right_big_toe',
+            id=20,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_big_toe'),
+        21:
+        dict(
+            name='right_small_toe',
+            id=21,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_small_toe'),
+        22:
+        dict(
+            name='right_heel',
+            id=22,
+            color=[255, 128, 0],
+            type='lower',
+            swap='left_heel'),
+        23:
+        dict(
+            name='face-0',
+            id=23,
+            color=[255, 255, 255],
+            type='',
+            swap='face-16'),
+        24:
+        dict(
+            name='face-1',
+            id=24,
+            color=[255, 255, 255],
+            type='',
+            swap='face-15'),
+        25:
+        dict(
+            name='face-2',
+            id=25,
+            color=[255, 255, 255],
+            type='',
+            swap='face-14'),
+        26:
+        dict(
+            name='face-3',
+            id=26,
+            color=[255, 255, 255],
+            type='',
+            swap='face-13'),
+        27:
+        dict(
+            name='face-4',
+            id=27,
+            color=[255, 255, 255],
+            type='',
+            swap='face-12'),
+        28:
+        dict(
+            name='face-5',
+            id=28,
+            color=[255, 255, 255],
+            type='',
+            swap='face-11'),
+        29:
+        dict(
+            name='face-6',
+            id=29,
+            color=[255, 255, 255],
+            type='',
+            swap='face-10'),
+        30:
+        dict(
+            name='face-7',
+            id=30,
+            color=[255, 255, 255],
+            type='',
+            swap='face-9'),
+        31:
+        dict(name='face-8', id=31, color=[255, 255, 255], type='', swap=''),
+        32:
+        dict(
+            name='face-9',
+            id=32,
+            color=[255, 255, 255],
+            type='',
+            swap='face-7'),
+        33:
+        dict(
+            name='face-10',
+            id=33,
+            color=[255, 255, 255],
+            type='',
+            swap='face-6'),
+        34:
+        dict(
+            name='face-11',
+            id=34,
+            color=[255, 255, 255],
+            type='',
+            swap='face-5'),
+        35:
+        dict(
+            name='face-12',
+            id=35,
+            color=[255, 255, 255],
+            type='',
+            swap='face-4'),
+        36:
+        dict(
+            name='face-13',
+            id=36,
+            color=[255, 255, 255],
+            type='',
+            swap='face-3'),
+        37:
+        dict(
+            name='face-14',
+            id=37,
+            color=[255, 255, 255],
+            type='',
+            swap='face-2'),
+        38:
+        dict(
+            name='face-15',
+            id=38,
+            color=[255, 255, 255],
+            type='',
+            swap='face-1'),
+        39:
+        dict(
+            name='face-16',
+            id=39,
+            color=[255, 255, 255],
+            type='',
+            swap='face-0'),
+        40:
+        dict(
+            name='face-17',
+            id=40,
+            color=[255, 255, 255],
+            type='',
+            swap='face-26'),
+        41:
+        dict(
+            name='face-18',
+            id=41,
+            color=[255, 255, 255],
+            type='',
+            swap='face-25'),
+        42:
+        dict(
+            name='face-19',
+            id=42,
+            color=[255, 255, 255],
+            type='',
+            swap='face-24'),
+        43:
+        dict(
+            name='face-20',
+            id=43,
+            color=[255, 255, 255],
+            type='',
+            swap='face-23'),
+        44:
+        dict(
+            name='face-21',
+            id=44,
+            color=[255, 255, 255],
+            type='',
+            swap='face-22'),
+        45:
+        dict(
+            name='face-22',
+            id=45,
+            color=[255, 255, 255],
+            type='',
+            swap='face-21'),
+        46:
+        dict(
+            name='face-23',
+            id=46,
+            color=[255, 255, 255],
+            type='',
+            swap='face-20'),
+        47:
+        dict(
+            name='face-24',
+            id=47,
+            color=[255, 255, 255],
+            type='',
+            swap='face-19'),
+        48:
+        dict(
+            name='face-25',
+            id=48,
+            color=[255, 255, 255],
+            type='',
+            swap='face-18'),
+        49:
+        dict(
+            name='face-26',
+            id=49,
+            color=[255, 255, 255],
+            type='',
+            swap='face-17'),
+        50:
+        dict(name='face-27', id=50, color=[255, 255, 255], type='', swap=''),
+        51:
+        dict(name='face-28', id=51, color=[255, 255, 255], type='', swap=''),
+        52:
+        dict(name='face-29', id=52, color=[255, 255, 255], type='', swap=''),
+        53:
+        dict(name='face-30', id=53, color=[255, 255, 255], type='', swap=''),
+        54:
+        dict(
+            name='face-31',
+            id=54,
+            color=[255, 255, 255],
+            type='',
+            swap='face-35'),
+        55:
+        dict(
+            name='face-32',
+            id=55,
+            color=[255, 255, 255],
+            type='',
+            swap='face-34'),
+        56:
+        dict(name='face-33', id=56, color=[255, 255, 255], type='', swap=''),
+        57:
+        dict(
+            name='face-34',
+            id=57,
+            color=[255, 255, 255],
+            type='',
+            swap='face-32'),
+        58:
+        dict(
+            name='face-35',
+            id=58,
+            color=[255, 255, 255],
+            type='',
+            swap='face-31'),
+        59:
+        dict(
+            name='face-36',
+            id=59,
+            color=[255, 255, 255],
+            type='',
+            swap='face-45'),
+        60:
+        dict(
+            name='face-37',
+            id=60,
+            color=[255, 255, 255],
+            type='',
+            swap='face-44'),
+        61:
+        dict(
+            name='face-38',
+            id=61,
+            color=[255, 255, 255],
+            type='',
+            swap='face-43'),
+        62:
+        dict(
+            name='face-39',
+            id=62,
+            color=[255, 255, 255],
+            type='',
+            swap='face-42'),
+        63:
+        dict(
+            name='face-40',
+            id=63,
+            color=[255, 255, 255],
+            type='',
+            swap='face-47'),
64: + dict( + name='face-41', + id=64, + color=[255, 255, 255], + type='', + swap='face-46'), + 65: + dict( + name='face-42', + id=65, + color=[255, 255, 255], + type='', + swap='face-39'), + 66: + dict( + name='face-43', + id=66, + color=[255, 255, 255], + type='', + swap='face-38'), + 67: + dict( + name='face-44', + id=67, + color=[255, 255, 255], + type='', + swap='face-37'), + 68: + dict( + name='face-45', + id=68, + color=[255, 255, 255], + type='', + swap='face-36'), + 69: + dict( + name='face-46', + id=69, + color=[255, 255, 255], + type='', + swap='face-41'), + 70: + dict( + name='face-47', + id=70, + color=[255, 255, 255], + type='', + swap='face-40'), + 71: + dict( + name='face-48', + id=71, + color=[255, 255, 255], + type='', + swap='face-54'), + 72: + dict( + name='face-49', + id=72, + color=[255, 255, 255], + type='', + swap='face-53'), + 73: + dict( + name='face-50', + id=73, + color=[255, 255, 255], + type='', + swap='face-52'), + 74: + dict(name='face-51', id=74, color=[255, 255, 255], type='', swap=''), + 75: + dict( + name='face-52', + id=75, + color=[255, 255, 255], + type='', + swap='face-50'), + 76: + dict( + name='face-53', + id=76, + color=[255, 255, 255], + type='', + swap='face-49'), + 77: + dict( + name='face-54', + id=77, + color=[255, 255, 255], + type='', + swap='face-48'), + 78: + dict( + name='face-55', + id=78, + color=[255, 255, 255], + type='', + swap='face-59'), + 79: + dict( + name='face-56', + id=79, + color=[255, 255, 255], + type='', + swap='face-58'), + 80: + dict(name='face-57', id=80, color=[255, 255, 255], type='', swap=''), + 81: + dict( + name='face-58', + id=81, + color=[255, 255, 255], + type='', + swap='face-56'), + 82: + dict( + name='face-59', + id=82, + color=[255, 255, 255], + type='', + swap='face-55'), + 83: + dict( + name='face-60', + id=83, + color=[255, 255, 255], + type='', + swap='face-64'), + 84: + dict( + name='face-61', + id=84, + color=[255, 255, 255], + type='', + swap='face-63'), + 85: + dict(name='face-62', id=85, color=[255, 255, 255], type='', swap=''), + 86: + dict( + name='face-63', + id=86, + color=[255, 255, 255], + type='', + swap='face-61'), + 87: + dict( + name='face-64', + id=87, + color=[255, 255, 255], + type='', + swap='face-60'), + 88: + dict( + name='face-65', + id=88, + color=[255, 255, 255], + type='', + swap='face-67'), + 89: + dict(name='face-66', id=89, color=[255, 255, 255], type='', swap=''), + 90: + dict( + name='face-67', + id=90, + color=[255, 255, 255], + type='', + swap='face-65'), + 91: + dict( + name='left_hand_root', + id=91, + color=[255, 255, 255], + type='', + swap='right_hand_root'), + 92: + dict( + name='left_thumb1', + id=92, + color=[255, 128, 0], + type='', + swap='right_thumb1'), + 93: + dict( + name='left_thumb2', + id=93, + color=[255, 128, 0], + type='', + swap='right_thumb2'), + 94: + dict( + name='left_thumb3', + id=94, + color=[255, 128, 0], + type='', + swap='right_thumb3'), + 95: + dict( + name='left_thumb4', + id=95, + color=[255, 128, 0], + type='', + swap='right_thumb4'), + 96: + dict( + name='left_forefinger1', + id=96, + color=[255, 153, 255], + type='', + swap='right_forefinger1'), + 97: + dict( + name='left_forefinger2', + id=97, + color=[255, 153, 255], + type='', + swap='right_forefinger2'), + 98: + dict( + name='left_forefinger3', + id=98, + color=[255, 153, 255], + type='', + swap='right_forefinger3'), + 99: + dict( + name='left_forefinger4', + id=99, + color=[255, 153, 255], + type='', + swap='right_forefinger4'), + 100: + dict( + name='left_middle_finger1', + 
id=100, + color=[102, 178, 255], + type='', + swap='right_middle_finger1'), + 101: + dict( + name='left_middle_finger2', + id=101, + color=[102, 178, 255], + type='', + swap='right_middle_finger2'), + 102: + dict( + name='left_middle_finger3', + id=102, + color=[102, 178, 255], + type='', + swap='right_middle_finger3'), + 103: + dict( + name='left_middle_finger4', + id=103, + color=[102, 178, 255], + type='', + swap='right_middle_finger4'), + 104: + dict( + name='left_ring_finger1', + id=104, + color=[255, 51, 51], + type='', + swap='right_ring_finger1'), + 105: + dict( + name='left_ring_finger2', + id=105, + color=[255, 51, 51], + type='', + swap='right_ring_finger2'), + 106: + dict( + name='left_ring_finger3', + id=106, + color=[255, 51, 51], + type='', + swap='right_ring_finger3'), + 107: + dict( + name='left_ring_finger4', + id=107, + color=[255, 51, 51], + type='', + swap='right_ring_finger4'), + 108: + dict( + name='left_pinky_finger1', + id=108, + color=[0, 255, 0], + type='', + swap='right_pinky_finger1'), + 109: + dict( + name='left_pinky_finger2', + id=109, + color=[0, 255, 0], + type='', + swap='right_pinky_finger2'), + 110: + dict( + name='left_pinky_finger3', + id=110, + color=[0, 255, 0], + type='', + swap='right_pinky_finger3'), + 111: + dict( + name='left_pinky_finger4', + id=111, + color=[0, 255, 0], + type='', + swap='right_pinky_finger4'), + 112: + dict( + name='right_hand_root', + id=112, + color=[255, 255, 255], + type='', + swap='left_hand_root'), + 113: + dict( + name='right_thumb1', + id=113, + color=[255, 128, 0], + type='', + swap='left_thumb1'), + 114: + dict( + name='right_thumb2', + id=114, + color=[255, 128, 0], + type='', + swap='left_thumb2'), + 115: + dict( + name='right_thumb3', + id=115, + color=[255, 128, 0], + type='', + swap='left_thumb3'), + 116: + dict( + name='right_thumb4', + id=116, + color=[255, 128, 0], + type='', + swap='left_thumb4'), + 117: + dict( + name='right_forefinger1', + id=117, + color=[255, 153, 255], + type='', + swap='left_forefinger1'), + 118: + dict( + name='right_forefinger2', + id=118, + color=[255, 153, 255], + type='', + swap='left_forefinger2'), + 119: + dict( + name='right_forefinger3', + id=119, + color=[255, 153, 255], + type='', + swap='left_forefinger3'), + 120: + dict( + name='right_forefinger4', + id=120, + color=[255, 153, 255], + type='', + swap='left_forefinger4'), + 121: + dict( + name='right_middle_finger1', + id=121, + color=[102, 178, 255], + type='', + swap='left_middle_finger1'), + 122: + dict( + name='right_middle_finger2', + id=122, + color=[102, 178, 255], + type='', + swap='left_middle_finger2'), + 123: + dict( + name='right_middle_finger3', + id=123, + color=[102, 178, 255], + type='', + swap='left_middle_finger3'), + 124: + dict( + name='right_middle_finger4', + id=124, + color=[102, 178, 255], + type='', + swap='left_middle_finger4'), + 125: + dict( + name='right_ring_finger1', + id=125, + color=[255, 51, 51], + type='', + swap='left_ring_finger1'), + 126: + dict( + name='right_ring_finger2', + id=126, + color=[255, 51, 51], + type='', + swap='left_ring_finger2'), + 127: + dict( + name='right_ring_finger3', + id=127, + color=[255, 51, 51], + type='', + swap='left_ring_finger3'), + 128: + dict( + name='right_ring_finger4', + id=128, + color=[255, 51, 51], + type='', + swap='left_ring_finger4'), + 129: + dict( + name='right_pinky_finger1', + id=129, + color=[0, 255, 0], + type='', + swap='left_pinky_finger1'), + 130: + dict( + name='right_pinky_finger2', + id=130, + color=[0, 255, 0], + type='', + 
swap='left_pinky_finger2'), + 131: + dict( + name='right_pinky_finger3', + id=131, + color=[0, 255, 0], + type='', + swap='left_pinky_finger3'), + 132: + dict( + name='right_pinky_finger4', + id=132, + color=[0, 255, 0], + type='', + swap='left_pinky_finger4') + }, + skeleton_info={ + 0: + dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + 2: + dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]), + 3: + dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]), + 4: + dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]), + 5: + dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]), + 6: + dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]), + 7: + dict( + link=('left_shoulder', 'right_shoulder'), + id=7, + color=[51, 153, 255]), + 8: + dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]), + 9: + dict( + link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), + 10: + dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]), + 13: + dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]), + 14: + dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]), + 15: + dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]), + 16: + dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]), + 17: + dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]), + 18: + dict( + link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]), + 19: + dict(link=('left_ankle', 'left_big_toe'), id=19, color=[0, 255, 0]), + 20: + dict(link=('left_ankle', 'left_small_toe'), id=20, color=[0, 255, 0]), + 21: + dict(link=('left_ankle', 'left_heel'), id=21, color=[0, 255, 0]), + 22: + dict( + link=('right_ankle', 'right_big_toe'), id=22, color=[255, 128, 0]), + 23: + dict( + link=('right_ankle', 'right_small_toe'), + id=23, + color=[255, 128, 0]), + 24: + dict(link=('right_ankle', 'right_heel'), id=24, color=[255, 128, 0]), + 25: + dict( + link=('left_hand_root', 'left_thumb1'), id=25, color=[255, 128, + 0]), + 26: + dict(link=('left_thumb1', 'left_thumb2'), id=26, color=[255, 128, 0]), + 27: + dict(link=('left_thumb2', 'left_thumb3'), id=27, color=[255, 128, 0]), + 28: + dict(link=('left_thumb3', 'left_thumb4'), id=28, color=[255, 128, 0]), + 29: + dict( + link=('left_hand_root', 'left_forefinger1'), + id=29, + color=[255, 153, 255]), + 30: + dict( + link=('left_forefinger1', 'left_forefinger2'), + id=30, + color=[255, 153, 255]), + 31: + dict( + link=('left_forefinger2', 'left_forefinger3'), + id=31, + color=[255, 153, 255]), + 32: + dict( + link=('left_forefinger3', 'left_forefinger4'), + id=32, + color=[255, 153, 255]), + 33: + dict( + link=('left_hand_root', 'left_middle_finger1'), + id=33, + color=[102, 178, 255]), + 34: + dict( + link=('left_middle_finger1', 'left_middle_finger2'), + id=34, + color=[102, 178, 255]), + 35: + dict( + link=('left_middle_finger2', 'left_middle_finger3'), + id=35, + color=[102, 178, 255]), + 36: + dict( + link=('left_middle_finger3', 'left_middle_finger4'), + id=36, + color=[102, 178, 255]), + 37: + dict( + link=('left_hand_root', 'left_ring_finger1'), + id=37, + color=[255, 51, 51]), + 38: + dict( + link=('left_ring_finger1', 'left_ring_finger2'), + id=38, + color=[255, 51, 51]), + 39: + dict( + 
link=('left_ring_finger2', 'left_ring_finger3'), + id=39, + color=[255, 51, 51]), + 40: + dict( + link=('left_ring_finger3', 'left_ring_finger4'), + id=40, + color=[255, 51, 51]), + 41: + dict( + link=('left_hand_root', 'left_pinky_finger1'), + id=41, + color=[0, 255, 0]), + 42: + dict( + link=('left_pinky_finger1', 'left_pinky_finger2'), + id=42, + color=[0, 255, 0]), + 43: + dict( + link=('left_pinky_finger2', 'left_pinky_finger3'), + id=43, + color=[0, 255, 0]), + 44: + dict( + link=('left_pinky_finger3', 'left_pinky_finger4'), + id=44, + color=[0, 255, 0]), + 45: + dict( + link=('right_hand_root', 'right_thumb1'), + id=45, + color=[255, 128, 0]), + 46: + dict( + link=('right_thumb1', 'right_thumb2'), id=46, color=[255, 128, 0]), + 47: + dict( + link=('right_thumb2', 'right_thumb3'), id=47, color=[255, 128, 0]), + 48: + dict( + link=('right_thumb3', 'right_thumb4'), id=48, color=[255, 128, 0]), + 49: + dict( + link=('right_hand_root', 'right_forefinger1'), + id=49, + color=[255, 153, 255]), + 50: + dict( + link=('right_forefinger1', 'right_forefinger2'), + id=50, + color=[255, 153, 255]), + 51: + dict( + link=('right_forefinger2', 'right_forefinger3'), + id=51, + color=[255, 153, 255]), + 52: + dict( + link=('right_forefinger3', 'right_forefinger4'), + id=52, + color=[255, 153, 255]), + 53: + dict( + link=('right_hand_root', 'right_middle_finger1'), + id=53, + color=[102, 178, 255]), + 54: + dict( + link=('right_middle_finger1', 'right_middle_finger2'), + id=54, + color=[102, 178, 255]), + 55: + dict( + link=('right_middle_finger2', 'right_middle_finger3'), + id=55, + color=[102, 178, 255]), + 56: + dict( + link=('right_middle_finger3', 'right_middle_finger4'), + id=56, + color=[102, 178, 255]), + 57: + dict( + link=('right_hand_root', 'right_ring_finger1'), + id=57, + color=[255, 51, 51]), + 58: + dict( + link=('right_ring_finger1', 'right_ring_finger2'), + id=58, + color=[255, 51, 51]), + 59: + dict( + link=('right_ring_finger2', 'right_ring_finger3'), + id=59, + color=[255, 51, 51]), + 60: + dict( + link=('right_ring_finger3', 'right_ring_finger4'), + id=60, + color=[255, 51, 51]), + 61: + dict( + link=('right_hand_root', 'right_pinky_finger1'), + id=61, + color=[0, 255, 0]), + 62: + dict( + link=('right_pinky_finger1', 'right_pinky_finger2'), + id=62, + color=[0, 255, 0]), + 63: + dict( + link=('right_pinky_finger2', 'right_pinky_finger3'), + id=63, + color=[0, 255, 0]), + 64: + dict( + link=('right_pinky_finger3', 'right_pinky_finger4'), + id=64, + color=[0, 255, 0]) + }, + joint_weights=[1.] 
* 133,
+    # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
+    # 'evaluation/myeval_wholebody.py#L175'
+    sigmas=[
+        0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+        0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.068, 0.066, 0.066,
+        0.092, 0.094, 0.094, 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031,
+        0.025, 0.020, 0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045,
+        0.013, 0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
+        0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017,
+        0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010,
+        0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009,
+        0.009, 0.009, 0.007, 0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01,
+        0.008, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
+        0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019,
+        0.022, 0.031, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024,
+        0.035, 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
+        0.019, 0.022, 0.031
+    ])
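The `swap` fields in these `keypoint_info` dicts are what drive horizontal-flip augmentation: when an image is mirrored, each keypoint channel is re-indexed to its left/right counterpart, and an empty string marks a self-symmetric point. A minimal sketch of deriving that flip index from a dict of this shape (the helper name is illustrative, not an mmpose API):

```python
def build_flip_index(keypoint_info):
    """Map each keypoint id to the id it takes after a horizontal flip."""
    name_to_id = {v['name']: k for k, v in keypoint_info.items()}
    flip_index = []
    for k in sorted(keypoint_info):
        swap = keypoint_info[k]['swap']
        # Empty `swap` means the keypoint is its own mirror (e.g. 'nose').
        flip_index.append(name_to_id[swap] if swap else k)
    return flip_index

# For the COCO-WholeBody dict above, e.g.,
# flip_index[91] == 112  (left_hand_root <-> right_hand_root)
```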
diff --git a/mmpose/configs/_base_/datasets/coco_wholebody_face.py b/mmpose/configs/_base_/datasets/coco_wholebody_face.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3fe1e5b336d8ddd668d47123f5c0ceeff580914
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_wholebody_face.py
@@ -0,0 +1,154 @@
+dataset_info = dict(
+    dataset_name='coco_wholebody_face',
+    paper_info=dict(
+        author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+        'Wang, Can and Liu, Wentao and '
+        'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+        title='Whole-Body Human Pose Estimation in the Wild',
+        container='Proceedings of the European '
+        'Conference on Computer Vision (ECCV)',
+        year='2020',
+        homepage='https://github.com/jin-s13/COCO-WholeBody/',
+    ),
+    keypoint_info={
+        0:
+        dict(name='face-0', id=0, color=[255, 0, 0], type='', swap='face-16'),
+        1:
+        dict(name='face-1', id=1, color=[255, 0, 0], type='', swap='face-15'),
+        2:
+        dict(name='face-2', id=2, color=[255, 0, 0], type='', swap='face-14'),
+        3:
+        dict(name='face-3', id=3, color=[255, 0, 0], type='', swap='face-13'),
+        4:
+        dict(name='face-4', id=4, color=[255, 0, 0], type='', swap='face-12'),
+        5:
+        dict(name='face-5', id=5, color=[255, 0, 0], type='', swap='face-11'),
+        6:
+        dict(name='face-6', id=6, color=[255, 0, 0], type='', swap='face-10'),
+        7:
+        dict(name='face-7', id=7, color=[255, 0, 0], type='', swap='face-9'),
+        8: dict(name='face-8', id=8, color=[255, 0, 0], type='', swap=''),
+        9:
+        dict(name='face-9', id=9, color=[255, 0, 0], type='', swap='face-7'),
+        10:
+        dict(name='face-10', id=10, color=[255, 0, 0], type='', swap='face-6'),
+        11:
+        dict(name='face-11', id=11, color=[255, 0, 0], type='', swap='face-5'),
+        12:
+        dict(name='face-12', id=12, color=[255, 0, 0], type='', swap='face-4'),
+        13:
+        dict(name='face-13', id=13, color=[255, 0, 0], type='', swap='face-3'),
+        14:
+        dict(name='face-14', id=14, color=[255, 0, 0], type='', swap='face-2'),
+        15:
+        dict(name='face-15', id=15, color=[255, 0, 0], type='', swap='face-1'),
+        16:
+        dict(name='face-16', id=16, color=[255, 0, 0], type='', swap='face-0'),
+        17: dict(
+            name='face-17', id=17, color=[255, 0, 0], type='', swap='face-26'),
+        18: dict(
+            name='face-18', id=18, color=[255, 0, 0], type='', swap='face-25'),
+        19: dict(
+            name='face-19', id=19, color=[255, 0, 0], type='', swap='face-24'),
+        20: dict(
+            name='face-20', id=20, color=[255, 0, 0], type='', swap='face-23'),
+        21: dict(
+            name='face-21', id=21, color=[255, 0, 0], type='', swap='face-22'),
+        22: dict(
+            name='face-22', id=22, color=[255, 0, 0], type='', swap='face-21'),
+        23: dict(
+            name='face-23', id=23, color=[255, 0, 0], type='', swap='face-20'),
+        24: dict(
+            name='face-24', id=24, color=[255, 0, 0], type='', swap='face-19'),
+        25: dict(
+            name='face-25', id=25, color=[255, 0, 0], type='', swap='face-18'),
+        26: dict(
+            name='face-26', id=26, color=[255, 0, 0], type='', swap='face-17'),
+        27: dict(name='face-27', id=27, color=[255, 0, 0], type='', swap=''),
+        28: dict(name='face-28', id=28, color=[255, 0, 0], type='', swap=''),
+        29: dict(name='face-29', id=29, color=[255, 0, 0], type='', swap=''),
+        30: dict(name='face-30', id=30, color=[255, 0, 0], type='', swap=''),
+        31: dict(
+            name='face-31', id=31, color=[255, 0, 0], type='', swap='face-35'),
+        32: dict(
+            name='face-32', id=32, color=[255, 0, 0], type='', swap='face-34'),
+        33: dict(name='face-33', id=33, color=[255, 0, 0], type='', swap=''),
+        34: dict(
+            name='face-34', id=34, color=[255, 0, 0], type='', swap='face-32'),
+        35: dict(
+            name='face-35', id=35, color=[255, 0, 0], type='', swap='face-31'),
+        36: dict(
+            name='face-36', id=36, color=[255, 0, 0], type='', swap='face-45'),
+        37: dict(
+            name='face-37', id=37, color=[255, 0, 0], type='', swap='face-44'),
+        38: dict(
+            name='face-38', id=38, color=[255, 0, 0], type='', swap='face-43'),
+        39: dict(
+            name='face-39', id=39, color=[255, 0, 0], type='', swap='face-42'),
+        40: dict(
+            name='face-40', id=40, color=[255, 0, 0], type='', swap='face-47'),
+        41: dict(
+            name='face-41', id=41, color=[255, 0, 0], type='', swap='face-46'),
+        42: dict(
+            name='face-42', id=42, color=[255, 0, 0], type='', swap='face-39'),
+        43: dict(
+            name='face-43', id=43, color=[255, 0, 0], type='', swap='face-38'),
+        44: dict(
+            name='face-44', id=44, color=[255, 0, 0], type='', swap='face-37'),
+        45: dict(
+            name='face-45', id=45, color=[255, 0, 0], type='', swap='face-36'),
+        46: dict(
+            name='face-46', id=46, color=[255, 0, 0], type='', swap='face-41'),
+        47: dict(
+            name='face-47', id=47, color=[255, 0, 0], type='', swap='face-40'),
+        48: dict(
+            name='face-48', id=48, color=[255, 0, 0], type='', swap='face-54'),
+        49: dict(
+            name='face-49', id=49, color=[255, 0, 0], type='', swap='face-53'),
+        50: dict(
+            name='face-50', id=50, color=[255, 0, 0], type='', swap='face-52'),
+        51: dict(name='face-51', id=51, color=[255, 0, 0], type='', swap=''),
+        52: dict(
+            name='face-52', id=52, color=[255, 0, 0], type='', swap='face-50'),
+        53: dict(
+            name='face-53', id=53, color=[255, 0, 0], type='', swap='face-49'),
+        54: dict(
+            name='face-54', id=54, color=[255, 0, 0], type='', swap='face-48'),
+        55: dict(
+            name='face-55', id=55, color=[255, 0, 0], type='', swap='face-59'),
+        56: dict(
+            name='face-56', id=56, color=[255, 0, 0], type='', swap='face-58'),
+        57: dict(name='face-57', id=57, color=[255, 0, 0], type='', swap=''),
+        58: dict(
+            name='face-58', id=58, color=[255, 0, 0], type='', swap='face-56'),
+        59: dict(
+            name='face-59', id=59, color=[255, 0, 0], type='', swap='face-55'),
+        60: dict(
+            name='face-60', id=60, color=[255, 0, 0], type='', swap='face-64'),
+        61: dict(
+            name='face-61', id=61, color=[255, 0, 0], type='', swap='face-63'),
+        62: dict(name='face-62', id=62, color=[255, 0, 0], type='', swap=''),
+        63: dict(
+            name='face-63', id=63, color=[255, 0, 0], type='', swap='face-61'),
+        64: dict(
+            name='face-64', id=64, color=[255, 0, 0], type='', swap='face-60'),
+        65: dict(
+            name='face-65', id=65, color=[255, 0, 0], type='', swap='face-67'),
+        66: dict(name='face-66', id=66, color=[255, 0, 0], type='', swap=''),
+        67: dict(
+            name='face-67', id=67, color=[255, 0, 0], type='', swap='face-65')
+    },
+    skeleton_info={},
+    joint_weights=[1.] * 68,
+
+    # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
+    # 'evaluation/myeval_wholebody.py#L177'
+    sigmas=[
+        0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020, 0.023,
+        0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013, 0.012, 0.011,
+        0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015, 0.009, 0.007, 0.007,
+        0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017, 0.011, 0.009, 0.011,
+        0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010, 0.034, 0.008, 0.008,
+        0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009, 0.009, 0.009, 0.007,
+        0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01, 0.008
+    ])
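Each of these files ends with per-keypoint `sigmas`, the normalization constants for COCO-style Object Keypoint Similarity (the `myeval_wholebody.py` link in the comments is the reference implementation). A minimal sketch of the computation they feed, with illustrative names and shapes rather than the exact evaluator code:

```python
import numpy as np

def object_keypoint_similarity(pred, gt, visible, area, sigmas):
    """OKS between one predicted and one ground-truth pose.

    pred, gt: (K, 2) keypoint coordinates; visible: (K,) bool mask;
    area: ground-truth object area; sigmas: a per-keypoint list as above.
    """
    visible = np.asarray(visible, dtype=bool)
    variances = (2 * np.asarray(sigmas)) ** 2
    d2 = np.sum((np.asarray(pred) - np.asarray(gt)) ** 2, axis=-1)
    e = d2 / (2 * variances * (area + np.spacing(1)))
    return float(np.exp(-e)[visible].mean()) if visible.any() else 0.0
```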
diff --git a/mmpose/configs/_base_/datasets/coco_wholebody_hand.py b/mmpose/configs/_base_/datasets/coco_wholebody_hand.py
new file mode 100644
index 0000000000000000000000000000000000000000..1910b2ced5a8b31cd6f83911e41cae9f1a580222
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_wholebody_hand.py
@@ -0,0 +1,147 @@
+dataset_info = dict(
+    dataset_name='coco_wholebody_hand',
+    paper_info=dict(
+        author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+        'Wang, Can and Liu, Wentao and '
+        'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+        title='Whole-Body Human Pose Estimation in the Wild',
+        container='Proceedings of the European '
+        'Conference on Computer Vision (ECCV)',
+        year='2020',
+        homepage='https://github.com/jin-s13/COCO-WholeBody/',
+    ),
+    keypoint_info={
+        0:
+        dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+        1:
+        dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+        2:
+        dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+        3:
+        dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+        4:
+        dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+        5:
+        dict(
+            name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+        6:
+        dict(
+            name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+        7:
+        dict(
+            name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+        8:
+        dict(
+            name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+        9:
+        dict(
+            name='middle_finger1',
+            id=9,
+            color=[102, 178, 255],
+            type='',
+            swap=''),
+        10:
+        dict(
+            name='middle_finger2',
+            id=10,
+            color=[102, 178, 255],
+            type='',
+            swap=''),
+        11:
+        dict(
+            name='middle_finger3',
+            id=11,
+            color=[102, 178, 255],
+            type='',
+            swap=''),
+        12:
+        dict(
+            name='middle_finger4',
+            id=12,
+            color=[102, 178, 255],
+            type='',
+            swap=''),
+        13:
+        dict(
+            name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+        14:
+        dict(
+            name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+        15:
+        dict(
+            name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+        16:
+        dict(
+            name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+        17:
+        dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+        18:
+        dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+        19:
+        dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+        20:
+        dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+    },
+    skeleton_info={
+        0:
+        dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+        1:
+        dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+        2:
+        dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+        3:
+        dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+        4:
+        dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+        5:
+        dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+        6:
+        dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+        7:
+        dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+        8:
+        dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+        9:
+        dict(
+            link=('middle_finger1', 'middle_finger2'),
+            id=9,
+            color=[102, 178, 255]),
+        10:
+        dict(
+            link=('middle_finger2', 'middle_finger3'),
+            id=10,
+            color=[102, 178, 255]),
+        11:
+        dict(
+            link=('middle_finger3', 'middle_finger4'),
+            id=11,
+            color=[102, 178, 255]),
+        12:
+        dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+        13:
+        dict(
+            link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+        14:
+        dict(
+            link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+        15:
+        dict(
+            link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+        16:
+        dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+        17:
+        dict(
+            link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+        18:
+        dict(
+            link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+        19:
+        dict(
+            link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+    },
+    joint_weights=[1.] * 21,
+    sigmas=[
+        0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035, 0.018,
+        0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019, 0.022,
+        0.031
+    ])
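`keypoint_info` and `skeleton_info` together carry everything needed to render a pose: point colors, link endpoints (by keypoint name), and link colors. A hedged OpenCV sketch; `draw_pose` is illustrative rather than an mmpose API, and the configs' color triplets are assumed to be RGB, hence the reversal for OpenCV's BGR order:

```python
import cv2

def draw_pose(img, keypoints, keypoint_info, skeleton_info, radius=3):
    """Draw keypoints (N, 2) and skeleton links onto a BGR image in place."""
    name_to_id = {v['name']: k for k, v in keypoint_info.items()}
    for link in skeleton_info.values():
        a, b = (name_to_id[n] for n in link['link'])
        bgr = tuple(reversed(link['color']))  # config colors assumed RGB
        cv2.line(img, tuple(map(int, keypoints[a])),
                 tuple(map(int, keypoints[b])), bgr, thickness=2)
    for k, info in keypoint_info.items():
        bgr = tuple(reversed(info['color']))
        cv2.circle(img, tuple(map(int, keypoints[k])), radius, bgr, -1)
    return img
```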
diff --git a/mmpose/configs/_base_/datasets/cofw.py b/mmpose/configs/_base_/datasets/cofw.py
new file mode 100644
index 0000000000000000000000000000000000000000..d528bf2f2f7e63adbff3ed56e18bca8b02165e42
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/cofw.py
@@ -0,0 +1,57 @@
+dataset_info = dict(
+    dataset_name='cofw',
+    paper_info=dict(
+        author='Burgos-Artizzu, Xavier P and Perona, '
+        r'Pietro and Doll{\'a}r, Piotr',
+        title='Robust face landmark estimation under occlusion',
+        container='Proceedings of the IEEE international '
+        'conference on computer vision',
+        year='2013',
+        homepage='http://www.vision.caltech.edu/xpburgos/ICCV13/',
+    ),
+    keypoint_info={
+        0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-1'),
+        1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-0'),
+        2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-3'),
+        3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-2'),
+        4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-6'),
+        5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-7'),
+        6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-4'),
+        7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-5'),
+        8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-9'),
+        9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-8'),
+        10:
+        dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-11'),
+        11:
+        dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-10'),
+        12:
+        dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-14'),
+        13:
+        dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-15'),
+        14:
+        dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-12'),
+        15:
+        dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-13'),
+        16:
+        dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap='kpt-17'),
+        17:
+        dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-16'),
+        18:
+        dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-19'),
+        19:
+        dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-18'),
+        20: dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap=''),
+        21: dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap=''),
+        22:
+        dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-23'),
+        23:
+        dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-22'),
+        24: dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap=''),
+        25: dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap=''),
+        26: dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap=''),
+        27: dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap=''),
+        28: dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap='')
+    },
+    skeleton_info={},
+    joint_weights=[1.] * 29,
+    sigmas=[])
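Each of these `_base_` files defines a single module-level `dataset_info` dict, so it can be loaded and inspected like any other mmengine config. A small sketch (the path assumes the repo layout this diff adds; COFW's empty `skeleton_info` and `sigmas` reflect that it is a face-landmark dataset, typically scored with NME rather than OKS):

```python
from mmengine.config import Config

cfg = Config.fromfile('mmpose/configs/_base_/datasets/cofw.py')
info = cfg.dataset_info
print(info['dataset_name'])        # 'cofw'
print(len(info['keypoint_info']))  # 29 landmarks
print(info['skeleton_info'], info['sigmas'])  # {} []
```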
diff --git a/mmpose/configs/_base_/datasets/crowdpose.py b/mmpose/configs/_base_/datasets/crowdpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..45086531a601870716eed15a32c5413c0e24b7ae
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/crowdpose.py
@@ -0,0 +1,147 @@
+dataset_info = dict(
+    dataset_name='crowdpose',
+    paper_info=dict(
+        author='Li, Jiefeng and Wang, Can and Zhu, Hao and '
+        'Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu',
+        title='CrowdPose: Efficient Crowded Scenes Pose Estimation '
+        'and A New Benchmark',
+        container='Proceedings of IEEE Conference on Computer '
+        'Vision and Pattern Recognition (CVPR)',
+        year='2019',
+        homepage='https://github.com/Jeff-sjtu/CrowdPose',
+    ),
+    keypoint_info={
+        0:
+        dict(
+            name='left_shoulder',
+            id=0,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_shoulder'),
+        1:
+        dict(
+            name='right_shoulder',
+            id=1,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_shoulder'),
+        2:
+        dict(
+            name='left_elbow',
+            id=2,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_elbow'),
+        3:
+        dict(
+            name='right_elbow',
+            id=3,
+            color=[51, 153, 255],
+            type='upper',
+            swap='left_elbow'),
+        4:
+        dict(
+            name='left_wrist',
+            id=4,
+            color=[51, 153, 255],
+            type='upper',
+            swap='right_wrist'),
+        5:
+        dict(
+            name='right_wrist',
+            id=5,
+            color=[0, 255, 0],
+            type='upper',
+            swap='left_wrist'),
+        6:
+        dict(
+            name='left_hip',
+            id=6,
+            color=[255, 128, 0],
+            type='lower',
+            swap='right_hip'),
+        7:
+        dict(
+            name='right_hip',
+            id=7,
+            color=[0, 255, 0],
+            type='lower',
+            swap='left_hip'),
+        8:
+        dict(
+            name='left_knee',
+            id=8,
+            color=[255, 128, 0],
+            type='lower',
+            swap='right_knee'),
+        9:
+        dict(
+            name='right_knee',
+            id=9,
+            color=[0, 255, 0],
+            type='lower',
+            swap='left_knee'),
+        10:
+        dict(
+            name='left_ankle',
+            id=10,
+            color=[255, 128, 0],
+            type='lower',
+            swap='right_ankle'),
+        11:
+        dict(
+            name='right_ankle',
+            id=11,
+            color=[0, 255, 0],
+            type='lower',
+            swap='left_ankle'),
+        12:
+        dict(
+            name='top_head', id=12, color=[255, 128, 0], type='upper',
+            swap=''),
+        13:
+        dict(name='neck', id=13, color=[0, 255, 0], type='upper', swap='')
+    },
+    skeleton_info={
+        0:
+        dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+        1:
+        dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+        2:
+        dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+        3:
+        dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+        4:
+        dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+        5:
+        dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+        6:
+        dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+        7:
+        dict(
+            link=('left_shoulder', 'right_shoulder'),
+            id=7,
+            color=[51, 153, 255]),
+        8:
+        dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+        9:
+        dict(
+            link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+        10:
+        dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+        11:
+        dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+        12:
+        dict(link=('top_head', 'neck'), id=12, color=[51, 153, 255]),
+        13:
+        dict(link=('right_shoulder', 'neck'), id=13, color=[51, 153, 255]),
+        14:
+        dict(link=('left_shoulder', 'neck'), id=14, color=[51, 153, 255])
+    },
+    joint_weights=[
+        0.2, 0.2, 0.2, 1.3, 1.5, 0.2, 1.3, 1.5, 0.2, 0.2, 0.5, 0.2, 0.2, 0.5
+    ],
+    sigmas=[
+        0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087,
+        0.089, 0.089, 0.079, 0.079
+    ])
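CrowdPose is the first file in this set with non-uniform `joint_weights` (1.3-1.5 on some limb joints against 0.2 elsewhere); these are per-keypoint multipliers on the training loss. A minimal sketch of the usual application, with illustrative tensor shapes rather than a specific mmpose loss class:

```python
import torch

def weighted_heatmap_mse(pred, target, visible, joint_weights):
    """pred/target: (B, K, H, W) heatmaps; visible: (B, K) 0/1 mask."""
    w = visible * torch.as_tensor(joint_weights, dtype=pred.dtype)
    per_joint = ((pred - target) ** 2).mean(dim=(2, 3))  # (B, K) per-keypoint MSE
    return (per_joint * w).mean()
```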
diff --git a/mmpose/configs/_base_/datasets/deepfashion2.py b/mmpose/configs/_base_/datasets/deepfashion2.py
new file mode 100644
index 0000000000000000000000000000000000000000..f65d1bb591fab8f06a79b5d595478a282acd8b3e
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/deepfashion2.py
@@ -0,0 +1,2660 @@
+colors = dict(
+    sss=[255, 128, 0],  # short_sleeve_shirt
+    lss=[255, 0, 128],  # long_sleeved_shirt
+    sso=[128, 0, 255],  # short_sleeved_outwear
+    lso=[0, 128, 255],  # long_sleeved_outwear
+    vest=[0, 128, 128],  # vest
+    sling=[0, 0, 128],  # sling
+    shorts=[128, 128, 128],  # shorts
+    trousers=[128, 0, 128],  # trousers
+    skirt=[64, 128, 128],  # skirt
+    ssd=[64, 64, 128],  # short_sleeved_dress
+    lsd=[128, 64, 0],  # long_sleeved_dress
+    vd=[128, 64, 255],  # vest_dress
+    sd=[128, 64, 0],  # sling_dress
+)
+dataset_info = dict(
+    dataset_name='deepfashion2',
+    paper_info=dict(
+        author='Yuying Ge and Ruimao Zhang and Lingyun Wu '
+        'and Xiaogang Wang and Xiaoou Tang and Ping Luo',
+        title='DeepFashion2: A Versatile Benchmark for '
+        'Detection, Pose Estimation, Segmentation and '
+        'Re-Identification of Clothing Images',
+        container='Proceedings of IEEE Conference on Computer '
+        'Vision and Pattern Recognition (CVPR)',
+        year='2019',
+        homepage='https://github.com/switchablenorms/DeepFashion2',
+    ),
+    keypoint_info={
+        # short_sleeved_shirt
+        0:
+        dict(name='sss_kpt1', id=0, color=colors['sss'], type='', swap=''),
+        1:
+        dict(
+            name='sss_kpt2',
+            id=1,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt6'),
+        2:
+        dict(
+            name='sss_kpt3',
+            id=2,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt5'),
+        3:
+        dict(name='sss_kpt4', id=3, color=colors['sss'], type='', swap=''),
+        4:
+        dict(
+            name='sss_kpt5',
+            id=4,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt3'),
+        5:
+        dict(
+            name='sss_kpt6',
+            id=5,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt2'),
+        6:
+        dict(
+            name='sss_kpt7',
+            id=6,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt25'),
+        7:
+        dict(
+            name='sss_kpt8',
+            id=7,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt24'),
+        8:
+        dict(
+            name='sss_kpt9',
+            id=8,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt23'),
+        9:
+        dict(
+            name='sss_kpt10',
+            id=9,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt22'),
+        10:
+        dict(
+            name='sss_kpt11',
+            id=10,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt21'),
+        11:
+        dict(
+            name='sss_kpt12',
+            id=11,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt20'),
+        12:
+        dict(
+            name='sss_kpt13',
+            id=12,
+            color=colors['sss'],
+            type='',
+            swap='sss_kpt19'),
+        13:
+        dict(
+            name='sss_kpt14',
id=13, + color=colors['sss'], + type='', + swap='sss_kpt18'), + 14: + dict( + name='sss_kpt15', + id=14, + color=colors['sss'], + type='', + swap='sss_kpt17'), + 15: + dict(name='sss_kpt16', id=15, color=colors['sss'], type='', swap=''), + 16: + dict( + name='sss_kpt17', + id=16, + color=colors['sss'], + type='', + swap='sss_kpt15'), + 17: + dict( + name='sss_kpt18', + id=17, + color=colors['sss'], + type='', + swap='sss_kpt14'), + 18: + dict( + name='sss_kpt19', + id=18, + color=colors['sss'], + type='', + swap='sss_kpt13'), + 19: + dict( + name='sss_kpt20', + id=19, + color=colors['sss'], + type='', + swap='sss_kpt12'), + 20: + dict( + name='sss_kpt21', + id=20, + color=colors['sss'], + type='', + swap='sss_kpt11'), + 21: + dict( + name='sss_kpt22', + id=21, + color=colors['sss'], + type='', + swap='sss_kpt10'), + 22: + dict( + name='sss_kpt23', + id=22, + color=colors['sss'], + type='', + swap='sss_kpt9'), + 23: + dict( + name='sss_kpt24', + id=23, + color=colors['sss'], + type='', + swap='sss_kpt8'), + 24: + dict( + name='sss_kpt25', + id=24, + color=colors['sss'], + type='', + swap='sss_kpt7'), + # long_sleeved_shirt + 25: + dict(name='lss_kpt1', id=25, color=colors['lss'], type='', swap=''), + 26: + dict( + name='lss_kpt2', + id=26, + color=colors['lss'], + type='', + swap='lss_kpt6'), + 27: + dict( + name='lss_kpt3', + id=27, + color=colors['lss'], + type='', + swap='lss_kpt5'), + 28: + dict(name='lss_kpt4', id=28, color=colors['lss'], type='', swap=''), + 29: + dict( + name='lss_kpt5', + id=29, + color=colors['lss'], + type='', + swap='lss_kpt3'), + 30: + dict( + name='lss_kpt6', + id=30, + color=colors['lss'], + type='', + swap='lss_kpt2'), + 31: + dict( + name='lss_kpt7', + id=31, + color=colors['lss'], + type='', + swap='lss_kpt33'), + 32: + dict( + name='lss_kpt8', + id=32, + color=colors['lss'], + type='', + swap='lss_kpt32'), + 33: + dict( + name='lss_kpt9', + id=33, + color=colors['lss'], + type='', + swap='lss_kpt31'), + 34: + dict( + name='lss_kpt10', + id=34, + color=colors['lss'], + type='', + swap='lss_kpt30'), + 35: + dict( + name='lss_kpt11', + id=35, + color=colors['lss'], + type='', + swap='lss_kpt29'), + 36: + dict( + name='lss_kpt12', + id=36, + color=colors['lss'], + type='', + swap='lss_kpt28'), + 37: + dict( + name='lss_kpt13', + id=37, + color=colors['lss'], + type='', + swap='lss_kpt27'), + 38: + dict( + name='lss_kpt14', + id=38, + color=colors['lss'], + type='', + swap='lss_kpt26'), + 39: + dict( + name='lss_kpt15', + id=39, + color=colors['lss'], + type='', + swap='lss_kpt25'), + 40: + dict( + name='lss_kpt16', + id=40, + color=colors['lss'], + type='', + swap='lss_kpt24'), + 41: + dict( + name='lss_kpt17', + id=41, + color=colors['lss'], + type='', + swap='lss_kpt23'), + 42: + dict( + name='lss_kpt18', + id=42, + color=colors['lss'], + type='', + swap='lss_kpt22'), + 43: + dict( + name='lss_kpt19', + id=43, + color=colors['lss'], + type='', + swap='lss_kpt21'), + 44: + dict(name='lss_kpt20', id=44, color=colors['lss'], type='', swap=''), + 45: + dict( + name='lss_kpt21', + id=45, + color=colors['lss'], + type='', + swap='lss_kpt19'), + 46: + dict( + name='lss_kpt22', + id=46, + color=colors['lss'], + type='', + swap='lss_kpt18'), + 47: + dict( + name='lss_kpt23', + id=47, + color=colors['lss'], + type='', + swap='lss_kpt17'), + 48: + dict( + name='lss_kpt24', + id=48, + color=colors['lss'], + type='', + swap='lss_kpt16'), + 49: + dict( + name='lss_kpt25', + id=49, + color=colors['lss'], + type='', + swap='lss_kpt15'), + 50: + dict( + name='lss_kpt26', + 
id=50, + color=colors['lss'], + type='', + swap='lss_kpt14'), + 51: + dict( + name='lss_kpt27', + id=51, + color=colors['lss'], + type='', + swap='lss_kpt13'), + 52: + dict( + name='lss_kpt28', + id=52, + color=colors['lss'], + type='', + swap='lss_kpt12'), + 53: + dict( + name='lss_kpt29', + id=53, + color=colors['lss'], + type='', + swap='lss_kpt11'), + 54: + dict( + name='lss_kpt30', + id=54, + color=colors['lss'], + type='', + swap='lss_kpt10'), + 55: + dict( + name='lss_kpt31', + id=55, + color=colors['lss'], + type='', + swap='lss_kpt9'), + 56: + dict( + name='lss_kpt32', + id=56, + color=colors['lss'], + type='', + swap='lss_kpt8'), + 57: + dict( + name='lss_kpt33', + id=57, + color=colors['lss'], + type='', + swap='lss_kpt7'), + # short_sleeved_outwear + 58: + dict(name='sso_kpt1', id=58, color=colors['sso'], type='', swap=''), + 59: + dict( + name='sso_kpt2', + id=59, + color=colors['sso'], + type='', + swap='sso_kpt26'), + 60: + dict( + name='sso_kpt3', + id=60, + color=colors['sso'], + type='', + swap='sso_kpt5'), + 61: + dict( + name='sso_kpt4', + id=61, + color=colors['sso'], + type='', + swap='sso_kpt6'), + 62: + dict( + name='sso_kpt5', + id=62, + color=colors['sso'], + type='', + swap='sso_kpt3'), + 63: + dict( + name='sso_kpt6', + id=63, + color=colors['sso'], + type='', + swap='sso_kpt4'), + 64: + dict( + name='sso_kpt7', + id=64, + color=colors['sso'], + type='', + swap='sso_kpt25'), + 65: + dict( + name='sso_kpt8', + id=65, + color=colors['sso'], + type='', + swap='sso_kpt24'), + 66: + dict( + name='sso_kpt9', + id=66, + color=colors['sso'], + type='', + swap='sso_kpt23'), + 67: + dict( + name='sso_kpt10', + id=67, + color=colors['sso'], + type='', + swap='sso_kpt22'), + 68: + dict( + name='sso_kpt11', + id=68, + color=colors['sso'], + type='', + swap='sso_kpt21'), + 69: + dict( + name='sso_kpt12', + id=69, + color=colors['sso'], + type='', + swap='sso_kpt20'), + 70: + dict( + name='sso_kpt13', + id=70, + color=colors['sso'], + type='', + swap='sso_kpt19'), + 71: + dict( + name='sso_kpt14', + id=71, + color=colors['sso'], + type='', + swap='sso_kpt18'), + 72: + dict( + name='sso_kpt15', + id=72, + color=colors['sso'], + type='', + swap='sso_kpt17'), + 73: + dict( + name='sso_kpt16', + id=73, + color=colors['sso'], + type='', + swap='sso_kpt29'), + 74: + dict( + name='sso_kpt17', + id=74, + color=colors['sso'], + type='', + swap='sso_kpt15'), + 75: + dict( + name='sso_kpt18', + id=75, + color=colors['sso'], + type='', + swap='sso_kpt14'), + 76: + dict( + name='sso_kpt19', + id=76, + color=colors['sso'], + type='', + swap='sso_kpt13'), + 77: + dict( + name='sso_kpt20', + id=77, + color=colors['sso'], + type='', + swap='sso_kpt12'), + 78: + dict( + name='sso_kpt21', + id=78, + color=colors['sso'], + type='', + swap='sso_kpt11'), + 79: + dict( + name='sso_kpt22', + id=79, + color=colors['sso'], + type='', + swap='sso_kpt10'), + 80: + dict( + name='sso_kpt23', + id=80, + color=colors['sso'], + type='', + swap='sso_kpt9'), + 81: + dict( + name='sso_kpt24', + id=81, + color=colors['sso'], + type='', + swap='sso_kpt8'), + 82: + dict( + name='sso_kpt25', + id=82, + color=colors['sso'], + type='', + swap='sso_kpt7'), + 83: + dict( + name='sso_kpt26', + id=83, + color=colors['sso'], + type='', + swap='sso_kpt2'), + 84: + dict( + name='sso_kpt27', + id=84, + color=colors['sso'], + type='', + swap='sso_kpt30'), + 85: + dict( + name='sso_kpt28', + id=85, + color=colors['sso'], + type='', + swap='sso_kpt31'), + 86: + dict( + name='sso_kpt29', + id=86, + color=colors['sso'], + type='', 
+ swap='sso_kpt16'), + 87: + dict( + name='sso_kpt30', + id=87, + color=colors['sso'], + type='', + swap='sso_kpt27'), + 88: + dict( + name='sso_kpt31', + id=88, + color=colors['sso'], + type='', + swap='sso_kpt28'), + # long_sleeved_outwear + 89: + dict(name='lso_kpt1', id=89, color=colors['lso'], type='', swap=''), + 90: + dict( + name='lso_kpt2', + id=90, + color=colors['lso'], + type='', + swap='lso_kpt6'), + 91: + dict( + name='lso_kpt3', + id=91, + color=colors['lso'], + type='', + swap='lso_kpt5'), + 92: + dict( + name='lso_kpt4', + id=92, + color=colors['lso'], + type='', + swap='lso_kpt34'), + 93: + dict( + name='lso_kpt5', + id=93, + color=colors['lso'], + type='', + swap='lso_kpt3'), + 94: + dict( + name='lso_kpt6', + id=94, + color=colors['lso'], + type='', + swap='lso_kpt2'), + 95: + dict( + name='lso_kpt7', + id=95, + color=colors['lso'], + type='', + swap='lso_kpt33'), + 96: + dict( + name='lso_kpt8', + id=96, + color=colors['lso'], + type='', + swap='lso_kpt32'), + 97: + dict( + name='lso_kpt9', + id=97, + color=colors['lso'], + type='', + swap='lso_kpt31'), + 98: + dict( + name='lso_kpt10', + id=98, + color=colors['lso'], + type='', + swap='lso_kpt30'), + 99: + dict( + name='lso_kpt11', + id=99, + color=colors['lso'], + type='', + swap='lso_kpt29'), + 100: + dict( + name='lso_kpt12', + id=100, + color=colors['lso'], + type='', + swap='lso_kpt28'), + 101: + dict( + name='lso_kpt13', + id=101, + color=colors['lso'], + type='', + swap='lso_kpt27'), + 102: + dict( + name='lso_kpt14', + id=102, + color=colors['lso'], + type='', + swap='lso_kpt26'), + 103: + dict( + name='lso_kpt15', + id=103, + color=colors['lso'], + type='', + swap='lso_kpt25'), + 104: + dict( + name='lso_kpt16', + id=104, + color=colors['lso'], + type='', + swap='lso_kpt24'), + 105: + dict( + name='lso_kpt17', + id=105, + color=colors['lso'], + type='', + swap='lso_kpt23'), + 106: + dict( + name='lso_kpt18', + id=106, + color=colors['lso'], + type='', + swap='lso_kpt22'), + 107: + dict( + name='lso_kpt19', + id=107, + color=colors['lso'], + type='', + swap='lso_kpt21'), + 108: + dict( + name='lso_kpt20', + id=108, + color=colors['lso'], + type='', + swap='lso_kpt37'), + 109: + dict( + name='lso_kpt21', + id=109, + color=colors['lso'], + type='', + swap='lso_kpt19'), + 110: + dict( + name='lso_kpt22', + id=110, + color=colors['lso'], + type='', + swap='lso_kpt18'), + 111: + dict( + name='lso_kpt23', + id=111, + color=colors['lso'], + type='', + swap='lso_kpt17'), + 112: + dict( + name='lso_kpt24', + id=112, + color=colors['lso'], + type='', + swap='lso_kpt16'), + 113: + dict( + name='lso_kpt25', + id=113, + color=colors['lso'], + type='', + swap='lso_kpt15'), + 114: + dict( + name='lso_kpt26', + id=114, + color=colors['lso'], + type='', + swap='lso_kpt14'), + 115: + dict( + name='lso_kpt27', + id=115, + color=colors['lso'], + type='', + swap='lso_kpt13'), + 116: + dict( + name='lso_kpt28', + id=116, + color=colors['lso'], + type='', + swap='lso_kpt12'), + 117: + dict( + name='lso_kpt29', + id=117, + color=colors['lso'], + type='', + swap='lso_kpt11'), + 118: + dict( + name='lso_kpt30', + id=118, + color=colors['lso'], + type='', + swap='lso_kpt10'), + 119: + dict( + name='lso_kpt31', + id=119, + color=colors['lso'], + type='', + swap='lso_kpt9'), + 120: + dict( + name='lso_kpt32', + id=120, + color=colors['lso'], + type='', + swap='lso_kpt8'), + 121: + dict( + name='lso_kpt33', + id=121, + color=colors['lso'], + type='', + swap='lso_kpt7'), + 122: + dict( + name='lso_kpt34', + id=122, + color=colors['lso'], + 
type='', + swap='lso_kpt4'), + 123: + dict( + name='lso_kpt35', + id=123, + color=colors['lso'], + type='', + swap='lso_kpt38'), + 124: + dict( + name='lso_kpt36', + id=124, + color=colors['lso'], + type='', + swap='lso_kpt39'), + 125: + dict( + name='lso_kpt37', + id=125, + color=colors['lso'], + type='', + swap='lso_kpt20'), + 126: + dict( + name='lso_kpt38', + id=126, + color=colors['lso'], + type='', + swap='lso_kpt35'), + 127: + dict( + name='lso_kpt39', + id=127, + color=colors['lso'], + type='', + swap='lso_kpt36'), + # vest + 128: + dict(name='vest_kpt1', id=128, color=colors['vest'], type='', swap=''), + 129: + dict( + name='vest_kpt2', + id=129, + color=colors['vest'], + type='', + swap='vest_kpt6'), + 130: + dict( + name='vest_kpt3', + id=130, + color=colors['vest'], + type='', + swap='vest_kpt5'), + 131: + dict(name='vest_kpt4', id=131, color=colors['vest'], type='', swap=''), + 132: + dict( + name='vest_kpt5', + id=132, + color=colors['vest'], + type='', + swap='vest_kpt3'), + 133: + dict( + name='vest_kpt6', + id=133, + color=colors['vest'], + type='', + swap='vest_kpt2'), + 134: + dict( + name='vest_kpt7', + id=134, + color=colors['vest'], + type='', + swap='vest_kpt15'), + 135: + dict( + name='vest_kpt8', + id=135, + color=colors['vest'], + type='', + swap='vest_kpt14'), + 136: + dict( + name='vest_kpt9', + id=136, + color=colors['vest'], + type='', + swap='vest_kpt13'), + 137: + dict( + name='vest_kpt10', + id=137, + color=colors['vest'], + type='', + swap='vest_kpt12'), + 138: + dict( + name='vest_kpt11', id=138, color=colors['vest'], type='', swap=''), + 139: + dict( + name='vest_kpt12', + id=139, + color=colors['vest'], + type='', + swap='vest_kpt10'), + 140: + dict( + name='vest_kpt13', id=140, color=colors['vest'], type='', swap=''), + 141: + dict( + name='vest_kpt14', + id=141, + color=colors['vest'], + type='', + swap='vest_kpt8'), + 142: + dict( + name='vest_kpt15', + id=142, + color=colors['vest'], + type='', + swap='vest_kpt7'), + # sling + 143: + dict( + name='sling_kpt1', id=143, color=colors['sling'], type='', + swap=''), + 144: + dict( + name='sling_kpt2', + id=144, + color=colors['sling'], + type='', + swap='sling_kpt6'), + 145: + dict( + name='sling_kpt3', + id=145, + color=colors['sling'], + type='', + swap='sling_kpt5'), + 146: + dict( + name='sling_kpt4', id=146, color=colors['sling'], type='', + swap=''), + 147: + dict( + name='sling_kpt5', + id=147, + color=colors['sling'], + type='', + swap='sling_kpt3'), + 148: + dict( + name='sling_kpt6', + id=148, + color=colors['sling'], + type='', + swap='sling_kpt2'), + 149: + dict( + name='sling_kpt7', + id=149, + color=colors['sling'], + type='', + swap='sling_kpt15'), + 150: + dict( + name='sling_kpt8', + id=150, + color=colors['sling'], + type='', + swap='sling_kpt14'), + 151: + dict( + name='sling_kpt9', + id=151, + color=colors['sling'], + type='', + swap='sling_kpt13'), + 152: + dict( + name='sling_kpt10', + id=152, + color=colors['sling'], + type='', + swap='sling_kpt12'), + 153: + dict( + name='sling_kpt11', + id=153, + color=colors['sling'], + type='', + swap=''), + 154: + dict( + name='sling_kpt12', + id=154, + color=colors['sling'], + type='', + swap='sling_kpt10'), + 155: + dict( + name='sling_kpt13', + id=155, + color=colors['sling'], + type='', + swap='sling_kpt9'), + 156: + dict( + name='sling_kpt14', + id=156, + color=colors['sling'], + type='', + swap='sling_kpt8'), + 157: + dict( + name='sling_kpt15', + id=157, + color=colors['sling'], + type='', + swap='sling_kpt7'), + # shorts + 158: + dict( 
+ name='shorts_kpt1', + id=158, + color=colors['shorts'], + type='', + swap='shorts_kpt3'), + 159: + dict( + name='shorts_kpt2', + id=159, + color=colors['shorts'], + type='', + swap=''), + 160: + dict( + name='shorts_kpt3', + id=160, + color=colors['shorts'], + type='', + swap='shorts_kpt1'), + 161: + dict( + name='shorts_kpt4', + id=161, + color=colors['shorts'], + type='', + swap='shorts_kpt10'), + 162: + dict( + name='shorts_kpt5', + id=162, + color=colors['shorts'], + type='', + swap='shorts_kpt9'), + 163: + dict( + name='shorts_kpt6', + id=163, + color=colors['shorts'], + type='', + swap='shorts_kpt8'), + 164: + dict( + name='shorts_kpt7', + id=164, + color=colors['shorts'], + type='', + swap=''), + 165: + dict( + name='shorts_kpt8', + id=165, + color=colors['shorts'], + type='', + swap='shorts_kpt6'), + 166: + dict( + name='shorts_kpt9', + id=166, + color=colors['shorts'], + type='', + swap='shorts_kpt5'), + 167: + dict( + name='shorts_kpt10', + id=167, + color=colors['shorts'], + type='', + swap='shorts_kpt4'), + # trousers + 168: + dict( + name='trousers_kpt1', + id=168, + color=colors['trousers'], + type='', + swap='trousers_kpt3'), + 169: + dict( + name='trousers_kpt2', + id=169, + color=colors['trousers'], + type='', + swap=''), + 170: + dict( + name='trousers_kpt3', + id=170, + color=colors['trousers'], + type='', + swap='trousers_kpt1'), + 171: + dict( + name='trousers_kpt4', + id=171, + color=colors['trousers'], + type='', + swap='trousers_kpt14'), + 172: + dict( + name='trousers_kpt5', + id=172, + color=colors['trousers'], + type='', + swap='trousers_kpt13'), + 173: + dict( + name='trousers_kpt6', + id=173, + color=colors['trousers'], + type='', + swap='trousers_kpt12'), + 174: + dict( + name='trousers_kpt7', + id=174, + color=colors['trousers'], + type='', + swap='trousers_kpt11'), + 175: + dict( + name='trousers_kpt8', + id=175, + color=colors['trousers'], + type='', + swap='trousers_kpt10'), + 176: + dict( + name='trousers_kpt9', + id=176, + color=colors['trousers'], + type='', + swap=''), + 177: + dict( + name='trousers_kpt10', + id=177, + color=colors['trousers'], + type='', + swap='trousers_kpt8'), + 178: + dict( + name='trousers_kpt11', + id=178, + color=colors['trousers'], + type='', + swap='trousers_kpt7'), + 179: + dict( + name='trousers_kpt12', + id=179, + color=colors['trousers'], + type='', + swap='trousers_kpt6'), + 180: + dict( + name='trousers_kpt13', + id=180, + color=colors['trousers'], + type='', + swap='trousers_kpt5'), + 181: + dict( + name='trousers_kpt14', + id=181, + color=colors['trousers'], + type='', + swap='trousers_kpt4'), + # skirt + 182: + dict( + name='skirt_kpt1', + id=182, + color=colors['skirt'], + type='', + swap='skirt_kpt3'), + 183: + dict( + name='skirt_kpt2', id=183, color=colors['skirt'], type='', + swap=''), + 184: + dict( + name='skirt_kpt3', + id=184, + color=colors['skirt'], + type='', + swap='skirt_kpt1'), + 185: + dict( + name='skirt_kpt4', + id=185, + color=colors['skirt'], + type='', + swap='skirt_kpt8'), + 186: + dict( + name='skirt_kpt5', + id=186, + color=colors['skirt'], + type='', + swap='skirt_kpt7'), + 187: + dict( + name='skirt_kpt6', id=187, color=colors['skirt'], type='', + swap=''), + 188: + dict( + name='skirt_kpt7', + id=188, + color=colors['skirt'], + type='', + swap='skirt_kpt5'), + 189: + dict( + name='skirt_kpt8', + id=189, + color=colors['skirt'], + type='', + swap='skirt_kpt4'), + # short_sleeved_dress + 190: + dict(name='ssd_kpt1', id=190, color=colors['ssd'], type='', swap=''), + 191: + dict( + 
name='ssd_kpt2', + id=191, + color=colors['ssd'], + type='', + swap='ssd_kpt6'), + 192: + dict( + name='ssd_kpt3', + id=192, + color=colors['ssd'], + type='', + swap='ssd_kpt5'), + 193: + dict(name='ssd_kpt4', id=193, color=colors['ssd'], type='', swap=''), + 194: + dict( + name='ssd_kpt5', + id=194, + color=colors['ssd'], + type='', + swap='ssd_kpt3'), + 195: + dict( + name='ssd_kpt6', + id=195, + color=colors['ssd'], + type='', + swap='ssd_kpt2'), + 196: + dict( + name='ssd_kpt7', + id=196, + color=colors['ssd'], + type='', + swap='ssd_kpt29'), + 197: + dict( + name='ssd_kpt8', + id=197, + color=colors['ssd'], + type='', + swap='ssd_kpt28'), + 198: + dict( + name='ssd_kpt9', + id=198, + color=colors['ssd'], + type='', + swap='ssd_kpt27'), + 199: + dict( + name='ssd_kpt10', + id=199, + color=colors['ssd'], + type='', + swap='ssd_kpt26'), + 200: + dict( + name='ssd_kpt11', + id=200, + color=colors['ssd'], + type='', + swap='ssd_kpt25'), + 201: + dict( + name='ssd_kpt12', + id=201, + color=colors['ssd'], + type='', + swap='ssd_kpt24'), + 202: + dict( + name='ssd_kpt13', + id=202, + color=colors['ssd'], + type='', + swap='ssd_kpt23'), + 203: + dict( + name='ssd_kpt14', + id=203, + color=colors['ssd'], + type='', + swap='ssd_kpt22'), + 204: + dict( + name='ssd_kpt15', + id=204, + color=colors['ssd'], + type='', + swap='ssd_kpt21'), + 205: + dict( + name='ssd_kpt16', + id=205, + color=colors['ssd'], + type='', + swap='ssd_kpt20'), + 206: + dict( + name='ssd_kpt17', + id=206, + color=colors['ssd'], + type='', + swap='ssd_kpt19'), + 207: + dict(name='ssd_kpt18', id=207, color=colors['ssd'], type='', swap=''), + 208: + dict( + name='ssd_kpt19', + id=208, + color=colors['ssd'], + type='', + swap='ssd_kpt17'), + 209: + dict( + name='ssd_kpt20', + id=209, + color=colors['ssd'], + type='', + swap='ssd_kpt16'), + 210: + dict( + name='ssd_kpt21', + id=210, + color=colors['ssd'], + type='', + swap='ssd_kpt15'), + 211: + dict( + name='ssd_kpt22', + id=211, + color=colors['ssd'], + type='', + swap='ssd_kpt14'), + 212: + dict( + name='ssd_kpt23', + id=212, + color=colors['ssd'], + type='', + swap='ssd_kpt13'), + 213: + dict( + name='ssd_kpt24', + id=213, + color=colors['ssd'], + type='', + swap='ssd_kpt12'), + 214: + dict( + name='ssd_kpt25', + id=214, + color=colors['ssd'], + type='', + swap='ssd_kpt11'), + 215: + dict( + name='ssd_kpt26', + id=215, + color=colors['ssd'], + type='', + swap='ssd_kpt10'), + 216: + dict( + name='ssd_kpt27', + id=216, + color=colors['ssd'], + type='', + swap='ssd_kpt9'), + 217: + dict( + name='ssd_kpt28', + id=217, + color=colors['ssd'], + type='', + swap='ssd_kpt8'), + 218: + dict( + name='ssd_kpt29', + id=218, + color=colors['ssd'], + type='', + swap='ssd_kpt7'), + # long_sleeved_dress + 219: + dict(name='lsd_kpt1', id=219, color=colors['lsd'], type='', swap=''), + 220: + dict( + name='lsd_kpt2', + id=220, + color=colors['lsd'], + type='', + swap='lsd_kpt6'), + 221: + dict( + name='lsd_kpt3', + id=221, + color=colors['lsd'], + type='', + swap='lsd_kpt5'), + 222: + dict(name='lsd_kpt4', id=222, color=colors['lsd'], type='', swap=''), + 223: + dict( + name='lsd_kpt5', + id=223, + color=colors['lsd'], + type='', + swap='lsd_kpt3'), + 224: + dict( + name='lsd_kpt6', + id=224, + color=colors['lsd'], + type='', + swap='lsd_kpt2'), + 225: + dict( + name='lsd_kpt7', + id=225, + color=colors['lsd'], + type='', + swap='lsd_kpt37'), + 226: + dict( + name='lsd_kpt8', + id=226, + color=colors['lsd'], + type='', + swap='lsd_kpt36'), + 227: + dict( + name='lsd_kpt9', + id=227, + 
color=colors['lsd'], + type='', + swap='lsd_kpt35'), + 228: + dict( + name='lsd_kpt10', + id=228, + color=colors['lsd'], + type='', + swap='lsd_kpt34'), + 229: + dict( + name='lsd_kpt11', + id=229, + color=colors['lsd'], + type='', + swap='lsd_kpt33'), + 230: + dict( + name='lsd_kpt12', + id=230, + color=colors['lsd'], + type='', + swap='lsd_kpt32'), + 231: + dict( + name='lsd_kpt13', + id=231, + color=colors['lsd'], + type='', + swap='lsd_kpt31'), + 232: + dict( + name='lsd_kpt14', + id=232, + color=colors['lsd'], + type='', + swap='lsd_kpt30'), + 233: + dict( + name='lsd_kpt15', + id=233, + color=colors['lsd'], + type='', + swap='lsd_kpt29'), + 234: + dict( + name='lsd_kpt16', + id=234, + color=colors['lsd'], + type='', + swap='lsd_kpt28'), + 235: + dict( + name='lsd_kpt17', + id=235, + color=colors['lsd'], + type='', + swap='lsd_kpt27'), + 236: + dict( + name='lsd_kpt18', + id=236, + color=colors['lsd'], + type='', + swap='lsd_kpt26'), + 237: + dict( + name='lsd_kpt19', + id=237, + color=colors['lsd'], + type='', + swap='lsd_kpt25'), + 238: + dict( + name='lsd_kpt20', + id=238, + color=colors['lsd'], + type='', + swap='lsd_kpt24'), + 239: + dict( + name='lsd_kpt21', + id=239, + color=colors['lsd'], + type='', + swap='lsd_kpt23'), + 240: + dict(name='lsd_kpt22', id=240, color=colors['lsd'], type='', swap=''), + 241: + dict( + name='lsd_kpt23', + id=241, + color=colors['lsd'], + type='', + swap='lsd_kpt21'), + 242: + dict( + name='lsd_kpt24', + id=242, + color=colors['lsd'], + type='', + swap='lsd_kpt20'), + 243: + dict( + name='lsd_kpt25', + id=243, + color=colors['lsd'], + type='', + swap='lsd_kpt19'), + 244: + dict( + name='lsd_kpt26', + id=244, + color=colors['lsd'], + type='', + swap='lsd_kpt18'), + 245: + dict( + name='lsd_kpt27', + id=245, + color=colors['lsd'], + type='', + swap='lsd_kpt17'), + 246: + dict( + name='lsd_kpt28', + id=246, + color=colors['lsd'], + type='', + swap='lsd_kpt16'), + 247: + dict( + name='lsd_kpt29', + id=247, + color=colors['lsd'], + type='', + swap='lsd_kpt15'), + 248: + dict( + name='lsd_kpt30', + id=248, + color=colors['lsd'], + type='', + swap='lsd_kpt14'), + 249: + dict( + name='lsd_kpt31', + id=249, + color=colors['lsd'], + type='', + swap='lsd_kpt13'), + 250: + dict( + name='lsd_kpt32', + id=250, + color=colors['lsd'], + type='', + swap='lsd_kpt12'), + 251: + dict( + name='lsd_kpt33', + id=251, + color=colors['lsd'], + type='', + swap='lsd_kpt11'), + 252: + dict( + name='lsd_kpt34', + id=252, + color=colors['lsd'], + type='', + swap='lsd_kpt10'), + 253: + dict( + name='lsd_kpt35', + id=253, + color=colors['lsd'], + type='', + swap='lsd_kpt9'), + 254: + dict( + name='lsd_kpt36', + id=254, + color=colors['lsd'], + type='', + swap='lsd_kpt8'), + 255: + dict( + name='lsd_kpt37', + id=255, + color=colors['lsd'], + type='', + swap='lsd_kpt7'), + # vest_dress + 256: + dict(name='vd_kpt1', id=256, color=colors['vd'], type='', swap=''), + 257: + dict( + name='vd_kpt2', + id=257, + color=colors['vd'], + type='', + swap='vd_kpt6'), + 258: + dict( + name='vd_kpt3', + id=258, + color=colors['vd'], + type='', + swap='vd_kpt5'), + 259: + dict(name='vd_kpt4', id=259, color=colors['vd'], type='', swap=''), + 260: + dict( + name='vd_kpt5', + id=260, + color=colors['vd'], + type='', + swap='vd_kpt3'), + 261: + dict( + name='vd_kpt6', + id=261, + color=colors['vd'], + type='', + swap='vd_kpt2'), + 262: + dict( + name='vd_kpt7', + id=262, + color=colors['vd'], + type='', + swap='vd_kpt19'), + 263: + dict( + name='vd_kpt8', + id=263, + color=colors['vd'], + type='', + 
swap='vd_kpt18'), + 264: + dict( + name='vd_kpt9', + id=264, + color=colors['vd'], + type='', + swap='vd_kpt17'), + 265: + dict( + name='vd_kpt10', + id=265, + color=colors['vd'], + type='', + swap='vd_kpt16'), + 266: + dict( + name='vd_kpt11', + id=266, + color=colors['vd'], + type='', + swap='vd_kpt15'), + 267: + dict( + name='vd_kpt12', + id=267, + color=colors['vd'], + type='', + swap='vd_kpt14'), + 268: + dict(name='vd_kpt13', id=268, color=colors['vd'], type='', swap=''), + 269: + dict( + name='vd_kpt14', + id=269, + color=colors['vd'], + type='', + swap='vd_kpt12'), + 270: + dict( + name='vd_kpt15', + id=270, + color=colors['vd'], + type='', + swap='vd_kpt11'), + 271: + dict( + name='vd_kpt16', + id=271, + color=colors['vd'], + type='', + swap='vd_kpt10'), + 272: + dict( + name='vd_kpt17', + id=272, + color=colors['vd'], + type='', + swap='vd_kpt9'), + 273: + dict( + name='vd_kpt18', + id=273, + color=colors['vd'], + type='', + swap='vd_kpt8'), + 274: + dict( + name='vd_kpt19', + id=274, + color=colors['vd'], + type='', + swap='vd_kpt7'), + # sling_dress + 275: + dict(name='sd_kpt1', id=275, color=colors['sd'], type='', swap=''), + 276: + dict( + name='sd_kpt2', + id=276, + color=colors['sd'], + type='', + swap='sd_kpt6'), + 277: + dict( + name='sd_kpt3', + id=277, + color=colors['sd'], + type='', + swap='sd_kpt5'), + 278: + dict(name='sd_kpt4', id=278, color=colors['sd'], type='', swap=''), + 279: + dict( + name='sd_kpt5', + id=279, + color=colors['sd'], + type='', + swap='sd_kpt3'), + 280: + dict( + name='sd_kpt6', + id=280, + color=colors['sd'], + type='', + swap='sd_kpt2'), + 281: + dict( + name='sd_kpt7', + id=281, + color=colors['sd'], + type='', + swap='sd_kpt19'), + 282: + dict( + name='sd_kpt8', + id=282, + color=colors['sd'], + type='', + swap='sd_kpt18'), + 283: + dict( + name='sd_kpt9', + id=283, + color=colors['sd'], + type='', + swap='sd_kpt17'), + 284: + dict( + name='sd_kpt10', + id=284, + color=colors['sd'], + type='', + swap='sd_kpt16'), + 285: + dict( + name='sd_kpt11', + id=285, + color=colors['sd'], + type='', + swap='sd_kpt15'), + 286: + dict( + name='sd_kpt12', + id=286, + color=colors['sd'], + type='', + swap='sd_kpt14'), + 287: + dict(name='sd_kpt13', id=287, color=colors['sd'], type='', swap=''), + 288: + dict( + name='sd_kpt14', + id=288, + color=colors['sd'], + type='', + swap='sd_kpt12'), + 289: + dict( + name='sd_kpt15', + id=289, + color=colors['sd'], + type='', + swap='sd_kpt11'), + 290: + dict( + name='sd_kpt16', + id=290, + color=colors['sd'], + type='', + swap='sd_kpt10'), + 291: + dict( + name='sd_kpt17', + id=291, + color=colors['sd'], + type='', + swap='sd_kpt9'), + 292: + dict( + name='sd_kpt18', + id=292, + color=colors['sd'], + type='', + swap='sd_kpt8'), + 293: + dict( + name='sd_kpt19', + id=293, + color=colors['sd'], + type='', + swap='sd_kpt7'), + }, + skeleton_info={ + # short_sleeved_shirt + 0: + dict(link=('sss_kpt1', 'sss_kpt2'), id=0, color=[255, 128, 0]), + 1: + dict(link=('sss_kpt2', 'sss_kpt7'), id=1, color=[255, 128, 0]), + 2: + dict(link=('sss_kpt7', 'sss_kpt8'), id=2, color=[255, 128, 0]), + 3: + dict(link=('sss_kpt8', 'sss_kpt9'), id=3, color=[255, 128, 0]), + 4: + dict(link=('sss_kpt9', 'sss_kpt10'), id=4, color=[255, 128, 0]), + 5: + dict(link=('sss_kpt10', 'sss_kpt11'), id=5, color=[255, 128, 0]), + 6: + dict(link=('sss_kpt11', 'sss_kpt12'), id=6, color=[255, 128, 0]), + 7: + dict(link=('sss_kpt12', 'sss_kpt13'), id=7, color=[255, 128, 0]), + 8: + dict(link=('sss_kpt13', 'sss_kpt14'), id=8, color=[255, 128, 0]), + 9: + 
dict(link=('sss_kpt14', 'sss_kpt15'), id=9, color=[255, 128, 0]), + 10: + dict(link=('sss_kpt15', 'sss_kpt16'), id=10, color=[255, 128, 0]), + 11: + dict(link=('sss_kpt16', 'sss_kpt17'), id=11, color=[255, 128, 0]), + 12: + dict(link=('sss_kpt17', 'sss_kpt18'), id=12, color=[255, 128, 0]), + 13: + dict(link=('sss_kpt18', 'sss_kpt19'), id=13, color=[255, 128, 0]), + 14: + dict(link=('sss_kpt19', 'sss_kpt20'), id=14, color=[255, 128, 0]), + 15: + dict(link=('sss_kpt20', 'sss_kpt21'), id=15, color=[255, 128, 0]), + 16: + dict(link=('sss_kpt21', 'sss_kpt22'), id=16, color=[255, 128, 0]), + 17: + dict(link=('sss_kpt22', 'sss_kpt23'), id=17, color=[255, 128, 0]), + 18: + dict(link=('sss_kpt23', 'sss_kpt24'), id=18, color=[255, 128, 0]), + 19: + dict(link=('sss_kpt24', 'sss_kpt25'), id=19, color=[255, 128, 0]), + 20: + dict(link=('sss_kpt25', 'sss_kpt6'), id=20, color=[255, 128, 0]), + 21: + dict(link=('sss_kpt6', 'sss_kpt1'), id=21, color=[255, 128, 0]), + 22: + dict(link=('sss_kpt2', 'sss_kpt3'), id=22, color=[255, 128, 0]), + 23: + dict(link=('sss_kpt3', 'sss_kpt4'), id=23, color=[255, 128, 0]), + 24: + dict(link=('sss_kpt4', 'sss_kpt5'), id=24, color=[255, 128, 0]), + 25: + dict(link=('sss_kpt5', 'sss_kpt6'), id=25, color=[255, 128, 0]), + # long_sleeved_shirt + 26: + dict(link=('lss_kpt1', 'lss_kpt2'), id=26, color=[255, 0, 128]), + 27: + dict(link=('lss_kpt2', 'lss_kpt7'), id=27, color=[255, 0, 128]), + 28: + dict(link=('lss_kpt7', 'lss_kpt8'), id=28, color=[255, 0, 128]), + 29: + dict(link=('lss_kpt8', 'lss_kpt9'), id=29, color=[255, 0, 128]), + 30: + dict(link=('lss_kpt9', 'lss_kpt10'), id=30, color=[255, 0, 128]), + 31: + dict(link=('lss_kpt10', 'lss_kpt11'), id=31, color=[255, 0, 128]), + 32: + dict(link=('lss_kpt11', 'lss_kpt12'), id=32, color=[255, 0, 128]), + 33: + dict(link=('lss_kpt12', 'lss_kpt13'), id=33, color=[255, 0, 128]), + 34: + dict(link=('lss_kpt13', 'lss_kpt14'), id=34, color=[255, 0, 128]), + 35: + dict(link=('lss_kpt14', 'lss_kpt15'), id=35, color=[255, 0, 128]), + 36: + dict(link=('lss_kpt15', 'lss_kpt16'), id=36, color=[255, 0, 128]), + 37: + dict(link=('lss_kpt16', 'lss_kpt17'), id=37, color=[255, 0, 128]), + 38: + dict(link=('lss_kpt17', 'lss_kpt18'), id=38, color=[255, 0, 128]), + 39: + dict(link=('lss_kpt18', 'lss_kpt19'), id=39, color=[255, 0, 128]), + 40: + dict(link=('lss_kpt19', 'lss_kpt20'), id=40, color=[255, 0, 128]), + 41: + dict(link=('lss_kpt20', 'lss_kpt21'), id=41, color=[255, 0, 128]), + 42: + dict(link=('lss_kpt21', 'lss_kpt22'), id=42, color=[255, 0, 128]), + 43: + dict(link=('lss_kpt22', 'lss_kpt23'), id=43, color=[255, 0, 128]), + 44: + dict(link=('lss_kpt23', 'lss_kpt24'), id=44, color=[255, 0, 128]), + 45: + dict(link=('lss_kpt24', 'lss_kpt25'), id=45, color=[255, 0, 128]), + 46: + dict(link=('lss_kpt25', 'lss_kpt26'), id=46, color=[255, 0, 128]), + 47: + dict(link=('lss_kpt26', 'lss_kpt27'), id=47, color=[255, 0, 128]), + 48: + dict(link=('lss_kpt27', 'lss_kpt28'), id=48, color=[255, 0, 128]), + 49: + dict(link=('lss_kpt28', 'lss_kpt29'), id=49, color=[255, 0, 128]), + 50: + dict(link=('lss_kpt29', 'lss_kpt30'), id=50, color=[255, 0, 128]), + 51: + dict(link=('lss_kpt30', 'lss_kpt31'), id=51, color=[255, 0, 128]), + 52: + dict(link=('lss_kpt31', 'lss_kpt32'), id=52, color=[255, 0, 128]), + 53: + dict(link=('lss_kpt32', 'lss_kpt33'), id=53, color=[255, 0, 128]), + 54: + dict(link=('lss_kpt33', 'lss_kpt6'), id=54, color=[255, 0, 128]), + 55: + dict(link=('lss_kpt6', 'lss_kpt5'), id=55, color=[255, 0, 128]), + 56: + dict(link=('lss_kpt5',
'lss_kpt4'), id=56, color=[255, 0, 128]), + 57: + dict(link=('lss_kpt4', 'lss_kpt3'), id=57, color=[255, 0, 128]), + 58: + dict(link=('lss_kpt3', 'lss_kpt2'), id=58, color=[255, 0, 128]), + 59: + dict(link=('lss_kpt6', 'lss_kpt1'), id=59, color=[255, 0, 128]), + # short_sleeved_outwear + 60: + dict(link=('sso_kpt1', 'sso_kpt4'), id=60, color=[128, 0, 255]), + 61: + dict(link=('sso_kpt4', 'sso_kpt7'), id=61, color=[128, 0, 255]), + 62: + dict(link=('sso_kpt7', 'sso_kpt8'), id=62, color=[128, 0, 255]), + 63: + dict(link=('sso_kpt8', 'sso_kpt9'), id=63, color=[128, 0, 255]), + 64: + dict(link=('sso_kpt9', 'sso_kpt10'), id=64, color=[128, 0, 255]), + 65: + dict(link=('sso_kpt10', 'sso_kpt11'), id=65, color=[128, 0, 255]), + 66: + dict(link=('sso_kpt11', 'sso_kpt12'), id=66, color=[128, 0, 255]), + 67: + dict(link=('sso_kpt12', 'sso_kpt13'), id=67, color=[128, 0, 255]), + 68: + dict(link=('sso_kpt13', 'sso_kpt14'), id=68, color=[128, 0, 255]), + 69: + dict(link=('sso_kpt14', 'sso_kpt15'), id=69, color=[128, 0, 255]), + 70: + dict(link=('sso_kpt15', 'sso_kpt16'), id=70, color=[128, 0, 255]), + 71: + dict(link=('sso_kpt16', 'sso_kpt31'), id=71, color=[128, 0, 255]), + 72: + dict(link=('sso_kpt31', 'sso_kpt30'), id=72, color=[128, 0, 255]), + 73: + dict(link=('sso_kpt30', 'sso_kpt2'), id=73, color=[128, 0, 255]), + 74: + dict(link=('sso_kpt2', 'sso_kpt3'), id=74, color=[128, 0, 255]), + 75: + dict(link=('sso_kpt3', 'sso_kpt4'), id=75, color=[128, 0, 255]), + 76: + dict(link=('sso_kpt1', 'sso_kpt6'), id=76, color=[128, 0, 255]), + 77: + dict(link=('sso_kpt6', 'sso_kpt25'), id=77, color=[128, 0, 255]), + 78: + dict(link=('sso_kpt25', 'sso_kpt24'), id=78, color=[128, 0, 255]), + 79: + dict(link=('sso_kpt24', 'sso_kpt23'), id=79, color=[128, 0, 255]), + 80: + dict(link=('sso_kpt23', 'sso_kpt22'), id=80, color=[128, 0, 255]), + 81: + dict(link=('sso_kpt22', 'sso_kpt21'), id=81, color=[128, 0, 255]), + 82: + dict(link=('sso_kpt21', 'sso_kpt20'), id=82, color=[128, 0, 255]), + 83: + dict(link=('sso_kpt20', 'sso_kpt19'), id=83, color=[128, 0, 255]), + 84: + dict(link=('sso_kpt19', 'sso_kpt18'), id=84, color=[128, 0, 255]), + 85: + dict(link=('sso_kpt18', 'sso_kpt17'), id=85, color=[128, 0, 255]), + 86: + dict(link=('sso_kpt17', 'sso_kpt29'), id=86, color=[128, 0, 255]), + 87: + dict(link=('sso_kpt29', 'sso_kpt28'), id=87, color=[128, 0, 255]), + 88: + dict(link=('sso_kpt28', 'sso_kpt27'), id=88, color=[128, 0, 255]), + 89: + dict(link=('sso_kpt27', 'sso_kpt26'), id=89, color=[128, 0, 255]), + 90: + dict(link=('sso_kpt26', 'sso_kpt5'), id=90, color=[128, 0, 255]), + 91: + dict(link=('sso_kpt5', 'sso_kpt6'), id=91, color=[128, 0, 255]), + # long_sleeved_outwear + 92: + dict(link=('lso_kpt1', 'lso_kpt2'), id=92, color=[0, 128, 255]), + 93: + dict(link=('lso_kpt2', 'lso_kpt7'), id=93, color=[0, 128, 255]), + 94: + dict(link=('lso_kpt7', 'lso_kpt8'), id=94, color=[0, 128, 255]), + 95: + dict(link=('lso_kpt8', 'lso_kpt9'), id=95, color=[0, 128, 255]), + 96: + dict(link=('lso_kpt9', 'lso_kpt10'), id=96, color=[0, 128, 255]), + 97: + dict(link=('lso_kpt10', 'lso_kpt11'), id=97, color=[0, 128, 255]), + 98: + dict(link=('lso_kpt11', 'lso_kpt12'), id=98, color=[0, 128, 255]), + 99: + dict(link=('lso_kpt12', 'lso_kpt13'), id=99, color=[0, 128, 255]), + 100: + dict(link=('lso_kpt13', 'lso_kpt14'), id=100, color=[0, 128, 255]), + 101: + dict(link=('lso_kpt14', 'lso_kpt15'), id=101, color=[0, 128, 255]), + 102: + dict(link=('lso_kpt15', 'lso_kpt16'), id=102, color=[0, 128, 255]), + 103: + dict(link=('lso_kpt16', 
'lso_kpt17'), id=103, color=[0, 128, 255]), + 104: + dict(link=('lso_kpt17', 'lso_kpt18'), id=104, color=[0, 128, 255]), + 105: + dict(link=('lso_kpt18', 'lso_kpt19'), id=105, color=[0, 128, 255]), + 106: + dict(link=('lso_kpt19', 'lso_kpt20'), id=106, color=[0, 128, 255]), + 107: + dict(link=('lso_kpt20', 'lso_kpt39'), id=107, color=[0, 128, 255]), + 108: + dict(link=('lso_kpt39', 'lso_kpt38'), id=108, color=[0, 128, 255]), + 109: + dict(link=('lso_kpt38', 'lso_kpt4'), id=109, color=[0, 128, 255]), + 110: + dict(link=('lso_kpt4', 'lso_kpt3'), id=110, color=[0, 128, 255]), + 111: + dict(link=('lso_kpt3', 'lso_kpt2'), id=111, color=[0, 128, 255]), + 112: + dict(link=('lso_kpt1', 'lso_kpt6'), id=112, color=[0, 128, 255]), + 113: + dict(link=('lso_kpt6', 'lso_kpt33'), id=113, color=[0, 128, 255]), + 114: + dict(link=('lso_kpt33', 'lso_kpt32'), id=114, color=[0, 128, 255]), + 115: + dict(link=('lso_kpt32', 'lso_kpt31'), id=115, color=[0, 128, 255]), + 116: + dict(link=('lso_kpt31', 'lso_kpt30'), id=116, color=[0, 128, 255]), + 117: + dict(link=('lso_kpt30', 'lso_kpt29'), id=117, color=[0, 128, 255]), + 118: + dict(link=('lso_kpt29', 'lso_kpt28'), id=118, color=[0, 128, 255]), + 119: + dict(link=('lso_kpt28', 'lso_kpt27'), id=119, color=[0, 128, 255]), + 120: + dict(link=('lso_kpt27', 'lso_kpt26'), id=120, color=[0, 128, 255]), + 121: + dict(link=('lso_kpt26', 'lso_kpt25'), id=121, color=[0, 128, 255]), + 122: + dict(link=('lso_kpt25', 'lso_kpt24'), id=122, color=[0, 128, 255]), + 123: + dict(link=('lso_kpt24', 'lso_kpt23'), id=123, color=[0, 128, 255]), + 124: + dict(link=('lso_kpt23', 'lso_kpt22'), id=124, color=[0, 128, 255]), + 125: + dict(link=('lso_kpt22', 'lso_kpt21'), id=125, color=[0, 128, 255]), + 126: + dict(link=('lso_kpt21', 'lso_kpt37'), id=126, color=[0, 128, 255]), + 127: + dict(link=('lso_kpt37', 'lso_kpt36'), id=127, color=[0, 128, 255]), + 128: + dict(link=('lso_kpt36', 'lso_kpt35'), id=128, color=[0, 128, 255]), + 129: + dict(link=('lso_kpt35', 'lso_kpt34'), id=129, color=[0, 128, 255]), + 130: + dict(link=('lso_kpt34', 'lso_kpt5'), id=130, color=[0, 128, 255]), + 131: + dict(link=('lso_kpt5', 'lso_kpt6'), id=131, color=[0, 128, 255]), + # vest + 132: + dict(link=('vest_kpt1', 'vest_kpt2'), id=132, color=[0, 128, 128]), + 133: + dict(link=('vest_kpt2', 'vest_kpt7'), id=133, color=[0, 128, 128]), + 134: + dict(link=('vest_kpt7', 'vest_kpt8'), id=134, color=[0, 128, 128]), + 135: + dict(link=('vest_kpt8', 'vest_kpt9'), id=135, color=[0, 128, 128]), + 136: + dict(link=('vest_kpt9', 'vest_kpt10'), id=136, color=[0, 128, 128]), + 137: + dict(link=('vest_kpt10', 'vest_kpt11'), id=137, color=[0, 128, 128]), + 138: + dict(link=('vest_kpt11', 'vest_kpt12'), id=138, color=[0, 128, 128]), + 139: + dict(link=('vest_kpt12', 'vest_kpt13'), id=139, color=[0, 128, 128]), + 140: + dict(link=('vest_kpt13', 'vest_kpt14'), id=140, color=[0, 128, 128]), + 141: + dict(link=('vest_kpt14', 'vest_kpt15'), id=141, color=[0, 128, 128]), + 142: + dict(link=('vest_kpt15', 'vest_kpt6'), id=142, color=[0, 128, 128]), + 143: + dict(link=('vest_kpt6', 'vest_kpt1'), id=143, color=[0, 128, 128]), + 144: + dict(link=('vest_kpt2', 'vest_kpt3'), id=144, color=[0, 128, 128]), + 145: + dict(link=('vest_kpt3', 'vest_kpt4'), id=145, color=[0, 128, 128]), + 146: + dict(link=('vest_kpt4', 'vest_kpt5'), id=146, color=[0, 128, 128]), + 147: + dict(link=('vest_kpt5', 'vest_kpt6'), id=147, color=[0, 128, 128]), + # sling + 148: + dict(link=('sling_kpt1', 'sling_kpt2'), id=148, color=[0, 0, 128]), + 149: + 
dict(link=('sling_kpt2', 'sling_kpt8'), id=149, color=[0, 0, 128]), + 150: + dict(link=('sling_kpt8', 'sling_kpt9'), id=150, color=[0, 0, 128]), + 151: + dict(link=('sling_kpt9', 'sling_kpt10'), id=151, color=[0, 0, 128]), + 152: + dict(link=('sling_kpt10', 'sling_kpt11'), id=152, color=[0, 0, 128]), + 153: + dict(link=('sling_kpt11', 'sling_kpt12'), id=153, color=[0, 0, 128]), + 154: + dict(link=('sling_kpt12', 'sling_kpt13'), id=154, color=[0, 0, 128]), + 155: + dict(link=('sling_kpt13', 'sling_kpt14'), id=155, color=[0, 0, 128]), + 156: + dict(link=('sling_kpt14', 'sling_kpt6'), id=156, color=[0, 0, 128]), + 157: + dict(link=('sling_kpt2', 'sling_kpt7'), id=157, color=[0, 0, 128]), + 158: + dict(link=('sling_kpt6', 'sling_kpt15'), id=158, color=[0, 0, 128]), + 159: + dict(link=('sling_kpt2', 'sling_kpt3'), id=159, color=[0, 0, 128]), + 160: + dict(link=('sling_kpt3', 'sling_kpt4'), id=160, color=[0, 0, 128]), + 161: + dict(link=('sling_kpt4', 'sling_kpt5'), id=161, color=[0, 0, 128]), + 162: + dict(link=('sling_kpt5', 'sling_kpt6'), id=162, color=[0, 0, 128]), + 163: + dict(link=('sling_kpt1', 'sling_kpt6'), id=163, color=[0, 0, 128]), + # shorts + 164: + dict( + link=('shorts_kpt1', 'shorts_kpt4'), id=164, color=[128, 128, + 128]), + 165: + dict( + link=('shorts_kpt4', 'shorts_kpt5'), id=165, color=[128, 128, + 128]), + 166: + dict( + link=('shorts_kpt5', 'shorts_kpt6'), id=166, color=[128, 128, + 128]), + 167: + dict( + link=('shorts_kpt6', 'shorts_kpt7'), id=167, color=[128, 128, + 128]), + 168: + dict( + link=('shorts_kpt7', 'shorts_kpt8'), id=168, color=[128, 128, + 128]), + 169: + dict( + link=('shorts_kpt8', 'shorts_kpt9'), id=169, color=[128, 128, + 128]), + 170: + dict( + link=('shorts_kpt9', 'shorts_kpt10'), + id=170, + color=[128, 128, 128]), + 171: + dict( + link=('shorts_kpt10', 'shorts_kpt3'), + id=171, + color=[128, 128, 128]), + 172: + dict( + link=('shorts_kpt3', 'shorts_kpt2'), id=172, color=[128, 128, + 128]), + 173: + dict( + link=('shorts_kpt2', 'shorts_kpt1'), id=173, color=[128, 128, + 128]), + # trousers + 174: + dict( + link=('trousers_kpt1', 'trousers_kpt4'), + id=174, + color=[128, 0, 128]), + 175: + dict( + link=('trousers_kpt4', 'trousers_kpt5'), + id=175, + color=[128, 0, 128]), + 176: + dict( + link=('trousers_kpt5', 'trousers_kpt6'), + id=176, + color=[128, 0, 128]), + 177: + dict( + link=('trousers_kpt6', 'trousers_kpt7'), + id=177, + color=[128, 0, 128]), + 178: + dict( + link=('trousers_kpt7', 'trousers_kpt8'), + id=178, + color=[128, 0, 128]), + 179: + dict( + link=('trousers_kpt8', 'trousers_kpt9'), + id=179, + color=[128, 0, 128]), + 180: + dict( + link=('trousers_kpt9', 'trousers_kpt10'), + id=180, + color=[128, 0, 128]), + 181: + dict( + link=('trousers_kpt10', 'trousers_kpt11'), + id=181, + color=[128, 0, 128]), + 182: + dict( + link=('trousers_kpt11', 'trousers_kpt12'), + id=182, + color=[128, 0, 128]), + 183: + dict( + link=('trousers_kpt12', 'trousers_kpt13'), + id=183, + color=[128, 0, 128]), + 184: + dict( + link=('trousers_kpt13', 'trousers_kpt14'), + id=184, + color=[128, 0, 128]), + 185: + dict( + link=('trousers_kpt14', 'trousers_kpt3'), + id=185, + color=[128, 0, 128]), + 186: + dict( + link=('trousers_kpt3', 'trousers_kpt2'), + id=186, + color=[128, 0, 128]), + 187: + dict( + link=('trousers_kpt2', 'trousers_kpt1'), + id=187, + color=[128, 0, 128]), + # skirt + 188: + dict(link=('skirt_kpt1', 'skirt_kpt4'), id=188, color=[64, 128, 128]), + 189: + dict(link=('skirt_kpt4', 'skirt_kpt5'), id=189, color=[64, 128, 128]), + 190: + 
dict(link=('skirt_kpt5', 'skirt_kpt6'), id=190, color=[64, 128, 128]), + 191: + dict(link=('skirt_kpt6', 'skirt_kpt7'), id=191, color=[64, 128, 128]), + 192: + dict(link=('skirt_kpt7', 'skirt_kpt8'), id=192, color=[64, 128, 128]), + 193: + dict(link=('skirt_kpt8', 'skirt_kpt3'), id=193, color=[64, 128, 128]), + 194: + dict(link=('skirt_kpt3', 'skirt_kpt2'), id=194, color=[64, 128, 128]), + 195: + dict(link=('skirt_kpt2', 'skirt_kpt1'), id=195, color=[64, 128, 128]), + # short_sleeved_dress + 196: + dict(link=('ssd_kpt1', 'ssd_kpt2'), id=196, color=[64, 64, 128]), + 197: + dict(link=('ssd_kpt2', 'ssd_kpt7'), id=197, color=[64, 64, 128]), + 198: + dict(link=('ssd_kpt7', 'ssd_kpt8'), id=198, color=[64, 64, 128]), + 199: + dict(link=('ssd_kpt8', 'ssd_kpt9'), id=199, color=[64, 64, 128]), + 200: + dict(link=('ssd_kpt9', 'ssd_kpt10'), id=200, color=[64, 64, 128]), + 201: + dict(link=('ssd_kpt10', 'ssd_kpt11'), id=201, color=[64, 64, 128]), + 202: + dict(link=('ssd_kpt11', 'ssd_kpt12'), id=202, color=[64, 64, 128]), + 203: + dict(link=('ssd_kpt12', 'ssd_kpt13'), id=203, color=[64, 64, 128]), + 204: + dict(link=('ssd_kpt13', 'ssd_kpt14'), id=204, color=[64, 64, 128]), + 205: + dict(link=('ssd_kpt14', 'ssd_kpt15'), id=205, color=[64, 64, 128]), + 206: + dict(link=('ssd_kpt15', 'ssd_kpt16'), id=206, color=[64, 64, 128]), + 207: + dict(link=('ssd_kpt16', 'ssd_kpt17'), id=207, color=[64, 64, 128]), + 208: + dict(link=('ssd_kpt17', 'ssd_kpt18'), id=208, color=[64, 64, 128]), + 209: + dict(link=('ssd_kpt18', 'ssd_kpt19'), id=209, color=[64, 64, 128]), + 210: + dict(link=('ssd_kpt19', 'ssd_kpt20'), id=210, color=[64, 64, 128]), + 211: + dict(link=('ssd_kpt20', 'ssd_kpt21'), id=211, color=[64, 64, 128]), + 212: + dict(link=('ssd_kpt21', 'ssd_kpt22'), id=212, color=[64, 64, 128]), + 213: + dict(link=('ssd_kpt22', 'ssd_kpt23'), id=213, color=[64, 64, 128]), + 214: + dict(link=('ssd_kpt23', 'ssd_kpt24'), id=214, color=[64, 64, 128]), + 215: + dict(link=('ssd_kpt24', 'ssd_kpt25'), id=215, color=[64, 64, 128]), + 216: + dict(link=('ssd_kpt25', 'ssd_kpt26'), id=216, color=[64, 64, 128]), + 217: + dict(link=('ssd_kpt26', 'ssd_kpt27'), id=217, color=[64, 64, 128]), + 218: + dict(link=('ssd_kpt27', 'ssd_kpt28'), id=218, color=[64, 64, 128]), + 219: + dict(link=('ssd_kpt28', 'ssd_kpt29'), id=219, color=[64, 64, 128]), + 220: + dict(link=('ssd_kpt29', 'ssd_kpt6'), id=220, color=[64, 64, 128]), + 221: + dict(link=('ssd_kpt6', 'ssd_kpt5'), id=221, color=[64, 64, 128]), + 222: + dict(link=('ssd_kpt5', 'ssd_kpt4'), id=222, color=[64, 64, 128]), + 223: + dict(link=('ssd_kpt4', 'ssd_kpt3'), id=223, color=[64, 64, 128]), + 224: + dict(link=('ssd_kpt3', 'ssd_kpt2'), id=224, color=[64, 64, 128]), + 225: + dict(link=('ssd_kpt6', 'ssd_kpt1'), id=225, color=[64, 64, 128]), + # long_sleeved_dress + 226: + dict(link=('lsd_kpt1', 'lsd_kpt2'), id=226, color=[128, 64, 0]), + 227: + dict(link=('lsd_kpt2', 'lsd_kpt7'), id=227, color=[128, 64, 0]), + 228: + dict(link=('lsd_kpt7', 'lsd_kpt8'), id=228, color=[128, 64, 0]), + 229: + dict(link=('lsd_kpt8', 'lsd_kpt9'), id=229, color=[128, 64, 0]), + 230: + dict(link=('lsd_kpt9', 'lsd_kpt10'), id=230, color=[128, 64, 0]), + 231: + dict(link=('lsd_kpt10', 'lsd_kpt11'), id=231, color=[128, 64, 0]), + 232: + dict(link=('lsd_kpt11', 'lsd_kpt12'), id=232, color=[128, 64, 0]), + 233: + dict(link=('lsd_kpt12', 'lsd_kpt13'), id=233, color=[128, 64, 0]), + 234: + dict(link=('lsd_kpt13', 'lsd_kpt14'), id=234, color=[128, 64, 0]), + 235: + dict(link=('lsd_kpt14', 'lsd_kpt15'), id=235, color=[128, 64,
0]), + 236: + dict(link=('lsd_kpt15', 'lsd_kpt16'), id=236, color=[128, 64, 0]), + 237: + dict(link=('lsd_kpt16', 'lsd_kpt17'), id=237, color=[128, 64, 0]), + 238: + dict(link=('lsd_kpt17', 'lsd_kpt18'), id=238, color=[128, 64, 0]), + 239: + dict(link=('lsd_kpt18', 'lsd_kpt19'), id=239, color=[128, 64, 0]), + 240: + dict(link=('lsd_kpt19', 'lsd_kpt20'), id=240, color=[128, 64, 0]), + 241: + dict(link=('lsd_kpt20', 'lsd_kpt21'), id=241, color=[128, 64, 0]), + 242: + dict(link=('lsd_kpt21', 'lsd_kpt22'), id=242, color=[128, 64, 0]), + 243: + dict(link=('lsd_kpt22', 'lsd_kpt23'), id=243, color=[128, 64, 0]), + 244: + dict(link=('lsd_kpt23', 'lsd_kpt24'), id=244, color=[128, 64, 0]), + 245: + dict(link=('lsd_kpt24', 'lsd_kpt25'), id=245, color=[128, 64, 0]), + 246: + dict(link=('lsd_kpt25', 'lsd_kpt26'), id=246, color=[128, 64, 0]), + 247: + dict(link=('lsd_kpt26', 'lsd_kpt27'), id=247, color=[128, 64, 0]), + 248: + dict(link=('lsd_kpt27', 'lsd_kpt28'), id=248, color=[128, 64, 0]), + 249: + dict(link=('lsd_kpt28', 'lsd_kpt29'), id=249, color=[128, 64, 0]), + 250: + dict(link=('lsd_kpt29', 'lsd_kpt30'), id=250, color=[128, 64, 0]), + 251: + dict(link=('lsd_kpt30', 'lsd_kpt31'), id=251, color=[128, 64, 0]), + 252: + dict(link=('lsd_kpt31', 'lsd_kpt32'), id=252, color=[128, 64, 0]), + 253: + dict(link=('lsd_kpt32', 'lsd_kpt33'), id=253, color=[128, 64, 0]), + 254: + dict(link=('lsd_kpt33', 'lsd_kpt34'), id=254, color=[128, 64, 0]), + 255: + dict(link=('lsd_kpt34', 'lsd_kpt35'), id=255, color=[128, 64, 0]), + 256: + dict(link=('lsd_kpt35', 'lsd_kpt36'), id=256, color=[128, 64, 0]), + 257: + dict(link=('lsd_kpt36', 'lsd_kpt37'), id=257, color=[128, 64, 0]), + 258: + dict(link=('lsd_kpt37', 'lsd_kpt6'), id=258, color=[128, 64, 0]), + 259: + dict(link=('lsd_kpt6', 'lsd_kpt5'), id=259, color=[128, 64, 0]), + 260: + dict(link=('lsd_kpt5', 'lsd_kpt4'), id=260, color=[128, 64, 0]), + 261: + dict(link=('lsd_kpt4', 'lsd_kpt3'), id=261, color=[128, 64, 0]), + 262: + dict(link=('lsd_kpt3', 'lsd_kpt2'), id=262, color=[128, 64, 0]), + 263: + dict(link=('lsd_kpt6', 'lsd_kpt1'), id=263, color=[128, 64, 0]), + # vest_dress + 264: + dict(link=('vd_kpt1', 'vd_kpt2'), id=264, color=[128, 64, 255]), + 265: + dict(link=('vd_kpt2', 'vd_kpt7'), id=265, color=[128, 64, 255]), + 266: + dict(link=('vd_kpt7', 'vd_kpt8'), id=266, color=[128, 64, 255]), + 267: + dict(link=('vd_kpt8', 'vd_kpt9'), id=267, color=[128, 64, 255]), + 268: + dict(link=('vd_kpt9', 'vd_kpt10'), id=268, color=[128, 64, 255]), + 269: + dict(link=('vd_kpt10', 'vd_kpt11'), id=269, color=[128, 64, 255]), + 270: + dict(link=('vd_kpt11', 'vd_kpt12'), id=270, color=[128, 64, 255]), + 271: + dict(link=('vd_kpt12', 'vd_kpt13'), id=271, color=[128, 64, 255]), + 272: + dict(link=('vd_kpt13', 'vd_kpt14'), id=272, color=[128, 64, 255]), + 273: + dict(link=('vd_kpt14', 'vd_kpt15'), id=273, color=[128, 64, 255]), + 274: + dict(link=('vd_kpt15', 'vd_kpt16'), id=274, color=[128, 64, 255]), + 275: + dict(link=('vd_kpt16', 'vd_kpt17'), id=275, color=[128, 64, 255]), + 276: + dict(link=('vd_kpt17', 'vd_kpt18'), id=276, color=[128, 64, 255]), + 277: + dict(link=('vd_kpt18', 'vd_kpt19'), id=277, color=[128, 64, 255]), + 278: + dict(link=('vd_kpt19', 'vd_kpt6'), id=278, color=[128, 64, 255]), + 279: + dict(link=('vd_kpt6', 'vd_kpt5'), id=279, color=[128, 64, 255]), + 280: + dict(link=('vd_kpt5', 'vd_kpt4'), id=280, color=[128, 64, 255]), + 281: + dict(link=('vd_kpt4', 'vd_kpt3'), id=281, color=[128, 64, 255]), + 282: + dict(link=('vd_kpt3', 'vd_kpt2'), id=282, color=[128, 
64, 255]), + 283: + dict(link=('vd_kpt6', 'vd_kpt1'), id=283, color=[128, 64, 255]), + # sling_dress + 284: + dict(link=('sd_kpt1', 'sd_kpt2'), id=284, color=[128, 64, 0]), + 285: + dict(link=('sd_kpt2', 'sd_kpt8'), id=285, color=[128, 64, 0]), + 286: + dict(link=('sd_kpt8', 'sd_kpt9'), id=286, color=[128, 64, 0]), + 287: + dict(link=('sd_kpt9', 'sd_kpt10'), id=287, color=[128, 64, 0]), + 288: + dict(link=('sd_kpt10', 'sd_kpt11'), id=288, color=[128, 64, 0]), + 289: + dict(link=('sd_kpt11', 'sd_kpt12'), id=289, color=[128, 64, 0]), + 290: + dict(link=('sd_kpt12', 'sd_kpt13'), id=290, color=[128, 64, 0]), + 291: + dict(link=('sd_kpt13', 'sd_kpt14'), id=291, color=[128, 64, 0]), + 292: + dict(link=('sd_kpt14', 'sd_kpt15'), id=292, color=[128, 64, 0]), + 293: + dict(link=('sd_kpt15', 'sd_kpt16'), id=293, color=[128, 64, 0]), + 294: + dict(link=('sd_kpt16', 'sd_kpt17'), id=294, color=[128, 64, 0]), + 295: + dict(link=('sd_kpt17', 'sd_kpt18'), id=295, color=[128, 64, 0]), + 296: + dict(link=('sd_kpt18', 'sd_kpt6'), id=296, color=[128, 64, 0]), + 297: + dict(link=('sd_kpt6', 'sd_kpt5'), id=297, color=[128, 64, 0]), + 298: + dict(link=('sd_kpt5', 'sd_kpt4'), id=298, color=[128, 64, 0]), + 299: + dict(link=('sd_kpt4', 'sd_kpt3'), id=299, color=[128, 64, 0]), + 300: + dict(link=('sd_kpt3', 'sd_kpt2'), id=300, color=[128, 64, 0]), + 301: + dict(link=('sd_kpt2', 'sd_kpt7'), id=301, color=[128, 64, 0]), + 302: + dict(link=('sd_kpt6', 'sd_kpt19'), id=302, color=[128, 64, 0]), + 303: + dict(link=('sd_kpt6', 'sd_kpt1'), id=303, color=[128, 64, 0]), + }, + joint_weights=[1.] * 294, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/deepfashion_full.py b/mmpose/configs/_base_/datasets/deepfashion_full.py new file mode 100644 index 0000000000000000000000000000000000000000..4d989069ee7253d3a5b5f01c81135b1a472cd4b2 --- /dev/null +++ b/mmpose/configs/_base_/datasets/deepfashion_full.py @@ -0,0 +1,74 @@ +dataset_info = dict( + dataset_name='deepfashion_full', + paper_info=dict( + author='Liu, Ziwei and Luo, Ping and Qiu, Shi ' + 'and Wang, Xiaogang and Tang, Xiaoou', + title='DeepFashion: Powering Robust Clothes Recognition ' + 'and Retrieval with Rich Annotations', + container='Proceedings of IEEE Conference on Computer ' + 'Vision and Pattern Recognition (CVPR)', + year='2016', + homepage='http://mmlab.ie.cuhk.edu.hk/projects/' + 'DeepFashion/LandmarkDetection.html', + ), + keypoint_info={ + 0: + dict( + name='left collar', + id=0, + color=[255, 255, 255], + type='', + swap='right collar'), + 1: + dict( + name='right collar', + id=1, + color=[255, 255, 255], + type='', + swap='left collar'), + 2: + dict( + name='left sleeve', + id=2, + color=[255, 255, 255], + type='', + swap='right sleeve'), + 3: + dict( + name='right sleeve', + id=3, + color=[255, 255, 255], + type='', + swap='left sleeve'), + 4: + dict( + name='left waistline', + id=4, + color=[255, 255, 255], + type='', + swap='right waistline'), + 5: + dict( + name='right waistline', + id=5, + color=[255, 255, 255], + type='', + swap='left waistline'), + 6: + dict( + name='left hem', + id=6, + color=[255, 255, 255], + type='', + swap='right hem'), + 7: + dict( + name='right hem', + id=7, + color=[255, 255, 255], + type='', + swap='left hem'), + }, + skeleton_info={}, + joint_weights=[1.]
* 8, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/deepfashion_lower.py b/mmpose/configs/_base_/datasets/deepfashion_lower.py new file mode 100644 index 0000000000000000000000000000000000000000..db014a1747ca618f93a7d092d29027015b48ae3c --- /dev/null +++ b/mmpose/configs/_base_/datasets/deepfashion_lower.py @@ -0,0 +1,46 @@ +dataset_info = dict( + dataset_name='deepfashion_lower', + paper_info=dict( + author='Liu, Ziwei and Luo, Ping and Qiu, Shi ' + 'and Wang, Xiaogang and Tang, Xiaoou', + title='DeepFashion: Powering Robust Clothes Recognition ' + 'and Retrieval with Rich Annotations', + container='Proceedings of IEEE Conference on Computer ' + 'Vision and Pattern Recognition (CVPR)', + year='2016', + homepage='http://mmlab.ie.cuhk.edu.hk/projects/' + 'DeepFashion/LandmarkDetection.html', + ), + keypoint_info={ + 0: + dict( + name='left waistline', + id=0, + color=[255, 255, 255], + type='', + swap='right waistline'), + 1: + dict( + name='right waistline', + id=1, + color=[255, 255, 255], + type='', + swap='left waistline'), + 2: + dict( + name='left hem', + id=2, + color=[255, 255, 255], + type='', + swap='right hem'), + 3: + dict( + name='right hem', + id=3, + color=[255, 255, 255], + type='', + swap='left hem'), + }, + skeleton_info={}, + joint_weights=[1.] * 4, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/deepfashion_upper.py b/mmpose/configs/_base_/datasets/deepfashion_upper.py new file mode 100644 index 0000000000000000000000000000000000000000..f0b012fd37bee1ba5ed956a7a5465a8623bf0894 --- /dev/null +++ b/mmpose/configs/_base_/datasets/deepfashion_upper.py @@ -0,0 +1,60 @@ +dataset_info = dict( + dataset_name='deepfashion_upper', + paper_info=dict( + author='Liu, Ziwei and Luo, Ping and Qiu, Shi ' + 'and Wang, Xiaogang and Tang, Xiaoou', + title='DeepFashion: Powering Robust Clothes Recognition ' + 'and Retrieval with Rich Annotations', + container='Proceedings of IEEE Conference on Computer ' + 'Vision and Pattern Recognition (CVPR)', + year='2016', + homepage='http://mmlab.ie.cuhk.edu.hk/projects/' + 'DeepFashion/LandmarkDetection.html', + ), + keypoint_info={ + 0: + dict( + name='left collar', + id=0, + color=[255, 255, 255], + type='', + swap='right collar'), + 1: + dict( + name='right collar', + id=1, + color=[255, 255, 255], + type='', + swap='left collar'), + 2: + dict( + name='left sleeve', + id=2, + color=[255, 255, 255], + type='', + swap='right sleeve'), + 3: + dict( + name='right sleeve', + id=3, + color=[255, 255, 255], + type='', + swap='left sleeve'), + 4: + dict( + name='left hem', + id=4, + color=[255, 255, 255], + type='', + swap='right hem'), + 5: + dict( + name='right hem', + id=5, + color=[255, 255, 255], + type='', + swap='left hem'), + }, + skeleton_info={}, + joint_weights=[1.] 
* 6, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/fly.py b/mmpose/configs/_base_/datasets/fly.py new file mode 100644 index 0000000000000000000000000000000000000000..5f94ff57ca93d8f562b6a61b9a67198abdcde217 --- /dev/null +++ b/mmpose/configs/_base_/datasets/fly.py @@ -0,0 +1,237 @@ +dataset_info = dict( + dataset_name='fly', + paper_info=dict( + author='Pereira, Talmo D and Aldarondo, Diego E and ' + 'Willmore, Lindsay and Kislin, Mikhail and ' + 'Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W', + title='Fast animal pose estimation using deep neural networks', + container='Nature methods', + year='2019', + homepage='https://github.com/jgraving/DeepPoseKit-Data', + ), + keypoint_info={ + 0: + dict(name='head', id=0, color=[255, 255, 255], type='', swap=''), + 1: + dict(name='eyeL', id=1, color=[255, 255, 255], type='', swap='eyeR'), + 2: + dict(name='eyeR', id=2, color=[255, 255, 255], type='', swap='eyeL'), + 3: + dict(name='neck', id=3, color=[255, 255, 255], type='', swap=''), + 4: + dict(name='thorax', id=4, color=[255, 255, 255], type='', swap=''), + 5: + dict(name='abdomen', id=5, color=[255, 255, 255], type='', swap=''), + 6: + dict( + name='forelegR1', + id=6, + color=[255, 255, 255], + type='', + swap='forelegL1'), + 7: + dict( + name='forelegR2', + id=7, + color=[255, 255, 255], + type='', + swap='forelegL2'), + 8: + dict( + name='forelegR3', + id=8, + color=[255, 255, 255], + type='', + swap='forelegL3'), + 9: + dict( + name='forelegR4', + id=9, + color=[255, 255, 255], + type='', + swap='forelegL4'), + 10: + dict( + name='midlegR1', + id=10, + color=[255, 255, 255], + type='', + swap='midlegL1'), + 11: + dict( + name='midlegR2', + id=11, + color=[255, 255, 255], + type='', + swap='midlegL2'), + 12: + dict( + name='midlegR3', + id=12, + color=[255, 255, 255], + type='', + swap='midlegL3'), + 13: + dict( + name='midlegR4', + id=13, + color=[255, 255, 255], + type='', + swap='midlegL4'), + 14: + dict( + name='hindlegR1', + id=14, + color=[255, 255, 255], + type='', + swap='hindlegL1'), + 15: + dict( + name='hindlegR2', + id=15, + color=[255, 255, 255], + type='', + swap='hindlegL2'), + 16: + dict( + name='hindlegR3', + id=16, + color=[255, 255, 255], + type='', + swap='hindlegL3'), + 17: + dict( + name='hindlegR4', + id=17, + color=[255, 255, 255], + type='', + swap='hindlegL4'), + 18: + dict( + name='forelegL1', + id=18, + color=[255, 255, 255], + type='', + swap='forelegR1'), + 19: + dict( + name='forelegL2', + id=19, + color=[255, 255, 255], + type='', + swap='forelegR2'), + 20: + dict( + name='forelegL3', + id=20, + color=[255, 255, 255], + type='', + swap='forelegR3'), + 21: + dict( + name='forelegL4', + id=21, + color=[255, 255, 255], + type='', + swap='forelegR4'), + 22: + dict( + name='midlegL1', + id=22, + color=[255, 255, 255], + type='', + swap='midlegR1'), + 23: + dict( + name='midlegL2', + id=23, + color=[255, 255, 255], + type='', + swap='midlegR2'), + 24: + dict( + name='midlegL3', + id=24, + color=[255, 255, 255], + type='', + swap='midlegR3'), + 25: + dict( + name='midlegL4', + id=25, + color=[255, 255, 255], + type='', + swap='midlegR4'), + 26: + dict( + name='hindlegL1', + id=26, + color=[255, 255, 255], + type='', + swap='hindlegR1'), + 27: + dict( + name='hindlegL2', + id=27, + color=[255, 255, 255], + type='', + swap='hindlegR2'), + 28: + dict( + name='hindlegL3', + id=28, + color=[255, 255, 255], + type='', + swap='hindlegR3'), + 29: + dict( + name='hindlegL4', + id=29, + color=[255, 255, 255], + type='', + swap='hindlegR4'), + 30: + 
dict( + name='wingL', id=30, color=[255, 255, 255], type='', swap='wingR'), + 31: + dict( + name='wingR', id=31, color=[255, 255, 255], type='', swap='wingL'), + }, + skeleton_info={ + 0: dict(link=('eyeL', 'head'), id=0, color=[255, 255, 255]), + 1: dict(link=('eyeR', 'head'), id=1, color=[255, 255, 255]), + 2: dict(link=('neck', 'head'), id=2, color=[255, 255, 255]), + 3: dict(link=('thorax', 'neck'), id=3, color=[255, 255, 255]), + 4: dict(link=('abdomen', 'thorax'), id=4, color=[255, 255, 255]), + 5: dict(link=('forelegR2', 'forelegR1'), id=5, color=[255, 255, 255]), + 6: dict(link=('forelegR3', 'forelegR2'), id=6, color=[255, 255, 255]), + 7: dict(link=('forelegR4', 'forelegR3'), id=7, color=[255, 255, 255]), + 8: dict(link=('midlegR2', 'midlegR1'), id=8, color=[255, 255, 255]), + 9: dict(link=('midlegR3', 'midlegR2'), id=9, color=[255, 255, 255]), + 10: dict(link=('midlegR4', 'midlegR3'), id=10, color=[255, 255, 255]), + 11: + dict(link=('hindlegR2', 'hindlegR1'), id=11, color=[255, 255, 255]), + 12: + dict(link=('hindlegR3', 'hindlegR2'), id=12, color=[255, 255, 255]), + 13: + dict(link=('hindlegR4', 'hindlegR3'), id=13, color=[255, 255, 255]), + 14: + dict(link=('forelegL2', 'forelegL1'), id=14, color=[255, 255, 255]), + 15: + dict(link=('forelegL3', 'forelegL2'), id=15, color=[255, 255, 255]), + 16: + dict(link=('forelegL4', 'forelegL3'), id=16, color=[255, 255, 255]), + 17: dict(link=('midlegL2', 'midlegL1'), id=17, color=[255, 255, 255]), + 18: dict(link=('midlegL3', 'midlegL2'), id=18, color=[255, 255, 255]), + 19: dict(link=('midlegL4', 'midlegL3'), id=19, color=[255, 255, 255]), + 20: + dict(link=('hindlegL2', 'hindlegL1'), id=20, color=[255, 255, 255]), + 21: + dict(link=('hindlegL3', 'hindlegL2'), id=21, color=[255, 255, 255]), + 22: + dict(link=('hindlegL4', 'hindlegL3'), id=22, color=[255, 255, 255]), + 23: dict(link=('wingL', 'neck'), id=23, color=[255, 255, 255]), + 24: dict(link=('wingR', 'neck'), id=24, color=[255, 255, 255]) + }, + joint_weights=[1.] 
* 32, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/freihand2d.py b/mmpose/configs/_base_/datasets/freihand2d.py new file mode 100644 index 0000000000000000000000000000000000000000..8b960d10f3538801531dbccdd67aeac6e73ac572 --- /dev/null +++ b/mmpose/configs/_base_/datasets/freihand2d.py @@ -0,0 +1,144 @@ +dataset_info = dict( + dataset_name='freihand', + paper_info=dict( + author='Zimmermann, Christian and Ceylan, Duygu and ' + 'Yang, Jimei and Russell, Bryan and ' + 'Argus, Max and Brox, Thomas', + title='Freihand: A dataset for markerless capture of hand pose ' + 'and shape from single rgb images', + container='Proceedings of the IEEE International ' + 'Conference on Computer Vision', + year='2019', + homepage='https://lmb.informatik.uni-freiburg.de/projects/freihand/', + ), + keypoint_info={ + 0: + dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''), + 1: + dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''), + 2: + dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''), + 3: + dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''), + 4: + dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''), + 5: + dict( + name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''), + 6: + dict( + name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''), + 7: + dict( + name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''), + 8: + dict( + name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''), + 9: + dict( + name='middle_finger1', + id=9, + color=[102, 178, 255], + type='', + swap=''), + 10: + dict( + name='middle_finger2', + id=10, + color=[102, 178, 255], + type='', + swap=''), + 11: + dict( + name='middle_finger3', + id=11, + color=[102, 178, 255], + type='', + swap=''), + 12: + dict( + name='middle_finger4', + id=12, + color=[102, 178, 255], + type='', + swap=''), + 13: + dict( + name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''), + 14: + dict( + name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''), + 15: + dict( + name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''), + 16: + dict( + name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''), + 17: + dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''), + 18: + dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''), + 19: + dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''), + 20: + dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='') + }, + skeleton_info={ + 0: + dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]), + 1: + dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]), + 2: + dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]), + 3: + dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]), + 4: + dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]), + 5: + dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]), + 6: + dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]), + 7: + dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]), + 8: + dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]), + 9: + dict( + link=('middle_finger1', 'middle_finger2'), + id=9, + color=[102, 178, 255]), + 10: + dict( + link=('middle_finger2', 'middle_finger3'), + id=10, + color=[102, 178, 255]), + 11: + dict( + link=('middle_finger3', 'middle_finger4'), + id=11, + color=[102, 178, 255]), + 12: + dict(link=('wrist', 'ring_finger1'), 
id=12, color=[255, 51, 51]), + 13: + dict( + link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]), + 14: + dict( + link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]), + 15: + dict( + link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]), + 16: + dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]), + 17: + dict( + link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]), + 18: + dict( + link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]), + 19: + dict( + link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0]) + }, + joint_weights=[1.] * 21, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/h36m.py b/mmpose/configs/_base_/datasets/h36m.py new file mode 100644 index 0000000000000000000000000000000000000000..00a719d8b19f9ff3c5ef98476d73216055bf9186 --- /dev/null +++ b/mmpose/configs/_base_/datasets/h36m.py @@ -0,0 +1,152 @@ +dataset_info = dict( + dataset_name='h36m', + paper_info=dict( + author='Ionescu, Catalin and Papava, Dragos and ' + 'Olaru, Vlad and Sminchisescu, Cristian', + title='Human3.6M: Large Scale Datasets and Predictive ' + 'Methods for 3D Human Sensing in Natural Environments', + container='IEEE Transactions on Pattern Analysis and ' + 'Machine Intelligence', + year='2014', + homepage='http://vision.imar.ro/human3.6m/description.php', + ), + keypoint_info={ + 0: + dict(name='root', id=0, color=[51, 153, 255], type='lower', swap=''), + 1: + dict( + name='right_hip', + id=1, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 2: + dict( + name='right_knee', + id=2, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 3: + dict( + name='right_foot', + id=3, + color=[255, 128, 0], + type='lower', + swap='left_foot'), + 4: + dict( + name='left_hip', + id=4, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 5: + dict( + name='left_knee', + id=5, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 6: + dict( + name='left_foot', + id=6, + color=[0, 255, 0], + type='lower', + swap='right_foot'), + 7: + dict(name='spine', id=7, color=[51, 153, 255], type='upper', swap=''), + 8: + dict(name='thorax', id=8, color=[51, 153, 255], type='upper', swap=''), + 9: + dict( + name='neck_base', + id=9, + color=[51, 153, 255], + type='upper', + swap=''), + 10: + dict(name='head', id=10, color=[51, 153, 255], type='upper', swap=''), + 11: + dict( + name='left_shoulder', + id=11, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 12: + dict( + name='left_elbow', + id=12, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 13: + dict( + name='left_wrist', + id=13, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 14: + dict( + name='right_shoulder', + id=14, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 15: + dict( + name='right_elbow', + id=15, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 16: + dict( + name='right_wrist', + id=16, + color=[255, 128, 0], + type='upper', + swap='left_wrist') + }, + skeleton_info={ + 0: + dict(link=('root', 'left_hip'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_hip', 'left_knee'), id=1, color=[0, 255, 0]), + 2: + dict(link=('left_knee', 'left_foot'), id=2, color=[0, 255, 0]), + 3: + dict(link=('root', 'right_hip'), id=3, color=[255, 128, 0]), + 4: + dict(link=('right_hip', 'right_knee'), id=4, color=[255, 128, 0]), + 5: + dict(link=('right_knee', 'right_foot'), id=5, color=[255, 128, 0]), + 6: + dict(link=('root', 'spine'), id=6, color=[51, 153, 255]), + 7: 
+ dict(link=('spine', 'thorax'), id=7, color=[51, 153, 255]), + 8: + dict(link=('thorax', 'neck_base'), id=8, color=[51, 153, 255]), + 9: + dict(link=('neck_base', 'head'), id=9, color=[51, 153, 255]), + 10: + dict(link=('thorax', 'left_shoulder'), id=10, color=[0, 255, 0]), + 11: + dict(link=('left_shoulder', 'left_elbow'), id=11, color=[0, 255, 0]), + 12: + dict(link=('left_elbow', 'left_wrist'), id=12, color=[0, 255, 0]), + 13: + dict(link=('thorax', 'right_shoulder'), id=13, color=[255, 128, 0]), + 14: + dict( + link=('right_shoulder', 'right_elbow'), id=14, color=[255, 128, + 0]), + 15: + dict(link=('right_elbow', 'right_wrist'), id=15, color=[255, 128, 0]) + }, + joint_weights=[1.] * 17, + sigmas=[], + stats_info=dict(bbox_center=(528., 427.), bbox_scale=400.)) diff --git a/mmpose/configs/_base_/datasets/halpe.py b/mmpose/configs/_base_/datasets/halpe.py new file mode 100644 index 0000000000000000000000000000000000000000..1385fe81dc2190684f2142449c0f288f2cb74c1a --- /dev/null +++ b/mmpose/configs/_base_/datasets/halpe.py @@ -0,0 +1,1157 @@ +dataset_info = dict( + dataset_name='halpe', + paper_info=dict( + author='Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie' + ' and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu' + ' and Ma, Ze and Chen, Mingyang and Lu, Cewu', + title='PaStaNet: Toward Human Activity Knowledge Engine', + container='CVPR', + year='2020', + homepage='https://github.com/Fang-Haoshu/Halpe-FullBody/', + ), + keypoint_info={ + 0: + dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''), + 1: + dict( + name='left_eye', + id=1, + color=[51, 153, 255], + type='upper', + swap='right_eye'), + 2: + dict( + name='right_eye', + id=2, + color=[51, 153, 255], + type='upper', + swap='left_eye'), + 3: + dict( + name='left_ear', + id=3, + color=[51, 153, 255], + type='upper', + swap='right_ear'), + 4: + dict( + name='right_ear', + id=4, + color=[51, 153, 255], + type='upper', + swap='left_ear'), + 5: + dict( + name='left_shoulder', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 6: + dict( + name='right_shoulder', + id=6, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 7: + dict( + name='left_elbow', + id=7, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 8: + dict( + name='right_elbow', + id=8, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 9: + dict( + name='left_wrist', + id=9, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='left_hip', + id=11, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 12: + dict( + name='right_hip', + id=12, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 13: + dict( + name='left_knee', + id=13, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 14: + dict( + name='right_knee', + id=14, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 15: + dict( + name='left_ankle', + id=15, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 16: + dict( + name='right_ankle', + id=16, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 17: + dict(name='head', id=17, color=[255, 128, 0], type='upper', swap=''), + 18: + dict(name='neck', id=18, color=[255, 128, 0], type='upper', swap=''), + 19: + dict(name='hip', id=19, color=[255, 128, 0], type='lower', swap=''), + 20: + dict( + name='left_big_toe', + id=20, + color=[255, 128, 0], + type='lower', + swap='right_big_toe'), + 
21: + dict( + name='right_big_toe', + id=21, + color=[255, 128, 0], + type='lower', + swap='left_big_toe'), + 22: + dict( + name='left_small_toe', + id=22, + color=[255, 128, 0], + type='lower', + swap='right_small_toe'), + 23: + dict( + name='right_small_toe', + id=23, + color=[255, 128, 0], + type='lower', + swap='left_small_toe'), + 24: + dict( + name='left_heel', + id=24, + color=[255, 128, 0], + type='lower', + swap='right_heel'), + 25: + dict( + name='right_heel', + id=25, + color=[255, 128, 0], + type='lower', + swap='left_heel'), + 26: + dict( + name='face-0', + id=26, + color=[255, 255, 255], + type='', + swap='face-16'), + 27: + dict( + name='face-1', + id=27, + color=[255, 255, 255], + type='', + swap='face-15'), + 28: + dict( + name='face-2', + id=28, + color=[255, 255, 255], + type='', + swap='face-14'), + 29: + dict( + name='face-3', + id=29, + color=[255, 255, 255], + type='', + swap='face-13'), + 30: + dict( + name='face-4', + id=30, + color=[255, 255, 255], + type='', + swap='face-12'), + 31: + dict( + name='face-5', + id=31, + color=[255, 255, 255], + type='', + swap='face-11'), + 32: + dict( + name='face-6', + id=32, + color=[255, 255, 255], + type='', + swap='face-10'), + 33: + dict( + name='face-7', + id=33, + color=[255, 255, 255], + type='', + swap='face-9'), + 34: + dict(name='face-8', id=34, color=[255, 255, 255], type='', swap=''), + 35: + dict( + name='face-9', + id=35, + color=[255, 255, 255], + type='', + swap='face-7'), + 36: + dict( + name='face-10', + id=36, + color=[255, 255, 255], + type='', + swap='face-6'), + 37: + dict( + name='face-11', + id=37, + color=[255, 255, 255], + type='', + swap='face-5'), + 38: + dict( + name='face-12', + id=38, + color=[255, 255, 255], + type='', + swap='face-4'), + 39: + dict( + name='face-13', + id=39, + color=[255, 255, 255], + type='', + swap='face-3'), + 40: + dict( + name='face-14', + id=40, + color=[255, 255, 255], + type='', + swap='face-2'), + 41: + dict( + name='face-15', + id=41, + color=[255, 255, 255], + type='', + swap='face-1'), + 42: + dict( + name='face-16', + id=42, + color=[255, 255, 255], + type='', + swap='face-0'), + 43: + dict( + name='face-17', + id=43, + color=[255, 255, 255], + type='', + swap='face-26'), + 44: + dict( + name='face-18', + id=44, + color=[255, 255, 255], + type='', + swap='face-25'), + 45: + dict( + name='face-19', + id=45, + color=[255, 255, 255], + type='', + swap='face-24'), + 46: + dict( + name='face-20', + id=46, + color=[255, 255, 255], + type='', + swap='face-23'), + 47: + dict( + name='face-21', + id=47, + color=[255, 255, 255], + type='', + swap='face-22'), + 48: + dict( + name='face-22', + id=48, + color=[255, 255, 255], + type='', + swap='face-21'), + 49: + dict( + name='face-23', + id=49, + color=[255, 255, 255], + type='', + swap='face-20'), + 50: + dict( + name='face-24', + id=50, + color=[255, 255, 255], + type='', + swap='face-19'), + 51: + dict( + name='face-25', + id=51, + color=[255, 255, 255], + type='', + swap='face-18'), + 52: + dict( + name='face-26', + id=52, + color=[255, 255, 255], + type='', + swap='face-17'), + 53: + dict(name='face-27', id=53, color=[255, 255, 255], type='', swap=''), + 54: + dict(name='face-28', id=54, color=[255, 255, 255], type='', swap=''), + 55: + dict(name='face-29', id=55, color=[255, 255, 255], type='', swap=''), + 56: + dict(name='face-30', id=56, color=[255, 255, 255], type='', swap=''), + 57: + dict( + name='face-31', + id=57, + color=[255, 255, 255], + type='', + swap='face-35'), + 58: + dict( + name='face-32', + id=58, + 
color=[255, 255, 255], + type='', + swap='face-34'), + 59: + dict(name='face-33', id=59, color=[255, 255, 255], type='', swap=''), + 60: + dict( + name='face-34', + id=60, + color=[255, 255, 255], + type='', + swap='face-32'), + 61: + dict( + name='face-35', + id=61, + color=[255, 255, 255], + type='', + swap='face-31'), + 62: + dict( + name='face-36', + id=62, + color=[255, 255, 255], + type='', + swap='face-45'), + 63: + dict( + name='face-37', + id=63, + color=[255, 255, 255], + type='', + swap='face-44'), + 64: + dict( + name='face-38', + id=64, + color=[255, 255, 255], + type='', + swap='face-43'), + 65: + dict( + name='face-39', + id=65, + color=[255, 255, 255], + type='', + swap='face-42'), + 66: + dict( + name='face-40', + id=66, + color=[255, 255, 255], + type='', + swap='face-47'), + 67: + dict( + name='face-41', + id=67, + color=[255, 255, 255], + type='', + swap='face-46'), + 68: + dict( + name='face-42', + id=68, + color=[255, 255, 255], + type='', + swap='face-39'), + 69: + dict( + name='face-43', + id=69, + color=[255, 255, 255], + type='', + swap='face-38'), + 70: + dict( + name='face-44', + id=70, + color=[255, 255, 255], + type='', + swap='face-37'), + 71: + dict( + name='face-45', + id=71, + color=[255, 255, 255], + type='', + swap='face-36'), + 72: + dict( + name='face-46', + id=72, + color=[255, 255, 255], + type='', + swap='face-41'), + 73: + dict( + name='face-47', + id=73, + color=[255, 255, 255], + type='', + swap='face-40'), + 74: + dict( + name='face-48', + id=74, + color=[255, 255, 255], + type='', + swap='face-54'), + 75: + dict( + name='face-49', + id=75, + color=[255, 255, 255], + type='', + swap='face-53'), + 76: + dict( + name='face-50', + id=76, + color=[255, 255, 255], + type='', + swap='face-52'), + 77: + dict(name='face-51', id=77, color=[255, 255, 255], type='', swap=''), + 78: + dict( + name='face-52', + id=78, + color=[255, 255, 255], + type='', + swap='face-50'), + 79: + dict( + name='face-53', + id=79, + color=[255, 255, 255], + type='', + swap='face-49'), + 80: + dict( + name='face-54', + id=80, + color=[255, 255, 255], + type='', + swap='face-48'), + 81: + dict( + name='face-55', + id=81, + color=[255, 255, 255], + type='', + swap='face-59'), + 82: + dict( + name='face-56', + id=82, + color=[255, 255, 255], + type='', + swap='face-58'), + 83: + dict(name='face-57', id=83, color=[255, 255, 255], type='', swap=''), + 84: + dict( + name='face-58', + id=84, + color=[255, 255, 255], + type='', + swap='face-56'), + 85: + dict( + name='face-59', + id=85, + color=[255, 255, 255], + type='', + swap='face-55'), + 86: + dict( + name='face-60', + id=86, + color=[255, 255, 255], + type='', + swap='face-64'), + 87: + dict( + name='face-61', + id=87, + color=[255, 255, 255], + type='', + swap='face-63'), + 88: + dict(name='face-62', id=88, color=[255, 255, 255], type='', swap=''), + 89: + dict( + name='face-63', + id=89, + color=[255, 255, 255], + type='', + swap='face-61'), + 90: + dict( + name='face-64', + id=90, + color=[255, 255, 255], + type='', + swap='face-60'), + 91: + dict( + name='face-65', + id=91, + color=[255, 255, 255], + type='', + swap='face-67'), + 92: + dict(name='face-66', id=92, color=[255, 255, 255], type='', swap=''), + 93: + dict( + name='face-67', + id=93, + color=[255, 255, 255], + type='', + swap='face-65'), + 94: + dict( + name='left_hand_root', + id=94, + color=[255, 255, 255], + type='', + swap='right_hand_root'), + 95: + dict( + name='left_thumb1', + id=95, + color=[255, 128, 0], + type='', + swap='right_thumb1'), + 96: + dict( + 
name='left_thumb2', + id=96, + color=[255, 128, 0], + type='', + swap='right_thumb2'), + 97: + dict( + name='left_thumb3', + id=97, + color=[255, 128, 0], + type='', + swap='right_thumb3'), + 98: + dict( + name='left_thumb4', + id=98, + color=[255, 128, 0], + type='', + swap='right_thumb4'), + 99: + dict( + name='left_forefinger1', + id=99, + color=[255, 153, 255], + type='', + swap='right_forefinger1'), + 100: + dict( + name='left_forefinger2', + id=100, + color=[255, 153, 255], + type='', + swap='right_forefinger2'), + 101: + dict( + name='left_forefinger3', + id=101, + color=[255, 153, 255], + type='', + swap='right_forefinger3'), + 102: + dict( + name='left_forefinger4', + id=102, + color=[255, 153, 255], + type='', + swap='right_forefinger4'), + 103: + dict( + name='left_middle_finger1', + id=103, + color=[102, 178, 255], + type='', + swap='right_middle_finger1'), + 104: + dict( + name='left_middle_finger2', + id=104, + color=[102, 178, 255], + type='', + swap='right_middle_finger2'), + 105: + dict( + name='left_middle_finger3', + id=105, + color=[102, 178, 255], + type='', + swap='right_middle_finger3'), + 106: + dict( + name='left_middle_finger4', + id=106, + color=[102, 178, 255], + type='', + swap='right_middle_finger4'), + 107: + dict( + name='left_ring_finger1', + id=107, + color=[255, 51, 51], + type='', + swap='right_ring_finger1'), + 108: + dict( + name='left_ring_finger2', + id=108, + color=[255, 51, 51], + type='', + swap='right_ring_finger2'), + 109: + dict( + name='left_ring_finger3', + id=109, + color=[255, 51, 51], + type='', + swap='right_ring_finger3'), + 110: + dict( + name='left_ring_finger4', + id=110, + color=[255, 51, 51], + type='', + swap='right_ring_finger4'), + 111: + dict( + name='left_pinky_finger1', + id=111, + color=[0, 255, 0], + type='', + swap='right_pinky_finger1'), + 112: + dict( + name='left_pinky_finger2', + id=112, + color=[0, 255, 0], + type='', + swap='right_pinky_finger2'), + 113: + dict( + name='left_pinky_finger3', + id=113, + color=[0, 255, 0], + type='', + swap='right_pinky_finger3'), + 114: + dict( + name='left_pinky_finger4', + id=114, + color=[0, 255, 0], + type='', + swap='right_pinky_finger4'), + 115: + dict( + name='right_hand_root', + id=115, + color=[255, 255, 255], + type='', + swap='left_hand_root'), + 116: + dict( + name='right_thumb1', + id=116, + color=[255, 128, 0], + type='', + swap='left_thumb1'), + 117: + dict( + name='right_thumb2', + id=117, + color=[255, 128, 0], + type='', + swap='left_thumb2'), + 118: + dict( + name='right_thumb3', + id=118, + color=[255, 128, 0], + type='', + swap='left_thumb3'), + 119: + dict( + name='right_thumb4', + id=119, + color=[255, 128, 0], + type='', + swap='left_thumb4'), + 120: + dict( + name='right_forefinger1', + id=120, + color=[255, 153, 255], + type='', + swap='left_forefinger1'), + 121: + dict( + name='right_forefinger2', + id=121, + color=[255, 153, 255], + type='', + swap='left_forefinger2'), + 122: + dict( + name='right_forefinger3', + id=122, + color=[255, 153, 255], + type='', + swap='left_forefinger3'), + 123: + dict( + name='right_forefinger4', + id=123, + color=[255, 153, 255], + type='', + swap='left_forefinger4'), + 124: + dict( + name='right_middle_finger1', + id=124, + color=[102, 178, 255], + type='', + swap='left_middle_finger1'), + 125: + dict( + name='right_middle_finger2', + id=125, + color=[102, 178, 255], + type='', + swap='left_middle_finger2'), + 126: + dict( + name='right_middle_finger3', + id=126, + color=[102, 178, 255], + type='', + 
swap='left_middle_finger3'), + 127: + dict( + name='right_middle_finger4', + id=127, + color=[102, 178, 255], + type='', + swap='left_middle_finger4'), + 128: + dict( + name='right_ring_finger1', + id=128, + color=[255, 51, 51], + type='', + swap='left_ring_finger1'), + 129: + dict( + name='right_ring_finger2', + id=129, + color=[255, 51, 51], + type='', + swap='left_ring_finger2'), + 130: + dict( + name='right_ring_finger3', + id=130, + color=[255, 51, 51], + type='', + swap='left_ring_finger3'), + 131: + dict( + name='right_ring_finger4', + id=131, + color=[255, 51, 51], + type='', + swap='left_ring_finger4'), + 132: + dict( + name='right_pinky_finger1', + id=132, + color=[0, 255, 0], + type='', + swap='left_pinky_finger1'), + 133: + dict( + name='right_pinky_finger2', + id=133, + color=[0, 255, 0], + type='', + swap='left_pinky_finger2'), + 134: + dict( + name='right_pinky_finger3', + id=134, + color=[0, 255, 0], + type='', + swap='left_pinky_finger3'), + 135: + dict( + name='right_pinky_finger4', + id=135, + color=[0, 255, 0], + type='', + swap='left_pinky_finger4') + }, + skeleton_info={ + 0: + dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + 2: + dict(link=('left_hip', 'hip'), id=2, color=[0, 255, 0]), + 3: + dict(link=('right_ankle', 'right_knee'), id=3, color=[255, 128, 0]), + 4: + dict(link=('right_knee', 'right_hip'), id=4, color=[255, 128, 0]), + 5: + dict(link=('right_hip', 'hip'), id=5, color=[255, 128, 0]), + 6: + dict(link=('head', 'neck'), id=6, color=[51, 153, 255]), + 7: + dict(link=('neck', 'hip'), id=7, color=[51, 153, 255]), + 8: + dict(link=('neck', 'left_shoulder'), id=8, color=[0, 255, 0]), + 9: + dict(link=('left_shoulder', 'left_elbow'), id=9, color=[0, 255, 0]), + 10: + dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), + 11: + dict(link=('neck', 'right_shoulder'), id=11, color=[255, 128, 0]), + 12: + dict( + link=('right_shoulder', 'right_elbow'), id=12, color=[255, 128, + 0]), + 13: + dict(link=('right_elbow', 'right_wrist'), id=13, color=[255, 128, 0]), + 14: + dict(link=('left_eye', 'right_eye'), id=14, color=[51, 153, 255]), + 15: + dict(link=('nose', 'left_eye'), id=15, color=[51, 153, 255]), + 16: + dict(link=('nose', 'right_eye'), id=16, color=[51, 153, 255]), + 17: + dict(link=('left_eye', 'left_ear'), id=17, color=[51, 153, 255]), + 18: + dict(link=('right_eye', 'right_ear'), id=18, color=[51, 153, 255]), + 19: + dict(link=('left_ear', 'left_shoulder'), id=19, color=[51, 153, 255]), + 20: + dict( + link=('right_ear', 'right_shoulder'), id=20, color=[51, 153, 255]), + 21: + dict(link=('left_ankle', 'left_big_toe'), id=21, color=[0, 255, 0]), + 22: + dict(link=('left_ankle', 'left_small_toe'), id=22, color=[0, 255, 0]), + 23: + dict(link=('left_ankle', 'left_heel'), id=23, color=[0, 255, 0]), + 24: + dict( + link=('right_ankle', 'right_big_toe'), id=24, color=[255, 128, 0]), + 25: + dict( + link=('right_ankle', 'right_small_toe'), + id=25, + color=[255, 128, 0]), + 26: + dict(link=('right_ankle', 'right_heel'), id=26, color=[255, 128, 0]), + 27: + dict(link=('left_wrist', 'left_thumb1'), id=27, color=[255, 128, 0]), + 28: + dict(link=('left_thumb1', 'left_thumb2'), id=28, color=[255, 128, 0]), + 29: + dict(link=('left_thumb2', 'left_thumb3'), id=29, color=[255, 128, 0]), + 30: + dict(link=('left_thumb3', 'left_thumb4'), id=30, color=[255, 128, 0]), + 31: + dict( + link=('left_wrist', 'left_forefinger1'), + id=31, + color=[255, 153, 255]), + 32: + dict( 
+ link=('left_forefinger1', 'left_forefinger2'), + id=32, + color=[255, 153, 255]), + 33: + dict( + link=('left_forefinger2', 'left_forefinger3'), + id=33, + color=[255, 153, 255]), + 34: + dict( + link=('left_forefinger3', 'left_forefinger4'), + id=34, + color=[255, 153, 255]), + 35: + dict( + link=('left_wrist', 'left_middle_finger1'), + id=35, + color=[102, 178, 255]), + 36: + dict( + link=('left_middle_finger1', 'left_middle_finger2'), + id=36, + color=[102, 178, 255]), + 37: + dict( + link=('left_middle_finger2', 'left_middle_finger3'), + id=37, + color=[102, 178, 255]), + 38: + dict( + link=('left_middle_finger3', 'left_middle_finger4'), + id=38, + color=[102, 178, 255]), + 39: + dict( + link=('left_wrist', 'left_ring_finger1'), + id=39, + color=[255, 51, 51]), + 40: + dict( + link=('left_ring_finger1', 'left_ring_finger2'), + id=40, + color=[255, 51, 51]), + 41: + dict( + link=('left_ring_finger2', 'left_ring_finger3'), + id=41, + color=[255, 51, 51]), + 42: + dict( + link=('left_ring_finger3', 'left_ring_finger4'), + id=42, + color=[255, 51, 51]), + 43: + dict( + link=('left_wrist', 'left_pinky_finger1'), + id=43, + color=[0, 255, 0]), + 44: + dict( + link=('left_pinky_finger1', 'left_pinky_finger2'), + id=44, + color=[0, 255, 0]), + 45: + dict( + link=('left_pinky_finger2', 'left_pinky_finger3'), + id=45, + color=[0, 255, 0]), + 46: + dict( + link=('left_pinky_finger3', 'left_pinky_finger4'), + id=46, + color=[0, 255, 0]), + 47: + dict(link=('right_wrist', 'right_thumb1'), id=47, color=[255, 128, 0]), + 48: + dict( + link=('right_thumb1', 'right_thumb2'), id=48, color=[255, 128, 0]), + 49: + dict( + link=('right_thumb2', 'right_thumb3'), id=49, color=[255, 128, 0]), + 50: + dict( + link=('right_thumb3', 'right_thumb4'), id=50, color=[255, 128, 0]), + 51: + dict( + link=('right_wrist', 'right_forefinger1'), + id=51, + color=[255, 153, 255]), + 52: + dict( + link=('right_forefinger1', 'right_forefinger2'), + id=52, + color=[255, 153, 255]), + 53: + dict( + link=('right_forefinger2', 'right_forefinger3'), + id=53, + color=[255, 153, 255]), + 54: + dict( + link=('right_forefinger3', 'right_forefinger4'), + id=54, + color=[255, 153, 255]), + 55: + dict( + link=('right_wrist', 'right_middle_finger1'), + id=55, + color=[102, 178, 255]), + 56: + dict( + link=('right_middle_finger1', 'right_middle_finger2'), + id=56, + color=[102, 178, 255]), + 57: + dict( + link=('right_middle_finger2', 'right_middle_finger3'), + id=57, + color=[102, 178, 255]), + 58: + dict( + link=('right_middle_finger3', 'right_middle_finger4'), + id=58, + color=[102, 178, 255]), + 59: + dict( + link=('right_wrist', 'right_ring_finger1'), + id=59, + color=[255, 51, 51]), + 60: + dict( + link=('right_ring_finger1', 'right_ring_finger2'), + id=60, + color=[255, 51, 51]), + 61: + dict( + link=('right_ring_finger2', 'right_ring_finger3'), + id=61, + color=[255, 51, 51]), + 62: + dict( + link=('right_ring_finger3', 'right_ring_finger4'), + id=62, + color=[255, 51, 51]), + 63: + dict( + link=('right_wrist', 'right_pinky_finger1'), + id=63, + color=[0, 255, 0]), + 64: + dict( + link=('right_pinky_finger1', 'right_pinky_finger2'), + id=64, + color=[0, 255, 0]), + 65: + dict( + link=('right_pinky_finger2', 'right_pinky_finger3'), + id=65, + color=[0, 255, 0]), + 66: + dict( + link=('right_pinky_finger3', 'right_pinky_finger4'), + id=66, + color=[0, 255, 0]) + }, + joint_weights=[1.] 
* 136, + + # 'https://github.com/Fang-Haoshu/Halpe-FullBody/blob/master/' + # 'HalpeCOCOAPI/PythonAPI/halpecocotools/cocoeval.py#L245' + sigmas=[ + 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, + 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.08, 0.08, 0.08, + 0.089, 0.089, 0.089, 0.089, 0.089, 0.089, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, + 0.015, 0.015, 0.015, 0.015, 0.015, 0.015 + ]) diff --git a/mmpose/configs/_base_/datasets/horse10.py b/mmpose/configs/_base_/datasets/horse10.py new file mode 100644 index 0000000000000000000000000000000000000000..a485bf191bc151b0d76e48f3e55eb8e2dda6c506 --- /dev/null +++ b/mmpose/configs/_base_/datasets/horse10.py @@ -0,0 +1,201 @@ +dataset_info = dict( + dataset_name='horse10', + paper_info=dict( + author='Mathis, Alexander and Biasi, Thomas and ' + 'Schneider, Steffen and ' + 'Yuksekgonul, Mert and Rogers, Byron and ' + 'Bethge, Matthias and ' + 'Mathis, Mackenzie W', + title='Pretraining boosts out-of-domain robustness ' + 'for pose estimation', + container='Proceedings of the IEEE/CVF Winter Conference on ' + 'Applications of Computer Vision', + year='2021', + homepage='http://www.mackenziemathislab.org/horse10', + ), + keypoint_info={ + 0: + dict(name='Nose', id=0, color=[255, 153, 255], type='upper', swap=''), + 1: + dict(name='Eye', id=1, color=[255, 153, 255], type='upper', swap=''), + 2: + dict( + name='Nearknee', + id=2, + color=[255, 102, 255], + type='upper', + swap=''), + 3: + dict( + name='Nearfrontfetlock', + id=3, + color=[255, 102, 255], + type='upper', + swap=''), + 4: + dict( + name='Nearfrontfoot', + id=4, + color=[255, 102, 255], + type='upper', + swap=''), + 5: + dict( + name='Offknee', id=5, color=[255, 102, 255], type='upper', + swap=''), + 6: + dict( + name='Offfrontfetlock', + id=6, + color=[255, 102, 255], + type='upper', + swap=''), + 7: + dict( + name='Offfrontfoot', + id=7, + color=[255, 102, 255], + type='upper', + swap=''), + 8: + dict( + name='Shoulder', + id=8, + color=[255, 153, 255], + type='upper', + swap=''), + 9: + dict( + name='Midshoulder', + id=9, + color=[255, 153, 255], + type='upper', + swap=''), + 10: + dict( + name='Elbow', id=10, color=[255, 153, 255], type='upper', swap=''), + 11: + dict( + name='Girth', id=11, color=[255, 153, 255], type='upper', swap=''), + 12: + dict( + name='Wither', id=12, color=[255, 153, 255], type='upper', + swap=''), + 13: + dict( + name='Nearhindhock', + id=13, + color=[255, 51, 255], + type='lower', + swap=''), + 14: + dict( + name='Nearhindfetlock', + id=14, + color=[255, 51, 255], + type='lower', + swap=''), + 15: + dict( + name='Nearhindfoot', + id=15, + color=[255, 51, 255], + type='lower', + swap=''), + 16: + dict(name='Hip', id=16, color=[255, 153, 255], type='lower', swap=''), + 17: + dict( + name='Stifle', id=17, color=[255, 153, 255], 
type='lower', + swap=''), + 18: + dict( + name='Offhindhock', + id=18, + color=[255, 51, 255], + type='lower', + swap=''), + 19: + dict( + name='Offhindfetlock', + id=19, + color=[255, 51, 255], + type='lower', + swap=''), + 20: + dict( + name='Offhindfoot', + id=20, + color=[255, 51, 255], + type='lower', + swap=''), + 21: + dict( + name='Ischium', + id=21, + color=[255, 153, 255], + type='lower', + swap='') + }, + skeleton_info={ + 0: + dict(link=('Nose', 'Eye'), id=0, color=[255, 153, 255]), + 1: + dict(link=('Eye', 'Wither'), id=1, color=[255, 153, 255]), + 2: + dict(link=('Wither', 'Hip'), id=2, color=[255, 153, 255]), + 3: + dict(link=('Hip', 'Ischium'), id=3, color=[255, 153, 255]), + 4: + dict(link=('Ischium', 'Stifle'), id=4, color=[255, 153, 255]), + 5: + dict(link=('Stifle', 'Girth'), id=5, color=[255, 153, 255]), + 6: + dict(link=('Girth', 'Elbow'), id=6, color=[255, 153, 255]), + 7: + dict(link=('Elbow', 'Shoulder'), id=7, color=[255, 153, 255]), + 8: + dict(link=('Shoulder', 'Midshoulder'), id=8, color=[255, 153, 255]), + 9: + dict(link=('Midshoulder', 'Wither'), id=9, color=[255, 153, 255]), + 10: + dict( + link=('Nearknee', 'Nearfrontfetlock'), + id=10, + color=[255, 102, 255]), + 11: + dict( + link=('Nearfrontfetlock', 'Nearfrontfoot'), + id=11, + color=[255, 102, 255]), + 12: + dict( + link=('Offknee', 'Offfrontfetlock'), id=12, color=[255, 102, 255]), + 13: + dict( + link=('Offfrontfetlock', 'Offfrontfoot'), + id=13, + color=[255, 102, 255]), + 14: + dict( + link=('Nearhindhock', 'Nearhindfetlock'), + id=14, + color=[255, 51, 255]), + 15: + dict( + link=('Nearhindfetlock', 'Nearhindfoot'), + id=15, + color=[255, 51, 255]), + 16: + dict( + link=('Offhindhock', 'Offhindfetlock'), + id=16, + color=[255, 51, 255]), + 17: + dict( + link=('Offhindfetlock', 'Offhindfoot'), + id=17, + color=[255, 51, 255]) + }, + joint_weights=[1.] 
* 22, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/interhand2d.py b/mmpose/configs/_base_/datasets/interhand2d.py new file mode 100644 index 0000000000000000000000000000000000000000..0134f07de5bf536eaffbf71155a7e6eb33b24f0a --- /dev/null +++ b/mmpose/configs/_base_/datasets/interhand2d.py @@ -0,0 +1,142 @@ +dataset_info = dict( + dataset_name='interhand2d', + paper_info=dict( + author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and ' + 'Shiratori, Takaaki and Lee, Kyoung Mu', + title='InterHand2.6M: A dataset and baseline for 3D ' + 'interacting hand pose estimation from a single RGB image', + container='arXiv', + year='2020', + homepage='https://mks0601.github.io/InterHand2.6M/', + ), + keypoint_info={ + 0: + dict(name='thumb4', id=0, color=[255, 128, 0], type='', swap=''), + 1: + dict(name='thumb3', id=1, color=[255, 128, 0], type='', swap=''), + 2: + dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''), + 3: + dict(name='thumb1', id=3, color=[255, 128, 0], type='', swap=''), + 4: + dict( + name='forefinger4', id=4, color=[255, 153, 255], type='', swap=''), + 5: + dict( + name='forefinger3', id=5, color=[255, 153, 255], type='', swap=''), + 6: + dict( + name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''), + 7: + dict( + name='forefinger1', id=7, color=[255, 153, 255], type='', swap=''), + 8: + dict( + name='middle_finger4', + id=8, + color=[102, 178, 255], + type='', + swap=''), + 9: + dict( + name='middle_finger3', + id=9, + color=[102, 178, 255], + type='', + swap=''), + 10: + dict( + name='middle_finger2', + id=10, + color=[102, 178, 255], + type='', + swap=''), + 11: + dict( + name='middle_finger1', + id=11, + color=[102, 178, 255], + type='', + swap=''), + 12: + dict( + name='ring_finger4', id=12, color=[255, 51, 51], type='', swap=''), + 13: + dict( + name='ring_finger3', id=13, color=[255, 51, 51], type='', swap=''), + 14: + dict( + name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''), + 15: + dict( + name='ring_finger1', id=15, color=[255, 51, 51], type='', swap=''), + 16: + dict(name='pinky_finger4', id=16, color=[0, 255, 0], type='', swap=''), + 17: + dict(name='pinky_finger3', id=17, color=[0, 255, 0], type='', swap=''), + 18: + dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''), + 19: + dict(name='pinky_finger1', id=19, color=[0, 255, 0], type='', swap=''), + 20: + dict(name='wrist', id=20, color=[255, 255, 255], type='', swap='') + }, + skeleton_info={ + 0: + dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]), + 1: + dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]), + 2: + dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]), + 3: + dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]), + 4: + dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]), + 5: + dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]), + 6: + dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]), + 7: + dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]), + 8: + dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]), + 9: + dict( + link=('middle_finger1', 'middle_finger2'), + id=9, + color=[102, 178, 255]), + 10: + dict( + link=('middle_finger2', 'middle_finger3'), + id=10, + color=[102, 178, 255]), + 11: + dict( + link=('middle_finger3', 'middle_finger4'), + id=11, + color=[102, 178, 255]), + 12: + dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]), + 13: + dict( + link=('ring_finger1', 'ring_finger2'), id=13, 
color=[255, 51, 51]), + 14: + dict( + link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]), + 15: + dict( + link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]), + 16: + dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]), + 17: + dict( + link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]), + 18: + dict( + link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]), + 19: + dict( + link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0]) + }, + joint_weights=[1.] * 21, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/interhand3d.py b/mmpose/configs/_base_/datasets/interhand3d.py new file mode 100644 index 0000000000000000000000000000000000000000..e2bd8121c281c741ec9b980c7570ebef8a632993 --- /dev/null +++ b/mmpose/configs/_base_/datasets/interhand3d.py @@ -0,0 +1,487 @@ +dataset_info = dict( + dataset_name='interhand3d', + paper_info=dict( + author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and ' + 'Shiratori, Takaaki and Lee, Kyoung Mu', + title='InterHand2.6M: A dataset and baseline for 3D ' + 'interacting hand pose estimation from a single RGB image', + container='arXiv', + year='2020', + homepage='https://mks0601.github.io/InterHand2.6M/', + ), + keypoint_info={ + 0: + dict( + name='right_thumb4', + id=0, + color=[255, 128, 0], + type='', + swap='left_thumb4'), + 1: + dict( + name='right_thumb3', + id=1, + color=[255, 128, 0], + type='', + swap='left_thumb3'), + 2: + dict( + name='right_thumb2', + id=2, + color=[255, 128, 0], + type='', + swap='left_thumb2'), + 3: + dict( + name='right_thumb1', + id=3, + color=[255, 128, 0], + type='', + swap='left_thumb1'), + 4: + dict( + name='right_forefinger4', + id=4, + color=[255, 153, 255], + type='', + swap='left_forefinger4'), + 5: + dict( + name='right_forefinger3', + id=5, + color=[255, 153, 255], + type='', + swap='left_forefinger3'), + 6: + dict( + name='right_forefinger2', + id=6, + color=[255, 153, 255], + type='', + swap='left_forefinger2'), + 7: + dict( + name='right_forefinger1', + id=7, + color=[255, 153, 255], + type='', + swap='left_forefinger1'), + 8: + dict( + name='right_middle_finger4', + id=8, + color=[102, 178, 255], + type='', + swap='left_middle_finger4'), + 9: + dict( + name='right_middle_finger3', + id=9, + color=[102, 178, 255], + type='', + swap='left_middle_finger3'), + 10: + dict( + name='right_middle_finger2', + id=10, + color=[102, 178, 255], + type='', + swap='left_middle_finger2'), + 11: + dict( + name='right_middle_finger1', + id=11, + color=[102, 178, 255], + type='', + swap='left_middle_finger1'), + 12: + dict( + name='right_ring_finger4', + id=12, + color=[255, 51, 51], + type='', + swap='left_ring_finger4'), + 13: + dict( + name='right_ring_finger3', + id=13, + color=[255, 51, 51], + type='', + swap='left_ring_finger3'), + 14: + dict( + name='right_ring_finger2', + id=14, + color=[255, 51, 51], + type='', + swap='left_ring_finger2'), + 15: + dict( + name='right_ring_finger1', + id=15, + color=[255, 51, 51], + type='', + swap='left_ring_finger1'), + 16: + dict( + name='right_pinky_finger4', + id=16, + color=[0, 255, 0], + type='', + swap='left_pinky_finger4'), + 17: + dict( + name='right_pinky_finger3', + id=17, + color=[0, 255, 0], + type='', + swap='left_pinky_finger3'), + 18: + dict( + name='right_pinky_finger2', + id=18, + color=[0, 255, 0], + type='', + swap='left_pinky_finger2'), + 19: + dict( + name='right_pinky_finger1', + id=19, + color=[0, 255, 0], + type='', + swap='left_pinky_finger1'), + 20: + dict( + 
name='right_wrist', + id=20, + color=[255, 255, 255], + type='', + swap='left_wrist'), + 21: + dict( + name='left_thumb4', + id=21, + color=[255, 128, 0], + type='', + swap='right_thumb4'), + 22: + dict( + name='left_thumb3', + id=22, + color=[255, 128, 0], + type='', + swap='right_thumb3'), + 23: + dict( + name='left_thumb2', + id=23, + color=[255, 128, 0], + type='', + swap='right_thumb2'), + 24: + dict( + name='left_thumb1', + id=24, + color=[255, 128, 0], + type='', + swap='right_thumb1'), + 25: + dict( + name='left_forefinger4', + id=25, + color=[255, 153, 255], + type='', + swap='right_forefinger4'), + 26: + dict( + name='left_forefinger3', + id=26, + color=[255, 153, 255], + type='', + swap='right_forefinger3'), + 27: + dict( + name='left_forefinger2', + id=27, + color=[255, 153, 255], + type='', + swap='right_forefinger2'), + 28: + dict( + name='left_forefinger1', + id=28, + color=[255, 153, 255], + type='', + swap='right_forefinger1'), + 29: + dict( + name='left_middle_finger4', + id=29, + color=[102, 178, 255], + type='', + swap='right_middle_finger4'), + 30: + dict( + name='left_middle_finger3', + id=30, + color=[102, 178, 255], + type='', + swap='right_middle_finger3'), + 31: + dict( + name='left_middle_finger2', + id=31, + color=[102, 178, 255], + type='', + swap='right_middle_finger2'), + 32: + dict( + name='left_middle_finger1', + id=32, + color=[102, 178, 255], + type='', + swap='right_middle_finger1'), + 33: + dict( + name='left_ring_finger4', + id=33, + color=[255, 51, 51], + type='', + swap='right_ring_finger4'), + 34: + dict( + name='left_ring_finger3', + id=34, + color=[255, 51, 51], + type='', + swap='right_ring_finger3'), + 35: + dict( + name='left_ring_finger2', + id=35, + color=[255, 51, 51], + type='', + swap='right_ring_finger2'), + 36: + dict( + name='left_ring_finger1', + id=36, + color=[255, 51, 51], + type='', + swap='right_ring_finger1'), + 37: + dict( + name='left_pinky_finger4', + id=37, + color=[0, 255, 0], + type='', + swap='right_pinky_finger4'), + 38: + dict( + name='left_pinky_finger3', + id=38, + color=[0, 255, 0], + type='', + swap='right_pinky_finger3'), + 39: + dict( + name='left_pinky_finger2', + id=39, + color=[0, 255, 0], + type='', + swap='right_pinky_finger2'), + 40: + dict( + name='left_pinky_finger1', + id=40, + color=[0, 255, 0], + type='', + swap='right_pinky_finger1'), + 41: + dict( + name='left_wrist', + id=41, + color=[255, 255, 255], + type='', + swap='right_wrist'), + }, + skeleton_info={ + 0: + dict(link=('right_wrist', 'right_thumb1'), id=0, color=[255, 128, 0]), + 1: + dict(link=('right_thumb1', 'right_thumb2'), id=1, color=[255, 128, 0]), + 2: + dict(link=('right_thumb2', 'right_thumb3'), id=2, color=[255, 128, 0]), + 3: + dict(link=('right_thumb3', 'right_thumb4'), id=3, color=[255, 128, 0]), + 4: + dict( + link=('right_wrist', 'right_forefinger1'), + id=4, + color=[255, 153, 255]), + 5: + dict( + link=('right_forefinger1', 'right_forefinger2'), + id=5, + color=[255, 153, 255]), + 6: + dict( + link=('right_forefinger2', 'right_forefinger3'), + id=6, + color=[255, 153, 255]), + 7: + dict( + link=('right_forefinger3', 'right_forefinger4'), + id=7, + color=[255, 153, 255]), + 8: + dict( + link=('right_wrist', 'right_middle_finger1'), + id=8, + color=[102, 178, 255]), + 9: + dict( + link=('right_middle_finger1', 'right_middle_finger2'), + id=9, + color=[102, 178, 255]), + 10: + dict( + link=('right_middle_finger2', 'right_middle_finger3'), + id=10, + color=[102, 178, 255]), + 11: + dict( + link=('right_middle_finger3',
'right_middle_finger4'), + id=11, + color=[102, 178, 255]), + 12: + dict( + link=('right_wrist', 'right_ring_finger1'), + id=12, + color=[255, 51, 51]), + 13: + dict( + link=('right_ring_finger1', 'right_ring_finger2'), + id=13, + color=[255, 51, 51]), + 14: + dict( + link=('right_ring_finger2', 'right_ring_finger3'), + id=14, + color=[255, 51, 51]), + 15: + dict( + link=('right_ring_finger3', 'right_ring_finger4'), + id=15, + color=[255, 51, 51]), + 16: + dict( + link=('right_wrist', 'right_pinky_finger1'), + id=16, + color=[0, 255, 0]), + 17: + dict( + link=('right_pinky_finger1', 'right_pinky_finger2'), + id=17, + color=[0, 255, 0]), + 18: + dict( + link=('right_pinky_finger2', 'right_pinky_finger3'), + id=18, + color=[0, 255, 0]), + 19: + dict( + link=('right_pinky_finger3', 'right_pinky_finger4'), + id=19, + color=[0, 255, 0]), + 20: + dict(link=('left_wrist', 'left_thumb1'), id=20, color=[255, 128, 0]), + 21: + dict(link=('left_thumb1', 'left_thumb2'), id=21, color=[255, 128, 0]), + 22: + dict(link=('left_thumb2', 'left_thumb3'), id=22, color=[255, 128, 0]), + 23: + dict(link=('left_thumb3', 'left_thumb4'), id=23, color=[255, 128, 0]), + 24: + dict( + link=('left_wrist', 'left_forefinger1'), + id=24, + color=[255, 153, 255]), + 25: + dict( + link=('left_forefinger1', 'left_forefinger2'), + id=25, + color=[255, 153, 255]), + 26: + dict( + link=('left_forefinger2', 'left_forefinger3'), + id=26, + color=[255, 153, 255]), + 27: + dict( + link=('left_forefinger3', 'left_forefinger4'), + id=27, + color=[255, 153, 255]), + 28: + dict( + link=('left_wrist', 'left_middle_finger1'), + id=28, + color=[102, 178, 255]), + 29: + dict( + link=('left_middle_finger1', 'left_middle_finger2'), + id=29, + color=[102, 178, 255]), + 30: + dict( + link=('left_middle_finger2', 'left_middle_finger3'), + id=30, + color=[102, 178, 255]), + 31: + dict( + link=('left_middle_finger3', 'left_middle_finger4'), + id=31, + color=[102, 178, 255]), + 32: + dict( + link=('left_wrist', 'left_ring_finger1'), + id=32, + color=[255, 51, 51]), + 33: + dict( + link=('left_ring_finger1', 'left_ring_finger2'), + id=33, + color=[255, 51, 51]), + 34: + dict( + link=('left_ring_finger2', 'left_ring_finger3'), + id=34, + color=[255, 51, 51]), + 35: + dict( + link=('left_ring_finger3', 'left_ring_finger4'), + id=35, + color=[255, 51, 51]), + 36: + dict( + link=('left_wrist', 'left_pinky_finger1'), + id=36, + color=[0, 255, 0]), + 37: + dict( + link=('left_pinky_finger1', 'left_pinky_finger2'), + id=37, + color=[0, 255, 0]), + 38: + dict( + link=('left_pinky_finger2', 'left_pinky_finger3'), + id=38, + color=[0, 255, 0]), + 39: + dict( + link=('left_pinky_finger3', 'left_pinky_finger4'), + id=39, + color=[0, 255, 0]), + }, + joint_weights=[1.] * 42, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/jhmdb.py b/mmpose/configs/_base_/datasets/jhmdb.py new file mode 100644 index 0000000000000000000000000000000000000000..1b37488498a2bade1fa6f2ff6532fcd219071803 --- /dev/null +++ b/mmpose/configs/_base_/datasets/jhmdb.py @@ -0,0 +1,129 @@ +dataset_info = dict( + dataset_name='jhmdb', + paper_info=dict( + author='H. Jhuang and J. Gall and S. Zuffi and ' + 'C. Schmid and M. J. Black', + title='Towards understanding action recognition', + container='International Conf. 
on Computer Vision (ICCV)', + year='2013', + homepage='http://jhmdb.is.tue.mpg.de/dataset', + ), + keypoint_info={ + 0: + dict(name='neck', id=0, color=[255, 128, 0], type='upper', swap=''), + 1: + dict(name='belly', id=1, color=[255, 128, 0], type='upper', swap=''), + 2: + dict(name='head', id=2, color=[255, 128, 0], type='upper', swap=''), + 3: + dict( + name='right_shoulder', + id=3, + color=[0, 255, 0], + type='upper', + swap='left_shoulder'), + 4: + dict( + name='left_shoulder', + id=4, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 5: + dict( + name='right_hip', + id=5, + color=[0, 255, 0], + type='lower', + swap='left_hip'), + 6: + dict( + name='left_hip', + id=6, + color=[51, 153, 255], + type='lower', + swap='right_hip'), + 7: + dict( + name='right_elbow', + id=7, + color=[51, 153, 255], + type='upper', + swap='left_elbow'), + 8: + dict( + name='left_elbow', + id=8, + color=[51, 153, 255], + type='upper', + swap='right_elbow'), + 9: + dict( + name='right_knee', + id=9, + color=[51, 153, 255], + type='lower', + swap='left_knee'), + 10: + dict( + name='left_knee', + id=10, + color=[255, 128, 0], + type='lower', + swap='right_knee'), + 11: + dict( + name='right_wrist', + id=11, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 12: + dict( + name='left_wrist', + id=12, + color=[255, 128, 0], + type='upper', + swap='right_wrist'), + 13: + dict( + name='right_ankle', + id=13, + color=[0, 255, 0], + type='lower', + swap='left_ankle'), + 14: + dict( + name='left_ankle', + id=14, + color=[0, 255, 0], + type='lower', + swap='right_ankle') + }, + skeleton_info={ + 0: dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]), + 1: dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]), + 2: dict(link=('right_hip', 'belly'), id=2, color=[255, 128, 0]), + 3: dict(link=('belly', 'left_hip'), id=3, color=[0, 255, 0]), + 4: dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]), + 5: dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]), + 6: dict(link=('belly', 'neck'), id=6, color=[51, 153, 255]), + 7: dict(link=('neck', 'head'), id=7, color=[51, 153, 255]), + 8: dict(link=('neck', 'right_shoulder'), id=8, color=[255, 128, 0]), + 9: dict( + link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), + 10: + dict(link=('right_elbow', 'right_wrist'), id=10, color=[255, 128, 0]), + 11: dict(link=('neck', 'left_shoulder'), id=11, color=[0, 255, 0]), + 12: + dict(link=('left_shoulder', 'left_elbow'), id=12, color=[0, 255, 0]), + 13: dict(link=('left_elbow', 'left_wrist'), id=13, color=[0, 255, 0]) + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2, 1.5, 1.5, 1.5, 1.5 + ], + # Adapted from COCO dataset. 
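+ # These are per-keypoint OKS constants: COCO-style keypoint evaluation
+ # scores keypoint i as exp(-d_i**2 / (2 * s**2 * k_i**2)) with
+ # k_i = 2 * sigmas[i] and s the object scale, so a larger sigma
+ # tolerates a larger localization error for that keypoint.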
+ sigmas=[ + 0.025, 0.107, 0.025, 0.079, 0.079, 0.107, 0.107, 0.072, 0.072, 0.087, + 0.087, 0.062, 0.062, 0.089, 0.089 + ]) diff --git a/mmpose/configs/_base_/datasets/lapa.py b/mmpose/configs/_base_/datasets/lapa.py new file mode 100644 index 0000000000000000000000000000000000000000..26a0843404eaed46c0af9249aee6f90ef952c216 --- /dev/null +++ b/mmpose/configs/_base_/datasets/lapa.py @@ -0,0 +1,688 @@ +dataset_info = dict( + dataset_name='lapa', + paper_info=dict( + author='Liu, Yinglu and Shi, Hailin and Shen, Hao and Si, ' + 'Yue and Wang, Xiaobo and Mei, Tao', + title='A New Dataset and Boundary-Attention Semantic ' + 'Segmentation for Face Parsing.', + container='Proceedings of the AAAI Conference on ' + 'Artificial Intelligence 2020', + year='2020', + homepage='https://github.com/JDAI-CV/lapa-dataset', + ), + keypoint_info={ + 0: + dict( + name='kpt-0', id=0, color=[255, 0, 0], type='upper', + swap='kpt-32'), + 1: + dict( + name='kpt-1', id=1, color=[255, 0, 0], type='upper', + swap='kpt-31'), + 2: + dict( + name='kpt-2', id=2, color=[255, 0, 0], type='upper', + swap='kpt-30'), + 3: + dict( + name='kpt-3', id=3, color=[255, 0, 0], type='lower', + swap='kpt-29'), + 4: + dict( + name='kpt-4', id=4, color=[255, 0, 0], type='lower', + swap='kpt-28'), + 5: + dict( + name='kpt-5', id=5, color=[255, 0, 0], type='lower', + swap='kpt-27'), + 6: + dict( + name='kpt-6', id=6, color=[255, 0, 0], type='lower', + swap='kpt-26'), + 7: + dict( + name='kpt-7', id=7, color=[255, 0, 0], type='lower', + swap='kpt-25'), + 8: + dict( + name='kpt-8', id=8, color=[255, 0, 0], type='lower', + swap='kpt-24'), + 9: + dict( + name='kpt-9', id=9, color=[255, 0, 0], type='lower', + swap='kpt-23'), + 10: + dict( + name='kpt-10', + id=10, + color=[255, 0, 0], + type='lower', + swap='kpt-22'), + 11: + dict( + name='kpt-11', + id=11, + color=[255, 0, 0], + type='lower', + swap='kpt-21'), + 12: + dict( + name='kpt-12', + id=12, + color=[255, 0, 0], + type='lower', + swap='kpt-20'), + 13: + dict( + name='kpt-13', + id=13, + color=[255, 0, 0], + type='lower', + swap='kpt-19'), + 14: + dict( + name='kpt-14', + id=14, + color=[255, 0, 0], + type='lower', + swap='kpt-18'), + 15: + dict( + name='kpt-15', + id=15, + color=[255, 0, 0], + type='lower', + swap='kpt-17'), + 16: + dict(name='kpt-16', id=16, color=[255, 0, 0], type='lower', swap=''), + 17: + dict( + name='kpt-17', + id=17, + color=[255, 0, 0], + type='lower', + swap='kpt-15'), + 18: + dict( + name='kpt-18', + id=18, + color=[255, 0, 0], + type='lower', + swap='kpt-14'), + 19: + dict( + name='kpt-19', + id=19, + color=[255, 0, 0], + type='lower', + swap='kpt-13'), + 20: + dict( + name='kpt-20', + id=20, + color=[255, 0, 0], + type='lower', + swap='kpt-12'), + 21: + dict( + name='kpt-21', + id=21, + color=[255, 0, 0], + type='lower', + swap='kpt-11'), + 22: + dict( + name='kpt-22', + id=22, + color=[255, 0, 0], + type='lower', + swap='kpt-10'), + 23: + dict( + name='kpt-23', + id=23, + color=[255, 0, 0], + type='lower', + swap='kpt-9'), + 24: + dict( + name='kpt-24', + id=24, + color=[255, 0, 0], + type='lower', + swap='kpt-8'), + 25: + dict( + name='kpt-25', + id=25, + color=[255, 0, 0], + type='lower', + swap='kpt-7'), + 26: + dict( + name='kpt-26', + id=26, + color=[255, 0, 0], + type='lower', + swap='kpt-6'), + 27: + dict( + name='kpt-27', + id=27, + color=[255, 0, 0], + type='lower', + swap='kpt-5'), + 28: + dict( + name='kpt-28', + id=28, + color=[255, 0, 0], + type='lower', + swap='kpt-4'), + 29: + dict( + name='kpt-29', + id=29, + color=[255, 0, 0], + 
type='lower', + swap='kpt-3'), + 30: + dict( + name='kpt-30', + id=30, + color=[255, 0, 0], + type='upper', + swap='kpt-2'), + 31: + dict( + name='kpt-31', + id=31, + color=[255, 0, 0], + type='upper', + swap='kpt-1'), + 32: + dict( + name='kpt-32', + id=32, + color=[255, 0, 0], + type='upper', + swap='kpt-0'), + 33: + dict( + name='kpt-33', + id=33, + color=[255, 0, 0], + type='upper', + swap='kpt-46'), + 34: + dict( + name='kpt-34', + id=34, + color=[255, 0, 0], + type='upper', + swap='kpt-45'), + 35: + dict( + name='kpt-35', + id=35, + color=[255, 0, 0], + type='upper', + swap='kpt-44'), + 36: + dict( + name='kpt-36', + id=36, + color=[255, 0, 0], + type='upper', + swap='kpt-43'), + 37: + dict( + name='kpt-37', + id=37, + color=[255, 0, 0], + type='upper', + swap='kpt-42'), + 38: + dict( + name='kpt-38', + id=38, + color=[255, 0, 0], + type='upper', + swap='kpt-50'), + 39: + dict( + name='kpt-39', + id=39, + color=[255, 0, 0], + type='upper', + swap='kpt-49'), + 40: + dict( + name='kpt-40', + id=40, + color=[255, 0, 0], + type='upper', + swap='kpt-48'), + 41: + dict( + name='kpt-41', + id=41, + color=[255, 0, 0], + type='upper', + swap='kpt-47'), + 42: + dict( + name='kpt-42', + id=42, + color=[255, 0, 0], + type='upper', + swap='kpt-37'), + 43: + dict( + name='kpt-43', + id=43, + color=[255, 0, 0], + type='upper', + swap='kpt-36'), + 44: + dict( + name='kpt-44', + id=44, + color=[255, 0, 0], + type='upper', + swap='kpt-35'), + 45: + dict( + name='kpt-45', + id=45, + color=[255, 0, 0], + type='upper', + swap='kpt-34'), + 46: + dict( + name='kpt-46', + id=46, + color=[255, 0, 0], + type='upper', + swap='kpt-33'), + 47: + dict( + name='kpt-47', + id=47, + color=[255, 0, 0], + type='upper', + swap='kpt-41'), + 48: + dict( + name='kpt-48', + id=48, + color=[255, 0, 0], + type='upper', + swap='kpt-40'), + 49: + dict( + name='kpt-49', + id=49, + color=[255, 0, 0], + type='upper', + swap='kpt-39'), + 50: + dict( + name='kpt-50', + id=50, + color=[255, 0, 0], + type='upper', + swap='kpt-38'), + 51: + dict(name='kpt-51', id=51, color=[255, 0, 0], type='upper', swap=''), + 52: + dict(name='kpt-52', id=52, color=[255, 0, 0], type='upper', swap=''), + 53: + dict(name='kpt-53', id=53, color=[255, 0, 0], type='lower', swap=''), + 54: + dict(name='kpt-54', id=54, color=[255, 0, 0], type='lower', swap=''), + 55: + dict( + name='kpt-55', + id=55, + color=[255, 0, 0], + type='upper', + swap='kpt-65'), + 56: + dict( + name='kpt-56', + id=56, + color=[255, 0, 0], + type='lower', + swap='kpt-64'), + 57: + dict( + name='kpt-57', + id=57, + color=[255, 0, 0], + type='lower', + swap='kpt-63'), + 58: + dict( + name='kpt-58', + id=58, + color=[255, 0, 0], + type='lower', + swap='kpt-62'), + 59: + dict( + name='kpt-59', + id=59, + color=[255, 0, 0], + type='lower', + swap='kpt-61'), + 60: + dict(name='kpt-60', id=60, color=[255, 0, 0], type='lower', swap=''), + 61: + dict( + name='kpt-61', + id=61, + color=[255, 0, 0], + type='lower', + swap='kpt-59'), + 62: + dict( + name='kpt-62', + id=62, + color=[255, 0, 0], + type='lower', + swap='kpt-58'), + 63: + dict( + name='kpt-63', + id=63, + color=[255, 0, 0], + type='lower', + swap='kpt-57'), + 64: + dict( + name='kpt-64', + id=64, + color=[255, 0, 0], + type='lower', + swap='kpt-56'), + 65: + dict( + name='kpt-65', + id=65, + color=[255, 0, 0], + type='upper', + swap='kpt-55'), + 66: + dict( + name='kpt-66', + id=66, + color=[255, 0, 0], + type='upper', + swap='kpt-79'), + 67: + dict( + name='kpt-67', + id=67, + color=[255, 0, 0], + type='upper', + swap='kpt-78'), + 
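+ # 'swap' names the horizontally mirrored landmark used when an image is
+ # flipped during augmentation or flip-testing; landmarks on the symmetry
+ # axis (e.g. 'kpt-16', 'kpt-51') keep swap=''.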
68: + dict( + name='kpt-68', + id=68, + color=[255, 0, 0], + type='upper', + swap='kpt-77'), + 69: + dict( + name='kpt-69', + id=69, + color=[255, 0, 0], + type='upper', + swap='kpt-76'), + 70: + dict( + name='kpt-70', + id=70, + color=[255, 0, 0], + type='upper', + swap='kpt-75'), + 71: + dict( + name='kpt-71', + id=71, + color=[255, 0, 0], + type='upper', + swap='kpt-82'), + 72: + dict( + name='kpt-72', + id=72, + color=[255, 0, 0], + type='upper', + swap='kpt-81'), + 73: + dict( + name='kpt-73', + id=73, + color=[255, 0, 0], + type='upper', + swap='kpt-80'), + 74: + dict( + name='kpt-74', + id=74, + color=[255, 0, 0], + type='upper', + swap='kpt-83'), + 75: + dict( + name='kpt-75', + id=75, + color=[255, 0, 0], + type='upper', + swap='kpt-70'), + 76: + dict( + name='kpt-76', + id=76, + color=[255, 0, 0], + type='upper', + swap='kpt-69'), + 77: + dict( + name='kpt-77', + id=77, + color=[255, 0, 0], + type='upper', + swap='kpt-68'), + 78: + dict( + name='kpt-78', + id=78, + color=[255, 0, 0], + type='upper', + swap='kpt-67'), + 79: + dict( + name='kpt-79', + id=79, + color=[255, 0, 0], + type='upper', + swap='kpt-66'), + 80: + dict( + name='kpt-80', + id=80, + color=[255, 0, 0], + type='upper', + swap='kpt-73'), + 81: + dict( + name='kpt-81', + id=81, + color=[255, 0, 0], + type='upper', + swap='kpt-72'), + 82: + dict( + name='kpt-82', + id=82, + color=[255, 0, 0], + type='upper', + swap='kpt-71'), + 83: + dict( + name='kpt-83', + id=83, + color=[255, 0, 0], + type='upper', + swap='kpt-74'), + 84: + dict( + name='kpt-84', + id=84, + color=[255, 0, 0], + type='lower', + swap='kpt-90'), + 85: + dict( + name='kpt-85', + id=85, + color=[255, 0, 0], + type='lower', + swap='kpt-89'), + 86: + dict( + name='kpt-86', + id=86, + color=[255, 0, 0], + type='lower', + swap='kpt-88'), + 87: + dict(name='kpt-87', id=87, color=[255, 0, 0], type='lower', swap=''), + 88: + dict( + name='kpt-88', + id=88, + color=[255, 0, 0], + type='lower', + swap='kpt-86'), + 89: + dict( + name='kpt-89', + id=89, + color=[255, 0, 0], + type='lower', + swap='kpt-85'), + 90: + dict( + name='kpt-90', + id=90, + color=[255, 0, 0], + type='lower', + swap='kpt-84'), + 91: + dict( + name='kpt-91', + id=91, + color=[255, 0, 0], + type='lower', + swap='kpt-95'), + 92: + dict( + name='kpt-92', + id=92, + color=[255, 0, 0], + type='lower', + swap='kpt-94'), + 93: + dict(name='kpt-93', id=93, color=[255, 0, 0], type='lower', swap=''), + 94: + dict( + name='kpt-94', + id=94, + color=[255, 0, 0], + type='lower', + swap='kpt-92'), + 95: + dict( + name='kpt-95', + id=95, + color=[255, 0, 0], + type='lower', + swap='kpt-91'), + 96: + dict( + name='kpt-96', + id=96, + color=[255, 0, 0], + type='lower', + swap='kpt-100'), + 97: + dict( + name='kpt-97', + id=97, + color=[255, 0, 0], + type='lower', + swap='kpt-99'), + 98: + dict(name='kpt-98', id=98, color=[255, 0, 0], type='lower', swap=''), + 99: + dict( + name='kpt-99', + id=99, + color=[255, 0, 0], + type='lower', + swap='kpt-97'), + 100: + dict( + name='kpt-100', + id=100, + color=[255, 0, 0], + type='lower', + swap='kpt-96'), + 101: + dict( + name='kpt-101', + id=101, + color=[255, 0, 0], + type='lower', + swap='kpt-103'), + 102: + dict(name='kpt-102', id=102, color=[255, 0, 0], type='lower', swap=''), + 103: + dict( + name='kpt-103', + id=103, + color=[255, 0, 0], + type='lower', + swap='kpt-101'), + 104: + dict( + name='kpt-104', + id=104, + color=[255, 0, 0], + type='upper', + swap='kpt-105'), + 105: + dict( + name='kpt-105', + id=105, + color=[255, 0, 0], + type='upper', + 
swap='kpt-104') + }, + skeleton_info={}, + joint_weights=[ + 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, + 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, + 0.8, 0.8, 0.8, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, + 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, + 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, + 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.0, 1.0 + ], + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/locust.py b/mmpose/configs/_base_/datasets/locust.py new file mode 100644 index 0000000000000000000000000000000000000000..db3fa15aa060b5806faae7a21f65460f77be2745 --- /dev/null +++ b/mmpose/configs/_base_/datasets/locust.py @@ -0,0 +1,263 @@ +dataset_info = dict( + dataset_name='locust', + paper_info=dict( + author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and ' + 'Li, Liang and Koger, Benjamin and Costelloe, Blair R and ' + 'Couzin, Iain D', + title='DeepPoseKit, a software toolkit for fast and robust ' + 'animal pose estimation using deep learning', + container='Elife', + year='2019', + homepage='https://github.com/jgraving/DeepPoseKit-Data', + ), + keypoint_info={ + 0: + dict(name='head', id=0, color=[255, 255, 255], type='', swap=''), + 1: + dict(name='neck', id=1, color=[255, 255, 255], type='', swap=''), + 2: + dict(name='thorax', id=2, color=[255, 255, 255], type='', swap=''), + 3: + dict(name='abdomen1', id=3, color=[255, 255, 255], type='', swap=''), + 4: + dict(name='abdomen2', id=4, color=[255, 255, 255], type='', swap=''), + 5: + dict( + name='anttipL', + id=5, + color=[255, 255, 255], + type='', + swap='anttipR'), + 6: + dict( + name='antbaseL', + id=6, + color=[255, 255, 255], + type='', + swap='antbaseR'), + 7: + dict(name='eyeL', id=7, color=[255, 255, 255], type='', swap='eyeR'), + 8: + dict( + name='forelegL1', + id=8, + color=[255, 255, 255], + type='', + swap='forelegR1'), + 9: + dict( + name='forelegL2', + id=9, + color=[255, 255, 255], + type='', + swap='forelegR2'), + 10: + dict( + name='forelegL3', + id=10, + color=[255, 255, 255], + type='', + swap='forelegR3'), + 11: + dict( + name='forelegL4', + id=11, + color=[255, 255, 255], + type='', + swap='forelegR4'), + 12: + dict( + name='midlegL1', + id=12, + color=[255, 255, 255], + type='', + swap='midlegR1'), + 13: + dict( + name='midlegL2', + id=13, + color=[255, 255, 255], + type='', + swap='midlegR2'), + 14: + dict( + name='midlegL3', + id=14, + color=[255, 255, 255], + type='', + swap='midlegR3'), + 15: + dict( + name='midlegL4', + id=15, + color=[255, 255, 255], + type='', + swap='midlegR4'), + 16: + dict( + name='hindlegL1', + id=16, + color=[255, 255, 255], + type='', + swap='hindlegR1'), + 17: + dict( + name='hindlegL2', + id=17, + color=[255, 255, 255], + type='', + swap='hindlegR2'), + 18: + dict( + name='hindlegL3', + id=18, + color=[255, 255, 255], + type='', + swap='hindlegR3'), + 19: + dict( + name='hindlegL4', + id=19, + color=[255, 255, 255], + type='', + swap='hindlegR4'), + 20: + dict( + name='anttipR', + id=20, + color=[255, 255, 255], + type='', + swap='anttipL'), + 21: + dict( + name='antbaseR', + id=21, + color=[255, 255, 255], + type='', + swap='antbaseL'), + 22: + dict(name='eyeR', id=22, color=[255, 255, 255], type='', swap='eyeL'), + 23: + dict( + name='forelegR1', + id=23, + color=[255, 255, 255], + type='', + swap='forelegL1'), 
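+ # Swap assignments are expected to be mutual ('forelegR1' <-> 'forelegL1',
+ # etc.) so that flipping an image twice maps every keypoint back to itself.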
+ 24: + dict( + name='forelegR2', + id=24, + color=[255, 255, 255], + type='', + swap='forelegL2'), + 25: + dict( + name='forelegR3', + id=25, + color=[255, 255, 255], + type='', + swap='forelegL3'), + 26: + dict( + name='forelegR4', + id=26, + color=[255, 255, 255], + type='', + swap='forelegL4'), + 27: + dict( + name='midlegR1', + id=27, + color=[255, 255, 255], + type='', + swap='midlegL1'), + 28: + dict( + name='midlegR2', + id=28, + color=[255, 255, 255], + type='', + swap='midlegL2'), + 29: + dict( + name='midlegR3', + id=29, + color=[255, 255, 255], + type='', + swap='midlegL3'), + 30: + dict( + name='midlegR4', + id=30, + color=[255, 255, 255], + type='', + swap='midlegL4'), + 31: + dict( + name='hindlegR1', + id=31, + color=[255, 255, 255], + type='', + swap='hindlegL1'), + 32: + dict( + name='hindlegR2', + id=32, + color=[255, 255, 255], + type='', + swap='hindlegL2'), + 33: + dict( + name='hindlegR3', + id=33, + color=[255, 255, 255], + type='', + swap='hindlegL3'), + 34: + dict( + name='hindlegR4', + id=34, + color=[255, 255, 255], + type='', + swap='hindlegL4') + }, + skeleton_info={ + 0: dict(link=('neck', 'head'), id=0, color=[255, 255, 255]), + 1: dict(link=('thorax', 'neck'), id=1, color=[255, 255, 255]), + 2: dict(link=('abdomen1', 'thorax'), id=2, color=[255, 255, 255]), + 3: dict(link=('abdomen2', 'abdomen1'), id=3, color=[255, 255, 255]), + 4: dict(link=('antbaseL', 'anttipL'), id=4, color=[255, 255, 255]), + 5: dict(link=('eyeL', 'antbaseL'), id=5, color=[255, 255, 255]), + 6: dict(link=('forelegL2', 'forelegL1'), id=6, color=[255, 255, 255]), + 7: dict(link=('forelegL3', 'forelegL2'), id=7, color=[255, 255, 255]), + 8: dict(link=('forelegL4', 'forelegL3'), id=8, color=[255, 255, 255]), + 9: dict(link=('midlegL2', 'midlegL1'), id=9, color=[255, 255, 255]), + 10: dict(link=('midlegL3', 'midlegL2'), id=10, color=[255, 255, 255]), + 11: dict(link=('midlegL4', 'midlegL3'), id=11, color=[255, 255, 255]), + 12: + dict(link=('hindlegL2', 'hindlegL1'), id=12, color=[255, 255, 255]), + 13: + dict(link=('hindlegL3', 'hindlegL2'), id=13, color=[255, 255, 255]), + 14: + dict(link=('hindlegL4', 'hindlegL3'), id=14, color=[255, 255, 255]), + 15: dict(link=('antbaseR', 'anttipR'), id=15, color=[255, 255, 255]), + 16: dict(link=('eyeR', 'antbaseR'), id=16, color=[255, 255, 255]), + 17: + dict(link=('forelegR2', 'forelegR1'), id=17, color=[255, 255, 255]), + 18: + dict(link=('forelegR3', 'forelegR2'), id=18, color=[255, 255, 255]), + 19: + dict(link=('forelegR4', 'forelegR3'), id=19, color=[255, 255, 255]), + 20: dict(link=('midlegR2', 'midlegR1'), id=20, color=[255, 255, 255]), + 21: dict(link=('midlegR3', 'midlegR2'), id=21, color=[255, 255, 255]), + 22: dict(link=('midlegR4', 'midlegR3'), id=22, color=[255, 255, 255]), + 23: + dict(link=('hindlegR2', 'hindlegR1'), id=23, color=[255, 255, 255]), + 24: + dict(link=('hindlegR3', 'hindlegR2'), id=24, color=[255, 255, 255]), + 25: + dict(link=('hindlegR4', 'hindlegR3'), id=25, color=[255, 255, 255]) + }, + joint_weights=[1.] 
* 35, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/macaque.py b/mmpose/configs/_base_/datasets/macaque.py new file mode 100644 index 0000000000000000000000000000000000000000..ea8dac297ea2f0e36dabccccc021d953216a6ac8 --- /dev/null +++ b/mmpose/configs/_base_/datasets/macaque.py @@ -0,0 +1,183 @@ +dataset_info = dict( + dataset_name='macaque', + paper_info=dict( + author='Labuguen, Rollyn and Matsumoto, Jumpei and ' + 'Negrete, Salvador and Nishimaru, Hiroshi and ' + 'Nishijo, Hisao and Takada, Masahiko and ' + 'Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro', + title='MacaquePose: A novel "in the wild" macaque monkey pose dataset ' + 'for markerless motion capture', + container='bioRxiv', + year='2020', + homepage='http://www.pri.kyoto-u.ac.jp/datasets/' + 'macaquepose/index.html', + ), + keypoint_info={ + 0: + dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''), + 1: + dict( + name='left_eye', + id=1, + color=[51, 153, 255], + type='upper', + swap='right_eye'), + 2: + dict( + name='right_eye', + id=2, + color=[51, 153, 255], + type='upper', + swap='left_eye'), + 3: + dict( + name='left_ear', + id=3, + color=[51, 153, 255], + type='upper', + swap='right_ear'), + 4: + dict( + name='right_ear', + id=4, + color=[51, 153, 255], + type='upper', + swap='left_ear'), + 5: + dict( + name='left_shoulder', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 6: + dict( + name='right_shoulder', + id=6, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 7: + dict( + name='left_elbow', + id=7, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 8: + dict( + name='right_elbow', + id=8, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 9: + dict( + name='left_wrist', + id=9, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='left_hip', + id=11, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 12: + dict( + name='right_hip', + id=12, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 13: + dict( + name='left_knee', + id=13, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 14: + dict( + name='right_knee', + id=14, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 15: + dict( + name='left_ankle', + id=15, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 16: + dict( + name='right_ankle', + id=16, + color=[255, 128, 0], + type='lower', + swap='left_ankle') + }, + skeleton_info={ + 0: + dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + 2: + dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]), + 3: + dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]), + 4: + dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]), + 5: + dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]), + 6: + dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]), + 7: + dict( + link=('left_shoulder', 'right_shoulder'), + id=7, + color=[51, 153, 255]), + 8: + dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]), + 9: + dict( + link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), + 10: + dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('left_eye', 
'right_eye'), id=12, color=[51, 153, 255]), + 13: + dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]), + 14: + dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]), + 15: + dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]), + 16: + dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]), + 17: + dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]), + 18: + dict( + link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]) + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, + 1.5 + ], + sigmas=[ + 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, + 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089 + ]) diff --git a/mmpose/configs/_base_/datasets/mhp.py b/mmpose/configs/_base_/datasets/mhp.py new file mode 100644 index 0000000000000000000000000000000000000000..e16e37c79cb63c4352c48bb4e45602b8408f534b --- /dev/null +++ b/mmpose/configs/_base_/datasets/mhp.py @@ -0,0 +1,156 @@ +dataset_info = dict( + dataset_name='mhp', + paper_info=dict( + author='Zhao, Jian and Li, Jianshu and Cheng, Yu and ' + 'Sim, Terence and Yan, Shuicheng and Feng, Jiashi', + title='Understanding humans in crowded scenes: ' + 'Deep nested adversarial learning and a ' + 'new benchmark for multi-human parsing', + container='Proceedings of the 26th ACM ' + 'international conference on Multimedia', + year='2018', + homepage='https://lv-mhp.github.io/dataset', + ), + keypoint_info={ + 0: + dict( + name='right_ankle', + id=0, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 1: + dict( + name='right_knee', + id=1, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 2: + dict( + name='right_hip', + id=2, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 3: + dict( + name='left_hip', + id=3, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 4: + dict( + name='left_knee', + id=4, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 5: + dict( + name='left_ankle', + id=5, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 6: + dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''), + 7: + dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''), + 8: + dict( + name='upper_neck', + id=8, + color=[51, 153, 255], + type='upper', + swap=''), + 9: + dict( + name='head_top', id=9, color=[51, 153, 255], type='upper', + swap=''), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='right_elbow', + id=11, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 12: + dict( + name='right_shoulder', + id=12, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 13: + dict( + name='left_shoulder', + id=13, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 14: + dict( + name='left_elbow', + id=14, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 15: + dict( + name='left_wrist', + id=15, + color=[0, 255, 0], + type='upper', + swap='right_wrist') + }, + skeleton_info={ + 0: + dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]), + 1: + dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]), + 2: + dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]), + 3: + dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]), + 4: + dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]), + 5: + dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]), + 6: + 
dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]), + 7: + dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]), + 8: + dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]), + 9: + dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]), + 10: + dict( + link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128, + 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]), + 13: + dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]), + 14: + dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0]) + }, + joint_weights=[ + 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5 + ], + # Adapted from COCO dataset. + sigmas=[ + 0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026, + 0.062, 0.072, 0.179, 0.179, 0.072, 0.062 + ]) diff --git a/mmpose/configs/_base_/datasets/mpi_inf_3dhp.py b/mmpose/configs/_base_/datasets/mpi_inf_3dhp.py new file mode 100644 index 0000000000000000000000000000000000000000..ffd0a70297b24456ea38566ac205bb585aa47e5d --- /dev/null +++ b/mmpose/configs/_base_/datasets/mpi_inf_3dhp.py @@ -0,0 +1,132 @@ +dataset_info = dict( + dataset_name='mpi_inf_3dhp', + paper_info=dict( + author='Mehta, Dushyant and Rhodin, Helge and Casas, Dan and ' + 'Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and ' + 'Theobalt, Christian', + title='Monocular 3D Human Pose Estimation In The Wild Using Improved ' + 'CNN Supervision', + container='2017 International Conference on 3D Vision (3DV)', + year='2017', + homepage='http://gvv.mpi-inf.mpg.de/3dhp-dataset', + ), + keypoint_info={ + 0: + dict( + name='head_top', id=0, color=[51, 153, 255], type='upper', + swap=''), + 1: + dict(name='neck', id=1, color=[51, 153, 255], type='upper', swap=''), + 2: + dict( + name='right_shoulder', + id=2, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 3: + dict( + name='right_elbow', + id=3, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 4: + dict( + name='right_wrist', + id=4, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 5: + dict( + name='left_shoulder', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 6: + dict( + name='left_elbow', + id=6, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 7: + dict( + name='left_wrist', + id=7, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 8: + dict( + name='right_hip', + id=8, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 9: + dict( + name='right_knee', + id=9, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 10: + dict( + name='right_ankle', + id=10, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 11: + dict( + name='left_hip', + id=11, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 12: + dict( + name='left_knee', + id=12, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 13: + dict( + name='left_ankle', + id=13, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 14: + dict(name='root', id=14, color=[51, 153, 255], type='lower', swap=''), + 15: + dict(name='spine', id=15, color=[51, 153, 255], type='upper', swap=''), + 16: + dict(name='head', id=16, color=[51, 153, 255], type='upper', swap='') + }, + skeleton_info={ + 0: dict(link=('neck', 'right_shoulder'), id=0, color=[255, 128, 0]), + 1: dict( + link=('right_shoulder', 'right_elbow'), id=1, color=[255, 128, 0]), + 2: +
dict(link=('right_elbow', 'right_wrist'), id=2, color=[255, 128, 0]), + 3: dict(link=('neck', 'left_shoulder'), id=3, color=[0, 255, 0]), + 4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]), + 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]), + 6: dict(link=('root', 'right_hip'), id=6, color=[255, 128, 0]), + 7: dict(link=('right_hip', 'right_knee'), id=7, color=[255, 128, 0]), + 8: dict(link=('right_knee', 'right_ankle'), id=8, color=[255, 128, 0]), + 9: dict(link=('root', 'left_hip'), id=9, color=[0, 255, 0]), + 10: dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]), + 11: dict(link=('left_knee', 'left_ankle'), id=11, color=[0, 255, 0]), + 12: dict(link=('head_top', 'head'), id=12, color=[51, 153, 255]), + 13: dict(link=('head', 'neck'), id=13, color=[51, 153, 255]), + 14: dict(link=('neck', 'spine'), id=14, color=[51, 153, 255]), + 15: dict(link=('spine', 'root'), id=15, color=[51, 153, 255]) + }, + joint_weights=[1.] * 17, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/mpii.py b/mmpose/configs/_base_/datasets/mpii.py new file mode 100644 index 0000000000000000000000000000000000000000..6c2a491c7b58bc3eaa5c0056d3d7184bdd1d1cc7 --- /dev/null +++ b/mmpose/configs/_base_/datasets/mpii.py @@ -0,0 +1,155 @@ +dataset_info = dict( + dataset_name='mpii', + paper_info=dict( + author='Mykhaylo Andriluka and Leonid Pishchulin and ' + 'Peter Gehler and Schiele, Bernt', + title='2D Human Pose Estimation: New Benchmark and ' + 'State of the Art Analysis', + container='IEEE Conference on Computer Vision and ' + 'Pattern Recognition (CVPR)', + year='2014', + homepage='http://human-pose.mpi-inf.mpg.de/', + ), + keypoint_info={ + 0: + dict( + name='right_ankle', + id=0, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 1: + dict( + name='right_knee', + id=1, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 2: + dict( + name='right_hip', + id=2, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 3: + dict( + name='left_hip', + id=3, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 4: + dict( + name='left_knee', + id=4, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 5: + dict( + name='left_ankle', + id=5, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 6: + dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''), + 7: + dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''), + 8: + dict( + name='upper_neck', + id=8, + color=[51, 153, 255], + type='upper', + swap=''), + 9: + dict( + name='head_top', id=9, color=[51, 153, 255], type='upper', + swap=''), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='right_elbow', + id=11, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 12: + dict( + name='right_shoulder', + id=12, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 13: + dict( + name='left_shoulder', + id=13, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 14: + dict( + name='left_elbow', + id=14, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 15: + dict( + name='left_wrist', + id=15, + color=[0, 255, 0], + type='upper', + swap='right_wrist') + }, + skeleton_info={ + 0: + dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]), + 1: + dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]), + 2: + dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]), + 3: + 
dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]), + 4: + dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]), + 5: + dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]), + 6: + dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]), + 7: + dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]), + 8: + dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]), + 9: + dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]), + 10: + dict( + link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128, + 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]), + 13: + dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]), + 14: + dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0]) + }, + joint_weights=[ + 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5 + ], + # Adapted from COCO dataset. + sigmas=[ + 0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026, + 0.062, 0.072, 0.179, 0.179, 0.072, 0.062 + ]) diff --git a/mmpose/configs/_base_/datasets/mpii_trb.py b/mmpose/configs/_base_/datasets/mpii_trb.py new file mode 100644 index 0000000000000000000000000000000000000000..73940d4b4827f8e08343c3b517360db788e4820d --- /dev/null +++ b/mmpose/configs/_base_/datasets/mpii_trb.py @@ -0,0 +1,380 @@ +dataset_info = dict( + dataset_name='mpii_trb', + paper_info=dict( + author='Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and ' + 'Liu, Wentao and Qian, Chen and Ouyang, Wanli', + title='TRB: A Novel Triplet Representation for ' + 'Understanding 2D Human Body', + container='Proceedings of the IEEE International ' + 'Conference on Computer Vision', + year='2019', + homepage='https://github.com/kennymckormick/' + 'Triplet-Representation-of-human-Body', + ), + keypoint_info={ + 0: + dict( + name='left_shoulder', + id=0, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 1: + dict( + name='right_shoulder', + id=1, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 2: + dict( + name='left_elbow', + id=2, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 3: + dict( + name='right_elbow', + id=3, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 4: + dict( + name='left_wrist', + id=4, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 5: + dict( + name='right_wrist', + id=5, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 6: + dict( + name='left_hip', + id=6, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 7: + dict( + name='right_hip', + id=7, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 8: + dict( + name='left_knee', + id=8, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 9: + dict( + name='right_knee', + id=9, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 10: + dict( + name='left_ankle', + id=10, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 11: + dict( + name='right_ankle', + id=11, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 12: + dict(name='head', id=12, color=[51, 153, 255], type='upper', swap=''), + 13: + dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap=''), + 14: + dict( + name='right_neck', + id=14, + color=[255, 255, 255], + type='upper', + swap='left_neck'), + 15: + dict( + name='left_neck', + id=15, + color=[255, 255, 255], + type='upper', + swap='right_neck'), + 16: + dict( + 
name='medial_right_shoulder', + id=16, + color=[255, 255, 255], + type='upper', + swap='medial_left_shoulder'), + 17: + dict( + name='lateral_right_shoulder', + id=17, + color=[255, 255, 255], + type='upper', + swap='lateral_left_shoulder'), + 18: + dict( + name='medial_right_bow', + id=18, + color=[255, 255, 255], + type='upper', + swap='medial_left_bow'), + 19: + dict( + name='lateral_right_bow', + id=19, + color=[255, 255, 255], + type='upper', + swap='lateral_left_bow'), + 20: + dict( + name='medial_right_wrist', + id=20, + color=[255, 255, 255], + type='upper', + swap='medial_left_wrist'), + 21: + dict( + name='lateral_right_wrist', + id=21, + color=[255, 255, 255], + type='upper', + swap='lateral_left_wrist'), + 22: + dict( + name='medial_left_shoulder', + id=22, + color=[255, 255, 255], + type='upper', + swap='medial_right_shoulder'), + 23: + dict( + name='lateral_left_shoulder', + id=23, + color=[255, 255, 255], + type='upper', + swap='lateral_right_shoulder'), + 24: + dict( + name='medial_left_bow', + id=24, + color=[255, 255, 255], + type='upper', + swap='medial_right_bow'), + 25: + dict( + name='lateral_left_bow', + id=25, + color=[255, 255, 255], + type='upper', + swap='lateral_right_bow'), + 26: + dict( + name='medial_left_wrist', + id=26, + color=[255, 255, 255], + type='upper', + swap='medial_right_wrist'), + 27: + dict( + name='lateral_left_wrist', + id=27, + color=[255, 255, 255], + type='upper', + swap='lateral_right_wrist'), + 28: + dict( + name='medial_right_hip', + id=28, + color=[255, 255, 255], + type='lower', + swap='medial_left_hip'), + 29: + dict( + name='lateral_right_hip', + id=29, + color=[255, 255, 255], + type='lower', + swap='lateral_left_hip'), + 30: + dict( + name='medial_right_knee', + id=30, + color=[255, 255, 255], + type='lower', + swap='medial_left_knee'), + 31: + dict( + name='lateral_right_knee', + id=31, + color=[255, 255, 255], + type='lower', + swap='lateral_left_knee'), + 32: + dict( + name='medial_right_ankle', + id=32, + color=[255, 255, 255], + type='lower', + swap='medial_left_ankle'), + 33: + dict( + name='lateral_right_ankle', + id=33, + color=[255, 255, 255], + type='lower', + swap='lateral_left_ankle'), + 34: + dict( + name='medial_left_hip', + id=34, + color=[255, 255, 255], + type='lower', + swap='medial_right_hip'), + 35: + dict( + name='lateral_left_hip', + id=35, + color=[255, 255, 255], + type='lower', + swap='lateral_right_hip'), + 36: + dict( + name='medial_left_knee', + id=36, + color=[255, 255, 255], + type='lower', + swap='medial_right_knee'), + 37: + dict( + name='lateral_left_knee', + id=37, + color=[255, 255, 255], + type='lower', + swap='lateral_right_knee'), + 38: + dict( + name='medial_left_ankle', + id=38, + color=[255, 255, 255], + type='lower', + swap='medial_right_ankle'), + 39: + dict( + name='lateral_left_ankle', + id=39, + color=[255, 255, 255], + type='lower', + swap='lateral_right_ankle'), + }, + skeleton_info={ + 0: + dict(link=('head', 'neck'), id=0, color=[51, 153, 255]), + 1: + dict(link=('neck', 'left_shoulder'), id=1, color=[51, 153, 255]), + 2: + dict(link=('neck', 'right_shoulder'), id=2, color=[51, 153, 255]), + 3: + dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]), + 4: + dict( + link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]), + 5: + dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]), + 6: + dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]), + 7: + dict(link=('left_shoulder', 'left_hip'), id=7, color=[51, 153, 255]), + 8: + 
dict(link=('right_shoulder', 'right_hip'), id=8, color=[51, 153, 255]), + 9: + dict(link=('left_hip', 'right_hip'), id=9, color=[51, 153, 255]), + 10: + dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]), + 11: + dict(link=('right_hip', 'right_knee'), id=11, color=[255, 128, 0]), + 12: + dict(link=('left_knee', 'left_ankle'), id=12, color=[0, 255, 0]), + 13: + dict(link=('right_knee', 'right_ankle'), id=13, color=[255, 128, 0]), + 14: + dict(link=('right_neck', 'left_neck'), id=14, color=[255, 255, 255]), + 15: + dict( + link=('medial_right_shoulder', 'lateral_right_shoulder'), + id=15, + color=[255, 255, 255]), + 16: + dict( + link=('medial_right_bow', 'lateral_right_bow'), + id=16, + color=[255, 255, 255]), + 17: + dict( + link=('medial_right_wrist', 'lateral_right_wrist'), + id=17, + color=[255, 255, 255]), + 18: + dict( + link=('medial_left_shoulder', 'lateral_left_shoulder'), + id=18, + color=[255, 255, 255]), + 19: + dict( + link=('medial_left_bow', 'lateral_left_bow'), + id=19, + color=[255, 255, 255]), + 20: + dict( + link=('medial_left_wrist', 'lateral_left_wrist'), + id=20, + color=[255, 255, 255]), + 21: + dict( + link=('medial_right_hip', 'lateral_right_hip'), + id=21, + color=[255, 255, 255]), + 22: + dict( + link=('medial_right_knee', 'lateral_right_knee'), + id=22, + color=[255, 255, 255]), + 23: + dict( + link=('medial_right_ankle', 'lateral_right_ankle'), + id=23, + color=[255, 255, 255]), + 24: + dict( + link=('medial_left_hip', 'lateral_left_hip'), + id=24, + color=[255, 255, 255]), + 25: + dict( + link=('medial_left_knee', 'lateral_left_knee'), + id=25, + color=[255, 255, 255]), + 26: + dict( + link=('medial_left_ankle', 'lateral_left_ankle'), + id=26, + color=[255, 255, 255]) + }, + joint_weights=[1.] * 40, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/ochuman.py b/mmpose/configs/_base_/datasets/ochuman.py new file mode 100644 index 0000000000000000000000000000000000000000..2ef20838fe583fde133a97e688d30e91ae562746 --- /dev/null +++ b/mmpose/configs/_base_/datasets/ochuman.py @@ -0,0 +1,181 @@ +dataset_info = dict( + dataset_name='ochuman', + paper_info=dict( + author='Zhang, Song-Hai and Li, Ruilong and Dong, Xin and ' + 'Rosin, Paul and Cai, Zixi and Han, Xi and ' + 'Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min', + title='Pose2seg: Detection free human instance segmentation', + container='Proceedings of the IEEE conference on computer ' + 'vision and pattern recognition', + year='2019', + homepage='https://github.com/liruilong940607/OCHumanApi', + ), + keypoint_info={ + 0: + dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''), + 1: + dict( + name='left_eye', + id=1, + color=[51, 153, 255], + type='upper', + swap='right_eye'), + 2: + dict( + name='right_eye', + id=2, + color=[51, 153, 255], + type='upper', + swap='left_eye'), + 3: + dict( + name='left_ear', + id=3, + color=[51, 153, 255], + type='upper', + swap='right_ear'), + 4: + dict( + name='right_ear', + id=4, + color=[51, 153, 255], + type='upper', + swap='left_ear'), + 5: + dict( + name='left_shoulder', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 6: + dict( + name='right_shoulder', + id=6, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 7: + dict( + name='left_elbow', + id=7, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 8: + dict( + name='right_elbow', + id=8, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 9: + dict( + name='left_wrist', + id=9, + color=[0, 255, 0], + type='upper', + 
swap='right_wrist'), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='left_hip', + id=11, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 12: + dict( + name='right_hip', + id=12, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 13: + dict( + name='left_knee', + id=13, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 14: + dict( + name='right_knee', + id=14, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 15: + dict( + name='left_ankle', + id=15, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 16: + dict( + name='right_ankle', + id=16, + color=[255, 128, 0], + type='lower', + swap='left_ankle') + }, + skeleton_info={ + 0: + dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + 2: + dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]), + 3: + dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]), + 4: + dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]), + 5: + dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]), + 6: + dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]), + 7: + dict( + link=('left_shoulder', 'right_shoulder'), + id=7, + color=[51, 153, 255]), + 8: + dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]), + 9: + dict( + link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), + 10: + dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]), + 13: + dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]), + 14: + dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]), + 15: + dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]), + 16: + dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]), + 17: + dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]), + 18: + dict( + link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]) + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, + 1.5 + ], + sigmas=[ + 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, + 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089 + ]) diff --git a/mmpose/configs/_base_/datasets/onehand10k.py b/mmpose/configs/_base_/datasets/onehand10k.py new file mode 100644 index 0000000000000000000000000000000000000000..016770f14f3075dfa7d59389524a0c11a4feb802 --- /dev/null +++ b/mmpose/configs/_base_/datasets/onehand10k.py @@ -0,0 +1,142 @@ +dataset_info = dict( + dataset_name='onehand10k', + paper_info=dict( + author='Wang, Yangang and Peng, Cong and Liu, Yebin', + title='Mask-pose cascaded cnn for 2d hand pose estimation ' + 'from single color image', + container='IEEE Transactions on Circuits and Systems ' + 'for Video Technology', + year='2018', + homepage='https://www.yangangwang.com/papers/WANG-MCC-2018-10.html', + ), + keypoint_info={ + 0: + dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''), + 1: + dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''), + 2: + dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''), + 3: + dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''), + 4: + dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''), + 5: + dict( + 
name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''), + 6: + dict( + name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''), + 7: + dict( + name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''), + 8: + dict( + name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''), + 9: + dict( + name='middle_finger1', + id=9, + color=[102, 178, 255], + type='', + swap=''), + 10: + dict( + name='middle_finger2', + id=10, + color=[102, 178, 255], + type='', + swap=''), + 11: + dict( + name='middle_finger3', + id=11, + color=[102, 178, 255], + type='', + swap=''), + 12: + dict( + name='middle_finger4', + id=12, + color=[102, 178, 255], + type='', + swap=''), + 13: + dict( + name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''), + 14: + dict( + name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''), + 15: + dict( + name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''), + 16: + dict( + name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''), + 17: + dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''), + 18: + dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''), + 19: + dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''), + 20: + dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='') + }, + skeleton_info={ + 0: + dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]), + 1: + dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]), + 2: + dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]), + 3: + dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]), + 4: + dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]), + 5: + dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]), + 6: + dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]), + 7: + dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]), + 8: + dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]), + 9: + dict( + link=('middle_finger1', 'middle_finger2'), + id=9, + color=[102, 178, 255]), + 10: + dict( + link=('middle_finger2', 'middle_finger3'), + id=10, + color=[102, 178, 255]), + 11: + dict( + link=('middle_finger3', 'middle_finger4'), + id=11, + color=[102, 178, 255]), + 12: + dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]), + 13: + dict( + link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]), + 14: + dict( + link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]), + 15: + dict( + link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]), + 16: + dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]), + 17: + dict( + link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]), + 18: + dict( + link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]), + 19: + dict( + link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0]) + }, + joint_weights=[1.] 
* 21, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/panoptic_body3d.py b/mmpose/configs/_base_/datasets/panoptic_body3d.py new file mode 100644 index 0000000000000000000000000000000000000000..e3b19ac462415a840ca2e0b9e214bdb35d91b5e4 --- /dev/null +++ b/mmpose/configs/_base_/datasets/panoptic_body3d.py @@ -0,0 +1,160 @@ +dataset_info = dict( + dataset_name='panoptic_pose_3d', + paper_info=dict( + author='Joo, Hanbyul and Simon, Tomas and Li, Xulong ' + 'and Liu, Hao and Tan, Lei and Gui, Lin and Banerjee, Sean ' + 'and Godisart, Timothy and Nabbe, Bart and Matthews, Iain ' + 'and Kanade, Takeo and Nobuhara, Shohei and Sheikh, Yaser', + title='Panoptic Studio: A Massively Multiview System ' + 'for Interaction Motion Capture', + container='IEEE Transactions on Pattern Analysis' + ' and Machine Intelligence', + year='2017', + homepage='http://domedb.perception.cs.cmu.edu', + ), + keypoint_info={ + 0: + dict(name='neck', id=0, color=[51, 153, 255], type='upper', swap=''), + 1: + dict(name='nose', id=1, color=[51, 153, 255], type='upper', swap=''), + 2: + dict(name='mid_hip', id=2, color=[0, 255, 0], type='lower', swap=''), + 3: + dict( + name='left_shoulder', + id=3, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 4: + dict( + name='left_elbow', + id=4, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 5: + dict( + name='left_wrist', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 6: + dict( + name='left_hip', + id=6, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 7: + dict( + name='left_knee', + id=7, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 8: + dict( + name='left_ankle', + id=8, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 9: + dict( + name='right_shoulder', + id=9, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 10: + dict( + name='right_elbow', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 11: + dict( + name='right_wrist', + id=11, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 12: + dict( + name='right_hip', + id=12, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 13: + dict( + name='right_knee', + id=13, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 14: + dict( + name='right_ankle', + id=14, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 15: + dict( + name='left_eye', + id=15, + color=[51, 153, 255], + type='upper', + swap='right_eye'), + 16: + dict( + name='left_ear', + id=16, + color=[51, 153, 255], + type='upper', + swap='right_ear'), + 17: + dict( + name='right_eye', + id=17, + color=[51, 153, 255], + type='upper', + swap='left_eye'), + 18: + dict( + name='right_ear', + id=18, + color=[51, 153, 255], + type='upper', + swap='left_ear') + }, + skeleton_info={ + 0: dict(link=('nose', 'neck'), id=0, color=[51, 153, 255]), + 1: dict(link=('neck', 'left_shoulder'), id=1, color=[0, 255, 0]), + 2: dict(link=('neck', 'right_shoulder'), id=2, color=[255, 128, 0]), + 3: dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]), + 4: dict( + link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]), + 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]), + 6: + dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]), + 7: dict(link=('left_ankle', 'left_knee'), id=7, color=[0, 255, 0]), + 8: dict(link=('left_knee', 'left_hip'), id=8, color=[0, 255, 0]), + 9: dict(link=('right_ankle', 'right_knee'), id=9, color=[255, 128, 0]), + 10:
dict(link=('right_knee', 'right_hip'), id=10, color=[255, 128, 0]), + 11: dict(link=('mid_hip', 'left_hip'), id=11, color=[0, 255, 0]), + 12: dict(link=('mid_hip', 'right_hip'), id=12, color=[255, 128, 0]), + 13: dict(link=('mid_hip', 'neck'), id=13, color=[51, 153, 255]), + }, + joint_weights=[ + 1.0, 1.0, 1.0, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2, + 1.5, 1.0, 1.0, 1.0, 1.0 + ], + sigmas=[ + 0.026, 0.026, 0.107, 0.079, 0.072, 0.062, 0.107, 0.087, 0.089, 0.079, + 0.072, 0.062, 0.107, 0.087, 0.089, 0.025, 0.035, 0.025, 0.035 + ]) diff --git a/mmpose/configs/_base_/datasets/panoptic_hand2d.py b/mmpose/configs/_base_/datasets/panoptic_hand2d.py new file mode 100644 index 0000000000000000000000000000000000000000..7a65731ba87b155beb1b40591fd9acb232c2afc6 --- /dev/null +++ b/mmpose/configs/_base_/datasets/panoptic_hand2d.py @@ -0,0 +1,143 @@ +dataset_info = dict( + dataset_name='panoptic_hand2d', + paper_info=dict( + author='Simon, Tomas and Joo, Hanbyul and ' + 'Matthews, Iain and Sheikh, Yaser', + title='Hand keypoint detection in single images using ' + 'multiview bootstrapping', + container='Proceedings of the IEEE conference on ' + 'Computer Vision and Pattern Recognition', + year='2017', + homepage='http://domedb.perception.cs.cmu.edu/handdb.html', + ), + keypoint_info={ + 0: + dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''), + 1: + dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''), + 2: + dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''), + 3: + dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''), + 4: + dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''), + 5: + dict( + name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''), + 6: + dict( + name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''), + 7: + dict( + name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''), + 8: + dict( + name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''), + 9: + dict( + name='middle_finger1', + id=9, + color=[102, 178, 255], + type='', + swap=''), + 10: + dict( + name='middle_finger2', + id=10, + color=[102, 178, 255], + type='', + swap=''), + 11: + dict( + name='middle_finger3', + id=11, + color=[102, 178, 255], + type='', + swap=''), + 12: + dict( + name='middle_finger4', + id=12, + color=[102, 178, 255], + type='', + swap=''), + 13: + dict( + name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''), + 14: + dict( + name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''), + 15: + dict( + name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''), + 16: + dict( + name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''), + 17: + dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''), + 18: + dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''), + 19: + dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''), + 20: + dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='') + }, + skeleton_info={ + 0: + dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]), + 1: + dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]), + 2: + dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]), + 3: + dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]), + 4: + dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]), + 5: + dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]), + 6: + dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 
153, 255]), + 7: + dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]), + 8: + dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]), + 9: + dict( + link=('middle_finger1', 'middle_finger2'), + id=9, + color=[102, 178, 255]), + 10: + dict( + link=('middle_finger2', 'middle_finger3'), + id=10, + color=[102, 178, 255]), + 11: + dict( + link=('middle_finger3', 'middle_finger4'), + id=11, + color=[102, 178, 255]), + 12: + dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]), + 13: + dict( + link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]), + 14: + dict( + link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]), + 15: + dict( + link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]), + 16: + dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]), + 17: + dict( + link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]), + 18: + dict( + link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]), + 19: + dict( + link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0]) + }, + joint_weights=[1.] * 21, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/posetrack18.py b/mmpose/configs/_base_/datasets/posetrack18.py new file mode 100644 index 0000000000000000000000000000000000000000..5aefd1c97fe083df35ee88bebab4f99134c27971 --- /dev/null +++ b/mmpose/configs/_base_/datasets/posetrack18.py @@ -0,0 +1,176 @@ +dataset_info = dict( + dataset_name='posetrack18', + paper_info=dict( + author='Andriluka, Mykhaylo and Iqbal, Umar and ' + 'Insafutdinov, Eldar and Pishchulin, Leonid and ' + 'Milan, Anton and Gall, Juergen and Schiele, Bernt', + title='Posetrack: A benchmark for human pose estimation and tracking', + container='Proceedings of the IEEE Conference on ' + 'Computer Vision and Pattern Recognition', + year='2018', + homepage='https://posetrack.net/users/download.php', + ), + keypoint_info={ + 0: + dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''), + 1: + dict( + name='head_bottom', + id=1, + color=[51, 153, 255], + type='upper', + swap=''), + 2: + dict( + name='head_top', id=2, color=[51, 153, 255], type='upper', + swap=''), + 3: + dict( + name='left_ear', + id=3, + color=[51, 153, 255], + type='upper', + swap='right_ear'), + 4: + dict( + name='right_ear', + id=4, + color=[51, 153, 255], + type='upper', + swap='left_ear'), + 5: + dict( + name='left_shoulder', + id=5, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 6: + dict( + name='right_shoulder', + id=6, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 7: + dict( + name='left_elbow', + id=7, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 8: + dict( + name='right_elbow', + id=8, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 9: + dict( + name='left_wrist', + id=9, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 10: + dict( + name='right_wrist', + id=10, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 11: + dict( + name='left_hip', + id=11, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 12: + dict( + name='right_hip', + id=12, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 13: + dict( + name='left_knee', + id=13, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 14: + dict( + name='right_knee', + id=14, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 15: + dict( + name='left_ankle', + id=15, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 16: + dict( 
+ name='right_ankle', + id=16, + color=[255, 128, 0], + type='lower', + swap='left_ankle') + }, + skeleton_info={ + 0: + dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]), + 1: + dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]), + 2: + dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]), + 3: + dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]), + 4: + dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]), + 5: + dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]), + 6: + dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]), + 7: + dict( + link=('left_shoulder', 'right_shoulder'), + id=7, + color=[51, 153, 255]), + 8: + dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]), + 9: + dict( + link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]), + 10: + dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]), + 11: + dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]), + 12: + dict(link=('nose', 'head_bottom'), id=12, color=[51, 153, 255]), + 13: + dict(link=('nose', 'head_top'), id=13, color=[51, 153, 255]), + 14: + dict( + link=('head_bottom', 'left_shoulder'), id=14, color=[51, 153, + 255]), + 15: + dict( + link=('head_bottom', 'right_shoulder'), + id=15, + color=[51, 153, 255]) + }, + joint_weights=[ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, + 1.5 + ], + sigmas=[ + 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, + 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089 + ]) diff --git a/mmpose/configs/_base_/datasets/rhd2d.py b/mmpose/configs/_base_/datasets/rhd2d.py new file mode 100644 index 0000000000000000000000000000000000000000..4631ccd03814155b06687e0b1ba2b83404c837fc --- /dev/null +++ b/mmpose/configs/_base_/datasets/rhd2d.py @@ -0,0 +1,151 @@ +dataset_info = dict( + dataset_name='rhd2d', + paper_info=dict( + author='Christian Zimmermann and Thomas Brox', + title='Learning to Estimate 3D Hand Pose from Single RGB Images', + container='arXiv', + year='2017', + homepage='https://lmb.informatik.uni-freiburg.de/resources/' + 'datasets/RenderedHandposeDataset.en.html', + ), + # In RHD, keypoints 1-4 are the left thumb ordered from tip to palm, + # and the other fingers follow the same tip-to-palm order. Please refer + # to `https://lmb.informatik.uni-freiburg.de/resources/datasets/ + # RenderedHandpose/README` for details of the keypoint definition. + # In COCO-WholeBody-Hand, FreiHand and CMU Panoptic HandDB, however, + # the order is reversed. Pay attention to this if you want to combine + # RHD with other hand datasets to train a single model. + # Also, note that 'keypoint_info' will not directly affect the order of + # the keypoints in the dataset. It is mostly for visualization & storing + # information about flip_pairs.
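+ # Rough illustrative sketch (added for clarity; not part of the
+ # original config, and the variable names below are hypothetical):
+ # to merge RHD with a palm-to-tip dataset such as COCO-WholeBody-Hand,
+ # each finger's four joints would be reversed, e.g.
+ #     # wrist stays at index 0; finger f occupies indices 4*f+1 .. 4*f+4
+ #     rhd_to_std = [0] + [4 * f + j for f in range(5) for j in (4, 3, 2, 1)]
+ #     keypoints_std = keypoints_rhd[rhd_to_std]  # reorder a (21, ...) array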
+ keypoint_info={ + 0: + dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''), + 1: + dict(name='thumb4', id=1, color=[255, 128, 0], type='', swap=''), + 2: + dict(name='thumb3', id=2, color=[255, 128, 0], type='', swap=''), + 3: + dict(name='thumb2', id=3, color=[255, 128, 0], type='', swap=''), + 4: + dict(name='thumb1', id=4, color=[255, 128, 0], type='', swap=''), + 5: + dict( + name='forefinger4', id=5, color=[255, 153, 255], type='', swap=''), + 6: + dict( + name='forefinger3', id=6, color=[255, 153, 255], type='', swap=''), + 7: + dict( + name='forefinger2', id=7, color=[255, 153, 255], type='', swap=''), + 8: + dict( + name='forefinger1', id=8, color=[255, 153, 255], type='', swap=''), + 9: + dict( + name='middle_finger4', + id=9, + color=[102, 178, 255], + type='', + swap=''), + 10: + dict( + name='middle_finger3', + id=10, + color=[102, 178, 255], + type='', + swap=''), + 11: + dict( + name='middle_finger2', + id=11, + color=[102, 178, 255], + type='', + swap=''), + 12: + dict( + name='middle_finger1', + id=12, + color=[102, 178, 255], + type='', + swap=''), + 13: + dict( + name='ring_finger4', id=13, color=[255, 51, 51], type='', swap=''), + 14: + dict( + name='ring_finger3', id=14, color=[255, 51, 51], type='', swap=''), + 15: + dict( + name='ring_finger2', id=15, color=[255, 51, 51], type='', swap=''), + 16: + dict( + name='ring_finger1', id=16, color=[255, 51, 51], type='', swap=''), + 17: + dict(name='pinky_finger4', id=17, color=[0, 255, 0], type='', swap=''), + 18: + dict(name='pinky_finger3', id=18, color=[0, 255, 0], type='', swap=''), + 19: + dict(name='pinky_finger2', id=19, color=[0, 255, 0], type='', swap=''), + 20: + dict(name='pinky_finger1', id=20, color=[0, 255, 0], type='', swap='') + }, + skeleton_info={ + 0: + dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]), + 1: + dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]), + 2: + dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]), + 3: + dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]), + 4: + dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]), + 5: + dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]), + 6: + dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]), + 7: + dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]), + 8: + dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]), + 9: + dict( + link=('middle_finger1', 'middle_finger2'), + id=9, + color=[102, 178, 255]), + 10: + dict( + link=('middle_finger2', 'middle_finger3'), + id=10, + color=[102, 178, 255]), + 11: + dict( + link=('middle_finger3', 'middle_finger4'), + id=11, + color=[102, 178, 255]), + 12: + dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]), + 13: + dict( + link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]), + 14: + dict( + link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]), + 15: + dict( + link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]), + 16: + dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]), + 17: + dict( + link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]), + 18: + dict( + link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]), + 19: + dict( + link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0]) + }, + joint_weights=[1.] 
* 21, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/shelf.py b/mmpose/configs/_base_/datasets/shelf.py new file mode 100644 index 0000000000000000000000000000000000000000..5fe6e42b3b44e3f65947284efd9ffac58d41d43f --- /dev/null +++ b/mmpose/configs/_base_/datasets/shelf.py @@ -0,0 +1,151 @@ +dataset_info = dict( + dataset_name='shelf', + paper_info=dict( + author='Belagiannis, Vasileios and Amin, Sikandar and Andriluka, ' + 'Mykhaylo and Schiele, Bernt and Navab, Nassir and Ilic, Slobodan', + title='3D Pictorial Structures for Multiple Human Pose Estimation', + container='IEEE Computer Society Conference on Computer Vision and ' + 'Pattern Recognition (CVPR)', + year='2014', + homepage='http://campar.in.tum.de/Chair/MultiHumanPose', + ), + keypoint_info={ + 0: + dict( + name='right_ankle', + id=0, + color=[255, 128, 0], + type='lower', + swap='left_ankle'), + 1: + dict( + name='right_knee', + id=1, + color=[255, 128, 0], + type='lower', + swap='left_knee'), + 2: + dict( + name='right_hip', + id=2, + color=[255, 128, 0], + type='lower', + swap='left_hip'), + 3: + dict( + name='left_hip', + id=3, + color=[0, 255, 0], + type='lower', + swap='right_hip'), + 4: + dict( + name='left_knee', + id=4, + color=[0, 255, 0], + type='lower', + swap='right_knee'), + 5: + dict( + name='left_ankle', + id=5, + color=[0, 255, 0], + type='lower', + swap='right_ankle'), + 6: + dict( + name='right_wrist', + id=6, + color=[255, 128, 0], + type='upper', + swap='left_wrist'), + 7: + dict( + name='right_elbow', + id=7, + color=[255, 128, 0], + type='upper', + swap='left_elbow'), + 8: + dict( + name='right_shoulder', + id=8, + color=[255, 128, 0], + type='upper', + swap='left_shoulder'), + 9: + dict( + name='left_shoulder', + id=9, + color=[0, 255, 0], + type='upper', + swap='right_shoulder'), + 10: + dict( + name='left_elbow', + id=10, + color=[0, 255, 0], + type='upper', + swap='right_elbow'), + 11: + dict( + name='left_wrist', + id=11, + color=[0, 255, 0], + type='upper', + swap='right_wrist'), + 12: + dict( + name='bottom_head', + id=12, + color=[51, 153, 255], + type='upper', + swap=''), + 13: + dict( + name='top_head', + id=13, + color=[51, 153, 255], + type='upper', + swap=''), + }, + skeleton_info={ + 0: + dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]), + 1: + dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]), + 2: + dict(link=('left_hip', 'left_knee'), id=2, color=[0, 255, 0]), + 3: + dict(link=('left_knee', 'left_ankle'), id=3, color=[0, 255, 0]), + 4: + dict(link=('right_hip', 'left_hip'), id=4, color=[51, 153, 255]), + 5: + dict(link=('right_wrist', 'right_elbow'), id=5, color=[255, 128, 0]), + 6: + dict( + link=('right_elbow', 'right_shoulder'), id=6, color=[255, 128, 0]), + 7: + dict(link=('left_shoulder', 'left_elbow'), id=7, color=[0, 255, 0]), + 8: + dict(link=('left_elbow', 'left_wrist'), id=8, color=[0, 255, 0]), + 9: + dict(link=('right_hip', 'right_shoulder'), id=9, color=[255, 128, 0]), + 10: + dict(link=('left_hip', 'left_shoulder'), id=10, color=[0, 255, 0]), + 11: + dict( + link=('right_shoulder', 'bottom_head'), id=11, color=[255, 128, + 0]), + 12: + dict(link=('left_shoulder', 'bottom_head'), id=12, color=[0, 255, 0]), + 13: + dict(link=('bottom_head', 'top_head'), id=13, color=[51, 153, 255]), + }, + joint_weights=[ + 1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.0, 1.0 + ], + sigmas=[ + 0.089, 0.087, 0.107, 0.107, 0.087, 0.089, 0.062, 0.072, 0.079, 0.079, + 0.072, 0.062, 0.026, 0.026 + ]) diff --git 
a/mmpose/configs/_base_/datasets/wflw.py b/mmpose/configs/_base_/datasets/wflw.py new file mode 100644 index 0000000000000000000000000000000000000000..80c29b696cf5031d8f21d7d8ed7e573043666f35 --- /dev/null +++ b/mmpose/configs/_base_/datasets/wflw.py @@ -0,0 +1,192 @@ +dataset_info = dict( + dataset_name='wflw', + paper_info=dict( + author='Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, ' + 'Quan and Cai, Yici and Zhou, Qiang', + title='Look at boundary: A boundary-aware face alignment algorithm', + container='Proceedings of the IEEE conference on computer ' + 'vision and pattern recognition', + year='2018', + homepage='https://wywu.github.io/projects/LAB/WFLW.html', + ), + keypoint_info={ + 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-32'), + 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-31'), + 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-30'), + 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-29'), + 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-28'), + 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-27'), + 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-26'), + 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-25'), + 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-24'), + 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-23'), + 10: + dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-22'), + 11: + dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-21'), + 12: + dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-20'), + 13: + dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-19'), + 14: + dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-18'), + 15: + dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-17'), + 16: dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap=''), + 17: + dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-15'), + 18: + dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-14'), + 19: + dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-13'), + 20: + dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap='kpt-12'), + 21: + dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap='kpt-11'), + 22: + dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-10'), + 23: + dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-9'), + 24: + dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap='kpt-8'), + 25: + dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap='kpt-7'), + 26: + dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap='kpt-6'), + 27: + dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap='kpt-5'), + 28: + dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap='kpt-4'), + 29: + dict(name='kpt-29', id=29, color=[255, 0, 0], type='', swap='kpt-3'), + 30: + dict(name='kpt-30', id=30, color=[255, 0, 0], type='', swap='kpt-2'), + 31: + dict(name='kpt-31', id=31, color=[255, 0, 0], type='', swap='kpt-1'), + 32: + dict(name='kpt-32', id=32, color=[255, 0, 0], type='', swap='kpt-0'), + 33: + dict(name='kpt-33', id=33, color=[255, 0, 0], type='', swap='kpt-46'), + 34: + dict(name='kpt-34', id=34, color=[255, 0, 0], type='', swap='kpt-45'), + 35: + dict(name='kpt-35', id=35, color=[255, 0, 0], type='', swap='kpt-44'), + 36: + dict(name='kpt-36', id=36, color=[255, 0, 0], type='', 
swap='kpt-43'), + 37: dict( + name='kpt-37', id=37, color=[255, 0, 0], type='', swap='kpt-42'), + 38: dict( + name='kpt-38', id=38, color=[255, 0, 0], type='', swap='kpt-50'), + 39: dict( + name='kpt-39', id=39, color=[255, 0, 0], type='', swap='kpt-49'), + 40: dict( + name='kpt-40', id=40, color=[255, 0, 0], type='', swap='kpt-48'), + 41: dict( + name='kpt-41', id=41, color=[255, 0, 0], type='', swap='kpt-47'), + 42: dict( + name='kpt-42', id=42, color=[255, 0, 0], type='', swap='kpt-37'), + 43: dict( + name='kpt-43', id=43, color=[255, 0, 0], type='', swap='kpt-36'), + 44: dict( + name='kpt-44', id=44, color=[255, 0, 0], type='', swap='kpt-35'), + 45: dict( + name='kpt-45', id=45, color=[255, 0, 0], type='', swap='kpt-34'), + 46: dict( + name='kpt-46', id=46, color=[255, 0, 0], type='', swap='kpt-33'), + 47: dict( + name='kpt-47', id=47, color=[255, 0, 0], type='', swap='kpt-41'), + 48: dict( + name='kpt-48', id=48, color=[255, 0, 0], type='', swap='kpt-40'), + 49: dict( + name='kpt-49', id=49, color=[255, 0, 0], type='', swap='kpt-39'), + 50: dict( + name='kpt-50', id=50, color=[255, 0, 0], type='', swap='kpt-38'), + 51: dict(name='kpt-51', id=51, color=[255, 0, 0], type='', swap=''), + 52: dict(name='kpt-52', id=52, color=[255, 0, 0], type='', swap=''), + 53: dict(name='kpt-53', id=53, color=[255, 0, 0], type='', swap=''), + 54: dict(name='kpt-54', id=54, color=[255, 0, 0], type='', swap=''), + 55: dict( + name='kpt-55', id=55, color=[255, 0, 0], type='', swap='kpt-59'), + 56: dict( + name='kpt-56', id=56, color=[255, 0, 0], type='', swap='kpt-58'), + 57: dict(name='kpt-57', id=57, color=[255, 0, 0], type='', swap=''), + 58: dict( + name='kpt-58', id=58, color=[255, 0, 0], type='', swap='kpt-56'), + 59: dict( + name='kpt-59', id=59, color=[255, 0, 0], type='', swap='kpt-55'), + 60: dict( + name='kpt-60', id=60, color=[255, 0, 0], type='', swap='kpt-72'), + 61: dict( + name='kpt-61', id=61, color=[255, 0, 0], type='', swap='kpt-71'), + 62: dict( + name='kpt-62', id=62, color=[255, 0, 0], type='', swap='kpt-70'), + 63: dict( + name='kpt-63', id=63, color=[255, 0, 0], type='', swap='kpt-69'), + 64: dict( + name='kpt-64', id=64, color=[255, 0, 0], type='', swap='kpt-68'), + 65: dict( + name='kpt-65', id=65, color=[255, 0, 0], type='', swap='kpt-75'), + 66: dict( + name='kpt-66', id=66, color=[255, 0, 0], type='', swap='kpt-74'), + 67: dict( + name='kpt-67', id=67, color=[255, 0, 0], type='', swap='kpt-73'), + 68: dict( + name='kpt-68', id=68, color=[255, 0, 0], type='', swap='kpt-64'), + 69: dict( + name='kpt-69', id=69, color=[255, 0, 0], type='', swap='kpt-63'), + 70: dict( + name='kpt-70', id=70, color=[255, 0, 0], type='', swap='kpt-62'), + 71: dict( + name='kpt-71', id=71, color=[255, 0, 0], type='', swap='kpt-61'), + 72: dict( + name='kpt-72', id=72, color=[255, 0, 0], type='', swap='kpt-60'), + 73: dict( + name='kpt-73', id=73, color=[255, 0, 0], type='', swap='kpt-67'), + 74: dict( + name='kpt-74', id=74, color=[255, 0, 0], type='', swap='kpt-66'), + 75: dict( + name='kpt-75', id=75, color=[255, 0, 0], type='', swap='kpt-65'), + 76: dict( + name='kpt-76', id=76, color=[255, 0, 0], type='', swap='kpt-82'), + 77: dict( + name='kpt-77', id=77, color=[255, 0, 0], type='', swap='kpt-81'), + 78: dict( + name='kpt-78', id=78, color=[255, 0, 0], type='', swap='kpt-80'), + 79: dict(name='kpt-79', id=79, color=[255, 0, 0], type='', swap=''), + 80: dict( + name='kpt-80', id=80, color=[255, 0, 0], type='', swap='kpt-78'), + 81: dict( + name='kpt-81', id=81, color=[255, 0, 0], type='', 
swap='kpt-77'), + 82: dict( + name='kpt-82', id=82, color=[255, 0, 0], type='', swap='kpt-76'), + 83: dict( + name='kpt-83', id=83, color=[255, 0, 0], type='', swap='kpt-87'), + 84: dict( + name='kpt-84', id=84, color=[255, 0, 0], type='', swap='kpt-86'), + 85: dict(name='kpt-85', id=85, color=[255, 0, 0], type='', swap=''), + 86: dict( + name='kpt-86', id=86, color=[255, 0, 0], type='', swap='kpt-84'), + 87: dict( + name='kpt-87', id=87, color=[255, 0, 0], type='', swap='kpt-83'), + 88: dict( + name='kpt-88', id=88, color=[255, 0, 0], type='', swap='kpt-92'), + 89: dict( + name='kpt-89', id=89, color=[255, 0, 0], type='', swap='kpt-91'), + 90: dict(name='kpt-90', id=90, color=[255, 0, 0], type='', swap=''), + 91: dict( + name='kpt-91', id=91, color=[255, 0, 0], type='', swap='kpt-89'), + 92: dict( + name='kpt-92', id=92, color=[255, 0, 0], type='', swap='kpt-88'), + 93: dict( + name='kpt-93', id=93, color=[255, 0, 0], type='', swap='kpt-95'), + 94: dict(name='kpt-94', id=94, color=[255, 0, 0], type='', swap=''), + 95: dict( + name='kpt-95', id=95, color=[255, 0, 0], type='', swap='kpt-93'), + 96: dict( + name='kpt-96', id=96, color=[255, 0, 0], type='', swap='kpt-97'), + 97: dict( + name='kpt-97', id=97, color=[255, 0, 0], type='', swap='kpt-96') + }, + skeleton_info={}, + joint_weights=[1.] * 98, + sigmas=[]) diff --git a/mmpose/configs/_base_/datasets/zebra.py b/mmpose/configs/_base_/datasets/zebra.py new file mode 100644 index 0000000000000000000000000000000000000000..eac71f796a761bbf87b123f8b7b8b4585df0c525 --- /dev/null +++ b/mmpose/configs/_base_/datasets/zebra.py @@ -0,0 +1,64 @@ +dataset_info = dict( + dataset_name='zebra', + paper_info=dict( + author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and ' + 'Li, Liang and Koger, Benjamin and Costelloe, Blair R and ' + 'Couzin, Iain D', + title='DeepPoseKit, a software toolkit for fast and robust ' + 'animal pose estimation using deep learning', + container='Elife', + year='2019', + homepage='https://github.com/jgraving/DeepPoseKit-Data', + ), + keypoint_info={ + 0: + dict(name='snout', id=0, color=[255, 255, 255], type='', swap=''), + 1: + dict(name='head', id=1, color=[255, 255, 255], type='', swap=''), + 2: + dict(name='neck', id=2, color=[255, 255, 255], type='', swap=''), + 3: + dict( + name='forelegL1', + id=3, + color=[255, 255, 255], + type='', + swap='forelegR1'), + 4: + dict( + name='forelegR1', + id=4, + color=[255, 255, 255], + type='', + swap='forelegL1'), + 5: + dict( + name='hindlegL1', + id=5, + color=[255, 255, 255], + type='', + swap='hindlegR1'), + 6: + dict( + name='hindlegR1', + id=6, + color=[255, 255, 255], + type='', + swap='hindlegL1'), + 7: + dict(name='tailbase', id=7, color=[255, 255, 255], type='', swap=''), + 8: + dict(name='tailtip', id=8, color=[255, 255, 255], type='', swap='') + }, + skeleton_info={ + 0: dict(link=('head', 'snout'), id=0, color=[255, 255, 255]), + 1: dict(link=('neck', 'head'), id=1, color=[255, 255, 255]), + 2: dict(link=('forelegL1', 'neck'), id=2, color=[255, 255, 255]), + 3: dict(link=('forelegR1', 'neck'), id=3, color=[255, 255, 255]), + 4: dict(link=('hindlegL1', 'tailbase'), id=4, color=[255, 255, 255]), + 5: dict(link=('hindlegR1', 'tailbase'), id=5, color=[255, 255, 255]), + 6: dict(link=('tailbase', 'neck'), id=6, color=[255, 255, 255]), + 7: dict(link=('tailtip', 'tailbase'), id=7, color=[255, 255, 255]) + }, + joint_weights=[1.] 
* 9, + sigmas=[]) diff --git a/mmpose/configs/_base_/default_runtime.py b/mmpose/configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..561d574fa757fa295f349394bf57047a2d8b576d --- /dev/null +++ b/mmpose/configs/_base_/default_runtime.py @@ -0,0 +1,49 @@ +default_scope = 'mmpose' + +# hooks +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=10), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='PoseVisualizationHook', enable=False), +) + +# custom hooks +custom_hooks = [ + # Synchronize model buffers such as running_mean and running_var in BN + # at the end of each epoch + dict(type='SyncBuffersHook') +] + +# multi-processing backend +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +# visualizer +vis_backends = [ + dict(type='LocalVisBackend'), + # dict(type='TensorboardVisBackend'), + # dict(type='WandbVisBackend'), +] +visualizer = dict( + type='PoseLocalVisualizer', vis_backends=vis_backends, name='visualizer') + +# logger +log_processor = dict( + type='LogProcessor', window_size=50, by_epoch=True, num_digits=6) +log_level = 'INFO' +load_from = None +resume = False + +# file I/O backend +backend_args = dict(backend='local') + +# training/validation/testing progress +train_cfg = dict(by_epoch=True) +val_cfg = dict() +test_cfg = dict() diff --git a/mmpose/configs/animal_2d_keypoint/README.md b/mmpose/configs/animal_2d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..efcc3841a51c20d776360c99eccfaeb94247ff0d --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/README.md @@ -0,0 +1,20 @@ +# 2D Animal Keypoint Detection + +2D animal keypoint detection (animal pose estimation) aims to detect the keypoints of different species, including rats, +dogs, macaques, and cheetahs. It provides detailed behavioral analysis for neuroscience, medical and ecology applications. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_animal_keypoint.md) to prepare data. + +## Demo + +Please follow [DEMO](/demo/docs/en/2d_animal_demo.md) to generate fancy demos. + +<img src="https://user-images.githubusercontent.com/26127467/187655602-907db86e-710b-447a-8ec9-5b623d43d160.gif" height="500px" alt><br> + +<img src="https://user-images.githubusercontent.com/11788150/114201893-4446ec00-9989-11eb-808b-5718c47c7b23.gif" height="140px" alt><br> + +<img src="https://user-images.githubusercontent.com/11788150/114205282-b5d46980-998c-11eb-9d6b-85ba47f81252.gif" height="140px" alt><br> + +<img src="https://user-images.githubusercontent.com/11788150/114023530-944c8280-98a5-11eb-86b0-5f6d3e232af0.gif" height="140px" alt><br> diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/README.md b/mmpose/configs/animal_2d_keypoint/rtmpose/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fbb103e36c5c9e66292904d15c8db467ce18f3b4 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/rtmpose/README.md @@ -0,0 +1,16 @@ +# RTMPose + +Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet their application in the industrial community still suffers from heavy model parameters and high latency.
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose. +Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries. +To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deploying it on a mobile device. + +## Results and Models + +### AP-10K Dataset + +Results on AP-10K validation set + +| Model | Input Size | AP | Details and Download | +| :-------: | :--------: | :---: | :------------------------------------------: | +| RTMPose-m | 256x256 | 0.722 | [rtmpose_ap10k.md](./ap10k/rtmpose_ap10k.md) | diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..46dbfbef9fb42e15188d2ea8ae1763e84ff05d78 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py @@ -0,0 +1,246 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from epoch 105 to 210 + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( +
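+ # Note added for clarity (not in the original config): KLDiscretLoss
+ # is the SimCC objective -- a KL divergence between the predicted
+ # per-axis classification logits (scaled by the temperature `beta`
+ # before the softmax) and the softened ground-truth label
+ # distributions (`label_softmax=True`), weighted per keypoint when
+ # `use_target_weight=True`.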
type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'AP10KDataset' +data_mode = 'topdown' +data_root = 'data/ap10k/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/pose/ap10k/', +# f'{data_root}': 's3://openmmlab/datasets/pose/ap10k/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-train-split1.json', + data_prefix=dict(img='data/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-val-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-test-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + 
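# switch to the milder train_pipeline_stage2 augmentations for the
+ # final stage2_num_epochs (30) epochs of training +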
type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-val-split1.json') +test_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-test-split1.json') diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.md b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.md new file mode 100644 index 0000000000000000000000000000000000000000..4d035a372572aaac93ff980acbb367a1cc6a5efa --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.md @@ -0,0 +1,25 @@ +<!-- [ALGORITHM] --> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary> + +```bibtex +@misc{yu2021ap10k, + title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild}, + author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao}, + year={2021}, + eprint={2108.12617}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +Results on AP-10K validation set + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log | +| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: | +| [rtmpose-m](/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.722 | 0.939 | 0.788 | 0.569 | 0.728 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.json) | diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.yml b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..0441d9e65faa6b0274f9152ff31ef1b66a112214 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.yml @@ -0,0 +1,19 @@ +Models: +- Config: configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py + In Collection: RTMPose + Alias: animal + Metadata: + Architecture: + - RTMPose + Training Data: AP-10K + Name: rtmpose-m_8xb64-210e_ap10k-256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.722 + AP@0.5: 0.939 + AP@0.75: 0.788 + AP (L): 0.728 + AP (M): 0.569 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b4f8e366ff378831724f970e4ef2245f9f4b4468 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/README.md @@ -0,0 +1,54 @@ +# Top-down heatmap-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator produces heatmaps that represent the +likelihood of each location being a keypoint, following the
paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html). + +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146522977-5f355832-e9c1-442f-a34f-9d24fb0aefa8.png" height=400> +</div> + +## Results and Models + +### Animal-Pose Dataset + +Results on AnimalPose validation set (1117 instances) + +| Model | Input Size | AP | AR | Details and Download | +| :--------: | :--------: | :---: | :---: | :-------------------------------------------------------: | +| HRNet-w32 | 256x256 | 0.740 | 0.780 | [hrnet_animalpose.md](./animalpose/hrnet_animalpose.md) | +| HRNet-w48 | 256x256 | 0.738 | 0.778 | [hrnet_animalpose.md](./animalpose/hrnet_animalpose.md) | +| ResNet-152 | 256x256 | 0.704 | 0.748 | [resnet_animalpose.md](./animalpose/resnet_animalpose.md) | +| ResNet-101 | 256x256 | 0.696 | 0.736 | [resnet_animalpose.md](./animalpose/resnet_animalpose.md) | +| ResNet-50 | 256x256 | 0.691 | 0.736 | [resnet_animalpose.md](./animalpose/resnet_animalpose.md) | + +### AP-10K Dataset + +Results on AP-10K validation set + +| Model | Input Size | AP | Details and Download | +| :--------: | :--------: | :---: | :--------------------------------------------------: | +| HRNet-w48 | 256x256 | 0.728 | [hrnet_ap10k.md](./ap10k/hrnet_ap10k.md) | +| HRNet-w32 | 256x256 | 0.722 | [hrnet_ap10k.md](./ap10k/hrnet_ap10k.md) | +| ResNet-101 | 256x256 | 0.681 | [resnet_ap10k.md](./ap10k/resnet_ap10k.md) | +| ResNet-50 | 256x256 | 0.680 | [resnet_ap10k.md](./ap10k/resnet_ap10k.md) | +| CSPNeXt-m | 256x256 | 0.703 | [cspnext_udp_ap10k.md](./ap10k/cspnext_udp_ap10k.md) | + +### Desert Locust Dataset + +Results on Desert Locust test set + +| Model | Input Size | AUC | EPE | Details and Download | +| :--------: | :--------: | :---: | :--: | :-------------------------------------------: | +| ResNet-152 | 160x160 | 0.925 | 1.49 | [resnet_locust.md](./locust/resnet_locust.md) | +| ResNet-101 | 160x160 | 0.907 | 2.03 | [resnet_locust.md](./locust/resnet_locust.md) | +| ResNet-50 | 160x160 | 0.900 | 2.27 | [resnet_locust.md](./locust/resnet_locust.md) | + +### Grévy’s Zebra Dataset + +Results on Grévy’s Zebra test set + +| Model | Input Size | AUC | EPE | Details and Download | +| :--------: | :--------: | :---: | :--: | :----------------------------------------: | +| ResNet-152 | 160x160 | 0.921 | 1.67 | [resnet_zebra.md](./zebra/resnet_zebra.md) | +| ResNet-101 | 160x160 | 0.915 | 1.83 | [resnet_zebra.md](./zebra/resnet_zebra.md) | +| ResNet-50 | 160x160 | 0.914 | 1.87 | [resnet_zebra.md](./zebra/resnet_zebra.md) | diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md new file mode 100644 index 0000000000000000000000000000000000000000..58b971313fbf4a446a2c9720ac0a687fcc956513 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md @@ -0,0 +1,40 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the 
IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ICCV_2019/html/Cao_Cross-Domain_Adaptation_for_Animal_Pose_Estimation_ICCV_2019_paper.html">Animal-Pose (ICCV'2019)</a></summary> + +```bibtex +@InProceedings{Cao_2019_ICCV, + author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing}, + title = {Cross-Domain Adaptation for Animal Pose Estimation}, + booktitle = {The IEEE International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2019} +} +``` + +</details> + +Results on AnimalPose validation set (1117 instances) + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_hrnet_w32](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.740 | 0.959 | 0.833 | 0.780 | 0.965 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256_20210426.log.json) | +| [pose_hrnet_w48](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.738 | 0.958 | 0.831 | 0.778 | 0.962 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256_20210426.log.json) | diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml new file mode 100644 index 0000000000000000000000000000000000000000..caba133370aafff30be90aa9171f8df66fefe7f4 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml @@ -0,0 +1,34 @@ +Models: +- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py + In Collection: HRNet + Metadata: + Architecture: &id001 + - HRNet + Training Data: Animal-Pose + Name: td-hm_hrnet-w32_8xb64-210e_animalpose-256x256 + Results: + - Dataset: Animal-Pose + Metrics: + AP: 0.740 + AP@0.5: 0.959 + AP@0.75: 0.833 + AR: 0.780 + AR@0.5: 0.965 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: Animal-Pose + Name: td-hm_hrnet-w48_8xb64-210e_animalpose-256x256 + Results: + - Dataset: Animal-Pose + Metrics: + AP: 0.738 + AP@0.5: 0.958 + AP@0.75: 0.831 + AR: 0.778 + AR@0.5: 0.962 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md new file mode 100644 index 
0000000000000000000000000000000000000000..20ddf54031e18f8bb9150fccfccff1f6cd5949bf --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md @@ -0,0 +1,41 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ICCV_2019/html/Cao_Cross-Domain_Adaptation_for_Animal_Pose_Estimation_ICCV_2019_paper.html">Animal-Pose (ICCV'2019)</a></summary> + +```bibtex +@InProceedings{Cao_2019_ICCV, + author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing}, + title = {Cross-Domain Adaptation for Animal Pose Estimation}, + booktitle = {The IEEE International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2019} +} +``` + +</details> + +Results on AnimalPose validation set (1117 instances) + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.691 | 0.947 | 0.770 | 0.736 | 0.955 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256_20210426.log.json) | +| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.696 | 0.948 | 0.774 | 0.736 | 0.951 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256_20210426.log.json) | +| [pose_resnet_152](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py) | 256x256 | 0.704 | 0.938 | 0.786 | 0.748 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256_20210426.log.json) | diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml new file mode 100644 index 0000000000000000000000000000000000000000..345c13c138aafccb5ce1be0ea4136634327248c8 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml @@ -0,0 +1,51 @@ +Models: +- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: Animal-Pose + Name: 
td-hm_res50_8xb64-210e_animalpose-256x256 + Results: + - Dataset: Animal-Pose + Metrics: + AP: 0.691 + AP@0.5: 0.947 + AP@0.75: 0.770 + AR: 0.736 + AR@0.5: 0.955 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: Animal-Pose + Name: td-hm_res101_8xb64-210e_animalpose-256x256 + Results: + - Dataset: Animal-Pose + Metrics: + AP: 0.696 + AP@0.5: 0.948 + AP@0.75: 0.774 + AR: 0.736 + AR@0.5: 0.951 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: Animal-Pose + Name: td-hm_res152_8xb32-210e_animalpose-256x256 + Results: + - Dataset: Animal-Pose + Metrics: + AP: 0.704 + AP@0.5: 0.938 + AP@0.75: 0.786 + AR: 0.748 + AR@0.5: 0.946 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..2680fe8956e7b1cbf186b1c536204917478d721f --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py @@ -0,0 +1,147 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=20, + 
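# one heatmap per keypoint: Animal-Pose annotates 20 keypoints;
+ # HRNet already outputs high-resolution (1/4) features, so the
+ # deconv upsampling layers are disabled below +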
deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AnimalPoseDataset' +data_mode = 'topdown' +data_root = 'data/animalpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..3d4a76d8f506c60493ef7e476cb5ed3310044ba2 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py @@ -0,0 +1,147 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + 
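# three parallel branches at 1/4, 1/8 and 1/16 resolution,
+ # with repeated cross-resolution fusion between them +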
block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=20, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AnimalPoseDataset' +data_mode = 'topdown' +data_root = 'data/animalpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..8ffaabb06f160fb66260507db057686f4621b6b2 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 
103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=20, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AnimalPoseDataset' +data_mode = 'topdown' +data_root = 'data/animalpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed92929c9d42fa0caad87a5f6292f75745bd0bf --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + 
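# SimpleBaseline2D-style head: the default deconv stack upsamples the
+ # stride-32 ResNet features to stride 4 before the final heatmap conv +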
head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=20, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AnimalPoseDataset' +data_mode = 'topdown' +data_root = 'data/animalpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..c053c8881461de72345478da49293a6ca96c1ed4 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=20, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + 
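# flip test: average the heatmaps predicted for the original and the
+ # horizontally flipped image; shift_heatmap compensates the one-pixel
+ # misalignment that flipping introduces +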
flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AnimalPoseDataset' +data_mode = 'topdown' +data_root = 'data/animalpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/animalpose_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..fa3139a71ac9dfa7e50bb742760b42b33178641d --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py @@ -0,0 +1,220 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + 
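# load backbone weights from the ImageNet-pretrained CSPNeXt checkpoint;
+ # prefix='backbone.' selects only the matching keys from that checkpoint +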
prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'AP10KDataset' +data_mode = 'topdown' +data_root = 'data/ap10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-train-split1.json', + data_prefix=dict(img='data/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-val-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-test-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + 
type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-val-split1.json') +test_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-test-split1.json') diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md new file mode 100644 index 0000000000000000000000000000000000000000..fb10359685ecf9b546093b402c292c4c8a8ba0a9 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary> + +```bibtex +@misc{yu2021ap10k, + title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild}, + author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao}, + year={2021}, + eprint={2108.12617}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +Results on AP-10K validation set + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log | +| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: | +| [pose_cspnext_m](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.703 | 0.944 | 0.776 | 0.513 | 0.710 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.json) | diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..8fedc88374a9c027ed3f3268a42b5eed24a980f0 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml @@ -0,0 +1,19 @@ +Models: +- Config: 
configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py + In Collection: UDP + Metadata: + Architecture: &id001 + - UDP + - CSPNeXt + Training Data: AP-10K + Name: cspnext-m_udp_8xb64-210e_ap10k-256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.703 + AP@0.5: 0.944 + AP@0.75: 0.776 + AP (L): 0.71 + AP (M): 0.513 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md new file mode 100644 index 0000000000000000000000000000000000000000..fbdd2cbf9f54807a8fcc00adc31f5839fcf94ea1 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md @@ -0,0 +1,41 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary> + +```bibtex
+@misc{yu2021ap10k, + title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild}, + author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao}, + year={2021}, + eprint={2108.12617}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +Results on AP-10K validation set + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log | +| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: | +| [pose_hrnet_w32](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.722 | 0.935 | 0.789 | 0.557 | 0.729 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.log.json) | +| [pose_hrnet_w48](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.728 | 0.936 | 0.802 | 0.577 | 0.735 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.log.json) | diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..38aade8349ed34a214139574c0e83ae67b37e630 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml @@ -0,0 +1,34 @@ +Models: +- Config:
configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py + In Collection: HRNet + Metadata: + Architecture: &id001 + - HRNet + Training Data: AP-10K + Name: td-hm_hrnet-w32_8xb64-210e_ap10k-256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.722 + AP@0.5: 0.935 + AP@0.75: 0.789 + AP (L): 0.729 + AP (M): 0.557 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: AP-10K + Name: td-hm_hrnet-w48_8xb64-210e_ap10k-256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.728 + AP@0.5: 0.936 + AP@0.75: 0.802 + AP (L): 0.735 + AP (M): 0.577 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.md new file mode 100644 index 0000000000000000000000000000000000000000..11ad6ed033516732bb921f0b32ed1d7336e6517b --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.md @@ -0,0 +1,41 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary> + +```bibtex +@misc{yu2021ap10k, + title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild}, + author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao}, + year={2021}, + eprint={2108.12617}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +Results on AP-10K validation set + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log | +| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: | +| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.680 | 0.926 | 0.738 | 0.552 | 0.687 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.log.json) | +| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.681 | 0.921 | 0.751 | 0.545 | 0.690 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.log.json) | diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml 
b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..84cc4156b9447dacdf0554851602cc2c907814c9 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml @@ -0,0 +1,35 @@ +Models: +- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: AP-10K + Name: td-hm_res50_8xb64-210e_ap10k-256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.680 + AP@0.5: 0.926 + AP@0.75: 0.738 + AP (L): 0.687 + AP (M): 0.552 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: AP-10K + Name: td-hm_res101_8xb64-210e_ap10k-256x256 + Results: + - Dataset: AP-10K + Metrics: + AP: 0.681 + AP@0.5: 0.921 + AP@0.75: 0.751 + AP (L): 0.690 + AP (M): 0.545 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..c61e6384aeea7efcca3ac2f2268fef01663e3234 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py @@ -0,0 +1,164 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + 
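# 17 heatmaps: AP-10K defines a 17-keypoint animal skeleton;
+ # use_target_weight masks unlabeled keypoints out of the MSE loss +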
loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AP10KDataset' +data_mode = 'topdown' +data_root = 'data/ap10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-train-split1.json', + data_prefix=dict(img='data/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-val-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-test-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-val-split1.json') +test_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-test-split1.json') diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..146114a887663a230f7a504e83f13da6fa4a2571 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py @@ -0,0 +1,164 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 
57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AP10KDataset' +data_mode = 'topdown' +data_root = 'data/ap10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-train-split1.json', + data_prefix=dict(img='data/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-val-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-test-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-val-split1.json') +test_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-test-split1.json') diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..be49577511584f892cc4c82797207e8ee1d6a8b4 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py @@ -0,0 +1,135 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# 
optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AP10KDataset' +data_mode = 'topdown' +data_root = 'data/ap10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-train-split1.json', + data_prefix=dict(img='data/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-val-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-test-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-val-split1.json') +test_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-test-split1.json') diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..2172cbe938506ae2faa08ed731710e51203d579f --- /dev/null +++ 
b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py @@ -0,0 +1,135 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AP10KDataset' +data_mode = 'topdown' +data_root = 'data/ap10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-train-split1.json', + data_prefix=dict(img='data/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-val-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/ap10k-test-split1.json', + data_prefix=dict(img='data/'), + test_mode=True, + pipeline=val_pipeline, + )) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-val-split1.json') +test_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/ap10k-test-split1.json') diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.md 
b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.md new file mode 100644 index 0000000000000000000000000000000000000000..bb7c8374926f95a1e726973f2bddc8af04a702ed --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://elifesciences.org/articles/47994">Desert Locust (Elife'2019)</a></summary> + +```bibtex +@article{graving2019deepposekit, + title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning}, + author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D}, + journal={Elife}, + volume={8}, + pages={e47994}, + year={2019}, + publisher={eLife Sciences Publications Limited} +} +``` + +</details> + +Results on Desert Locust test set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py) | 160x160 | 1.000 | 0.900 | 2.27 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160_20210407.log.json) | +| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py) | 160x160 | 1.000 | 0.907 | 2.03 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160_20210407.log.json) | +| [pose_resnet_152](/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py) | 160x160 | 1.000 | 0.925 | 1.49 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160_20210407.log.json) | diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.yml new file mode 100644 index 0000000000000000000000000000000000000000..c7d174fafc5136953beebf9b0dbc8dda5a800199 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.yml @@ -0,0 +1,45 @@ +Models: +- Config: configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: Desert Locust + Name: td-hm_res50_8xb64-210e_locust-160x160 + Results: + - Dataset: Desert Locust + Metrics: + AUC: 0.9 + EPE: 2.27 + PCK@0.2: 1 + Task: 
Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: Desert Locust + Name: td-hm_res101_8xb64-210e_locust-160x160 + Results: + - Dataset: Desert Locust + Metrics: + AUC: 0.907 + EPE: 2.03 + PCK@0.2: 1 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: Desert Locust + Name: td-hm_res152_8xb32-210e_locust-160x160 + Results: + - Dataset: Desert Locust + Metrics: + AUC: 0.925 + EPE: 1.49 + PCK@0.2: 1.0 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py new file mode 100644 index 0000000000000000000000000000000000000000..f6e6c2e39bb28913b7ba180d0ab74c71a24c6cb6 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=35, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'LocustDataset' +data_mode = 'topdown' +data_root = 'data/locust/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_factor=0.25, + rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') 
+] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/locust_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/locust_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py new file mode 100644 index 0000000000000000000000000000000000000000..8f0a58bc88efab80a383df61137dbb45253da636 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=35, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'LocustDataset' +data_mode = 'topdown' +data_root = 'data/locust/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_factor=0.25, + rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', 
shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/locust_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/locust_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py new file mode 100644 index 0000000000000000000000000000000000000000..adbb89ee5b23f8697059f6778f1bfe13bd21432a --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=35, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'LocustDataset' +data_mode = 'topdown' +data_root = 'data/locust/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_factor=0.25, + rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/locust_train.json', + 
data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/locust_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.md new file mode 100644 index 0000000000000000000000000000000000000000..0c12aed0f3407c5303f66326708ebc2d082c5a1f --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://elifesciences.org/articles/47994">Grévy’s Zebra (Elife'2019)</a></summary> + +```bibtex +@article{graving2019deepposekit, + title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning}, + author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D}, + journal={Elife}, + volume={8}, + pages={e47994}, + year={2019}, + publisher={eLife Sciences Publications Limited} +} +``` + +</details> + +Results on Grévy’s Zebra test set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py) | 160x160 | 1.000 | 0.914 | 1.87 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160_20210407.log.json) | +| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py) | 160x160 | 1.000 | 0.915 | 1.83 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160_20210407.log.json) | +| [pose_resnet_152](/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py) | 160x160 | 1.000 | 0.921 | 1.67 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160_20210407.log.json) | diff --git 
a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ecedc9700739b50697314df1c4f2f23416f4cfd --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.yml @@ -0,0 +1,45 @@ +Models: +- Config: configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: "Gr\xE9vy\u2019s Zebra" + Name: td-hm_res50_8xb64-210e_zebra-160x160 + Results: + - Dataset: "Gr\xE9vy\u2019s Zebra" + Metrics: + AUC: 0.914 + EPE: 1.87 + PCK@0.2: 1.0 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: "Gr\xE9vy\u2019s Zebra" + Name: td-hm_res101_8xb64-210e_zebra-160x160 + Results: + - Dataset: "Gr\xE9vy\u2019s Zebra" + Metrics: + AUC: 0.915 + EPE: 1.83 + PCK@0.2: 1.0 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth +- Config: configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: "Gr\xE9vy\u2019s Zebra" + Name: td-hm_res152_8xb32-210e_zebra-160x160 + Results: + - Dataset: "Gr\xE9vy\u2019s Zebra" + Metrics: + AUC: 0.921 + EPE: 1.67 + PCK@0.2: 1.0 + Task: Animal 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py new file mode 100644 index 0000000000000000000000000000000000000000..68c56d80fb91b068d684ec29b5c77da3e920a71f --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=9, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + 
decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'ZebraDataset' +data_mode = 'topdown' +data_root = 'data/zebra/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_factor=0.25, + rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/zebra_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/zebra_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py new file mode 100644 index 0000000000000000000000000000000000000000..abb14eefb84dd91912f84cf407faeabc83ec5c25 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=9, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'ZebraDataset' 
+data_mode = 'topdown' +data_root = 'data/zebra/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_factor=0.25, + rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/zebra_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/zebra_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py new file mode 100644 index 0000000000000000000000000000000000000000..e4d2777751d7837e7c892868f3027b145610de24 --- /dev/null +++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=9, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'ZebraDataset' +data_mode = 'topdown' +data_root = 'data/zebra/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + 
dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_factor=0.25, + rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/zebra_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/zebra_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/README.md b/mmpose/configs/body_2d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d005d3fed76ccdb4260fef2f1a0f2c3466136d67 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/README.md @@ -0,0 +1,21 @@ +# Human Body 2D Pose Estimation + +Multi-person human pose estimation is the task of detecting the poses (or keypoints) of all people in an input image. + +Existing approaches can be categorized as top-down or bottom-up. + +Top-down methods (e.g. DeepPose) divide the task into two stages: human detection and pose estimation. They detect people first, then estimate each person's pose within the detected bounding box. + +Bottom-up approaches (e.g. Associative Embedding) first detect all the keypoints in the image and then group/associate them into person instances. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_body_keypoint.md) to prepare data. + +## Demo + +Please follow [Demo](/demo/docs/en/2d_human_pose_demo.md#2d-human-pose-demo) to run demos. + +<div align=center> +<img src="https://user-images.githubusercontent.com/87690686/187824368-1f1631c3-52bf-4b45-bf9a-a70cd6551e1a.jpg" height="500px" alt><br> +</div> diff --git a/mmpose/configs/body_2d_keypoint/associative_embedding/README.md b/mmpose/configs/body_2d_keypoint/associative_embedding/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f5fa8ea1734e3ef121567aaaa1032fc71b862b2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/associative_embedding/README.md @@ -0,0 +1,9 @@ +# Associative embedding: End-to-end learning for joint detection and grouping (AE) + +Associative Embedding is one of the most popular 2D bottom-up pose estimation approaches: it first detects all the keypoints and then groups/associates them into person instances. + +To group the predicted keypoints into individuals, a tag is also predicted for each detected keypoint. Tags of keypoints belonging to the same person are similar, while tags of different people are distinct, so the keypoints can be grouped according to their tags, as sketched below.
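+
+The following is a minimal, illustrative sketch of this grouping step (a toy example, not the decoder used in this repo; actual decoding is handled by the `AssociativeEmbedding` codec in the configs, which also covers NMS, score refinement and flip testing). Each keypoint candidate is greedily assigned to the person instance whose mean tag is closest, within an illustrative threshold:
+
+```python
+import numpy as np
+
+
+def group_by_tags(candidates, tag_thr=1.0):
+    """candidates: one (N, 4) array of (x, y, score, tag) rows per keypoint type."""
+    people = []  # each person: {'kpts': {kpt_type: (x, y, score)}, 'tags': [...]}
+    for k, cands in enumerate(candidates):
+        for x, y, score, tag in cands:
+            # match only against instances that still miss this keypoint type
+            open_people = [p for p in people if k not in p['kpts']]
+            dists = [abs(tag - np.mean(p['tags'])) for p in open_people]
+            if dists and min(dists) < tag_thr:
+                person = open_people[int(np.argmin(dists))]
+            else:  # tag is far from every existing instance: start a new person
+                person = {'kpts': {}, 'tags': []}
+                people.append(person)
+            person['kpts'][k] = (float(x), float(y), float(score))
+            person['tags'].append(float(tag))
+    return people
+
+
+# two people whose tags cluster around 0.1 and 2.0
+cands = [
+    np.array([[10, 12, 0.9, 0.10], [40, 11, 0.80, 2.0]]),  # keypoint type 0
+    np.array([[11, 20, 0.7, 0.15], [41, 19, 0.85, 1.9]]),  # keypoint type 1
+]
+assert len(group_by_tags(cands)) == 2
+```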
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146514181-84f22623-6b73-4656-89b8-9e7f551e9cc0.png"> +</div> diff --git a/mmpose/configs/body_2d_keypoint/associative_embedding/coco/ae_hrnet-w32_8xb24-300e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/associative_embedding/coco/ae_hrnet-w32_8xb24-300e_coco-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..5adc1aac1adc08cab8710ec83938d5261ebb7e7e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/associative_embedding/coco/ae_hrnet-w32_8xb24-300e_coco-512x512.py @@ -0,0 +1,159 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=300, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1.5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=300, + milestones=[200, 260], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=192) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', interval=50)) + +# codec settings +codec = dict( + type='AssociativeEmbedding', + input_size=(512, 512), + heatmap_size=(128, 128), + sigma=2, + decode_keypoint_order=[ + 0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16 + ], + decode_max_instances=30) + +# model settings +model = dict( + type='BottomupPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='AssociativeEmbeddingHead', + in_channels=32, + num_keypoints=17, + tag_dim=1, + tag_per_keypoint=True, + deconv_out_channels=None, + keypoint_loss=dict(type='KeypointMSELoss', use_target_weight=True), + tag_loss=dict(type='AssociativeEmbeddingLoss', loss_weight=0.001), + # The heatmap will be resized to the input size before decoding + # if ``restore_heatmap_size==True`` + decoder=dict(codec, heatmap_size=codec['input_size'])), + test_cfg=dict( + multiscale_test=False, + flip_test=True, + shift_heatmap=True, + restore_heatmap_size=True, + align_corners=False)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'bottomup' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [] +val_pipeline = [ + dict(type='LoadImage'), + dict( + type='BottomupResize', + input_size=codec['input_size'], + size_factor=32, + resize_mode='expand'), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=24, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + 
data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none', + score_mode='keypoint', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..955293dcb1314f1d57cdb9efc4f62669cf41fabc --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py @@ -0,0 +1,164 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=140, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='MultiStepLR', + begin=0, + end=140, + milestones=[90, 120], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=160) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='DecoupledHeatmap', input_size=(512, 512), heatmap_size=(128, 128)) + +# model settings +model = dict( + type='BottomupPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256), + multiscale_output=True)), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='CIDHead', + in_channels=480, + num_keypoints=17, + gfd_channels=32, + coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0), + decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0), + contrastive_loss=dict( + type='InfoNCELoss', temperature=0.05, loss_weight=1.0), + decoder=codec, + ), + train_cfg=dict(max_train_instances=200), + test_cfg=dict( + multiscale_test=False, + flip_test=True, + shift_heatmap=False, + align_corners=False)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'bottomup' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='BottomupRandomAffine', 
input_size=codec['input_size']), + dict(type='RandomFlip', direction='horizontal'), + dict(type='GenerateTarget', encoder=codec), + dict(type='BottomupGetHeatmapMask'), + dict(type='PackPoseInputs'), +] +val_pipeline = [ + dict(type='LoadImage'), + dict( + type='BottomupResize', + input_size=codec['input_size'], + size_factor=64, + resize_mode='expand'), + dict( + type='PackPoseInputs', + meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape', + 'img_shape', 'input_size', 'input_center', 'input_scale', + 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info', + 'skeleton_links')) +] + +# data loaders +train_dataloader = dict( + batch_size=20, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_thr=0.8, + score_mode='keypoint', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..a114088ae217d8c8a2e0d16bab4459e163c6a129 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py @@ -0,0 +1,164 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=140, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='MultiStepLR', + begin=0, + end=140, + milestones=[90, 120], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=160) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='DecoupledHeatmap', input_size=(512, 512), heatmap_size=(128, 128)) + +# model settings +model = dict( + type='BottomupPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384), + multiscale_output=True)), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 
'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='CIDHead', + in_channels=720, + num_keypoints=17, + gfd_channels=48, + coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0), + decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0), + contrastive_loss=dict( + type='InfoNCELoss', temperature=0.05, loss_weight=1.0), + decoder=codec, + ), + train_cfg=dict(max_train_instances=200), + test_cfg=dict( + multiscale_test=False, + flip_test=True, + shift_heatmap=False, + align_corners=False)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'bottomup' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='BottomupRandomAffine', input_size=codec['input_size']), + dict(type='RandomFlip', direction='horizontal'), + dict(type='GenerateTarget', encoder=codec), + dict(type='BottomupGetHeatmapMask'), + dict(type='PackPoseInputs'), +] +val_pipeline = [ + dict(type='LoadImage'), + dict( + type='BottomupResize', + input_size=codec['input_size'], + size_factor=64, + resize_mode='expand'), + dict( + type='PackPoseInputs', + meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape', + 'img_shape', 'input_size', 'input_center', 'input_scale', + 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info', + 'skeleton_links')) +] + +# data loaders +train_dataloader = dict( + batch_size=20, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_thr=0.8, + score_mode='keypoint', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.md b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..f82cb04db0150ec1b63868ea875c5654fcb800d3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.md @@ -0,0 +1,42 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html">CID (CVPR'2022)</a></summary> + +```bibtex +@InProceedings{Wang_2022_CVPR, + author = {Wang, Dongkai and Zhang, Shiliang}, + title = {Contextual Instance Decoupling for Robust Multi-Person Pose Estimation}, + booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2022}, + pages = {11060-11068} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex 
+@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 without multi-scale test + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py) | 512x512 | 0.704 | 0.894 | 0.775 | 0.753 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_42b7e6e6-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_20230207.json) | +| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py) | 512x512 | 0.715 | 0.900 | 0.782 | 0.765 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_a36c3ecf-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_20230207.json) | diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.yml b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..b230d20e247c24d7e3c998714eabbe6b132007dc --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.yml @@ -0,0 +1,41 @@ +Collections: +- Name: CID + Paper: + Title: Contextual Instance Decoupling for Robust Multi-Person Pose Estimation + URL: https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/cid.md +Models: +- Config: configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py + In Collection: CID + Metadata: + Architecture: &id001 + - CID + - HRNet + Training Data: COCO + Name: cid_hrnet-w32_8xb20-140e_coco-512x512 + Results: + - Dataset: COCO + Metrics: + AP: 0.704 + AP@0.5: 0.894 + AP@0.75: 0.775 + AR: 0.753 + AR@0.5: 0.928 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_42b7e6e6-20230207.pth +- Config: configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py + In Collection: CID + Metadata: + Architecture: *id001 + Training Data: COCO + Name: cid_hrnet-w48_8xb20-140e_coco-512x512 + Results: + - Dataset: COCO + Metrics: + AP: 0.715 + AP@0.5: 0.9 + AP@0.75: 0.782 + AR: 0.765 + AR@0.5: 0.935 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_a36c3ecf-20230207.pth diff --git a/mmpose/configs/body_2d_keypoint/dekr/README.md b/mmpose/configs/body_2d_keypoint/dekr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..04726421c0d67793dc4d2fc55fcf2cf491d3813e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/README.md 
@@ -0,0 +1,22 @@ +# Bottom-up Human Pose Estimation via Disentangled Keypoint Regression (DEKR) + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2104.02300">DEKR (CVPR'2021)</a></summary> + +```bibtex +@inproceedings{geng2021bottom, + title={Bottom-up human pose estimation via disentangled keypoint regression}, + author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={14676--14686}, + year={2021} +} +``` + +</details> + +DEKR is a popular 2D bottom-up pose estimation approach that simultaneously detects all instances and regresses the offsets from each instance center to its joints. + +To predict the offsets more accurately, the offsets of different joints are regressed by separate branches with deformable convolutional layers, so each branch extracts features with convolution kernels whose shapes adapt to the corresponding joint. diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..6f2d03a82fcde5408b65756e73bb10769d71aec4 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py @@ -0,0 +1,186 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=140, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=140, + milestones=[90, 120], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=80) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='SPR', + input_size=(512, 512), + heatmap_size=(128, 128), + sigma=(4, 2), + minimal_diagonal_length=32**0.5, + generate_keypoint_heatmaps=True, + decode_max_instances=30) + +# model settings +model = dict( + type='BottomupPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256), + multiscale_output=True)), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='DEKRHead', + in_channels=480, + num_keypoints=17, + heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True), + displacement_loss=dict( + type='SoftWeightSmoothL1Loss', + use_target_weight=True, + supervise_empty=False, +
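# note: beta below is the smooth-L1 transition point (offset errors smaller
# than beta are penalized quadratically, larger ones linearly); loss_weight
# balances this displacement term against the heatmap loss above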
beta=1 / 9, + loss_weight=0.002, + ), + decoder=codec, + rescore_cfg=dict( + in_channels=74, + norm_indexes=(5, 6), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/kpt_rescore_coco-33d58c5c.pth')), + ), + test_cfg=dict( + multiscale_test=False, + flip_test=True, + nms_dist_thr=0.05, + shift_heatmap=True, + align_corners=False)) + +# enable DDP training when rescore net is used +find_unused_parameters = True + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'bottomup' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='BottomupRandomAffine', input_size=codec['input_size']), + dict(type='RandomFlip', direction='horizontal'), + dict(type='GenerateTarget', encoder=codec), + dict(type='BottomupGetHeatmapMask'), + dict(type='PackPoseInputs'), +] +val_pipeline = [ + dict(type='LoadImage'), + dict( + type='BottomupResize', + input_size=codec['input_size'], + size_factor=32, + resize_mode='expand'), + dict( + type='PackPoseInputs', + meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape', + 'img_shape', 'input_size', 'input_center', 'input_scale', + 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info', + 'skeleton_links')) +] + +# data loaders +train_dataloader = dict( + batch_size=10, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none', + score_mode='keypoint', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..776a6bb0394f5e24d87935f524a788828cd563ea --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py @@ -0,0 +1,187 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=140, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=140, + milestones=[90, 120], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=80) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='SPR', + input_size=(640, 640), + heatmap_size=(160, 160), + sigma=(4, 2), + minimal_diagonal_length=32**0.5, + generate_keypoint_heatmaps=True, + 
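# keep at most 30 decoded pose instances per image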
decode_max_instances=30) + +# model settings +model = dict( + type='BottomupPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384), + multiscale_output=True)), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='DEKRHead', + in_channels=720, + num_keypoints=17, + num_heatmap_filters=48, + heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True), + displacement_loss=dict( + type='SoftWeightSmoothL1Loss', + use_target_weight=True, + supervise_empty=False, + beta=1 / 9, + loss_weight=0.002, + ), + decoder=codec, + rescore_cfg=dict( + in_channels=74, + norm_indexes=(5, 6), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/kpt_rescore_coco-33d58c5c.pth')), + ), + test_cfg=dict( + multiscale_test=False, + flip_test=True, + nms_dist_thr=0.05, + shift_heatmap=True, + align_corners=False)) + +# enable DDP training when rescore net is used +find_unused_parameters = True + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'bottomup' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='BottomupRandomAffine', input_size=codec['input_size']), + dict(type='RandomFlip', direction='horizontal'), + dict(type='GenerateTarget', encoder=codec), + dict(type='BottomupGetHeatmapMask'), + dict(type='PackPoseInputs'), +] +val_pipeline = [ + dict(type='LoadImage'), + dict( + type='BottomupResize', + input_size=codec['input_size'], + size_factor=32, + resize_mode='expand'), + dict( + type='PackPoseInputs', + meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape', + 'img_shape', 'input_size', 'input_center', 'input_scale', + 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info', + 'skeleton_links')) +] + +# data loaders +train_dataloader = dict( + batch_size=10, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none', + score_mode='keypoint', +) 
+test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.md b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..648b9bc735eea503707402c9f90f837288872f50 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2104.02300">DEKR (CVPR'2021)</a></summary> + +```bibtex +@inproceedings{geng2021bottom, + title={Bottom-up human pose estimation via disentangled keypoint regression}, + author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={14676--14686}, + year={2021} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 without multi-scale test + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [HRNet-w32](/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py) | 512x512 | 0.686 | 0.868 | 0.750 | 0.735 | 0.898 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_ac7c17bf-20221228.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_20221228.json) | +| [HRNet-w48](/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py) | 640x640 | 0.714 | 0.883 | 0.777 | 0.762 | 0.915 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_74796c32-20230124.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_20230124.json) | diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.yml b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..0246b0723b93e8c04f61324de7add7f1e569ebce --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.yml 
@@ -0,0 +1,41 @@ +Collections: +- Name: DEKR + Paper: + Title: Bottom-up human pose estimation via disentangled keypoint regression + URL: https://arxiv.org/abs/2104.02300 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/dekr.md +Models: +- Config: configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py + In Collection: DEKR + Metadata: + Architecture: &id001 + - DEKR + - HRNet + Training Data: COCO + Name: dekr_hrnet-w32_8xb10-140e_coco-512x512 + Results: + - Dataset: COCO + Metrics: + AP: 0.686 + AP@0.5: 0.868 + AP@0.75: 0.750 + AR: 0.735 + AR@0.5: 0.898 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_ac7c17bf-20221228.pth +- Config: configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py + In Collection: DEKR + Metadata: + Architecture: *id001 + Training Data: COCO + Name: dekr_hrnet-w48_8xb10-140e_coco-640x640 + Results: + - Dataset: COCO + Metrics: + AP: 0.714 + AP@0.5: 0.883 + AP@0.75: 0.777 + AR: 0.762 + AR@0.5: 0.915 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_74796c32-20230124.pth diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py new file mode 100644 index 0000000000000000000000000000000000000000..c00f0459de9e2bd47ea2ee793315484e96685cbc --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py @@ -0,0 +1,187 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=300, val_interval=20) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=300, + milestones=[200, 260], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=80) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater')) + +# codec settings +codec = dict( + type='SPR', + input_size=(512, 512), + heatmap_size=(128, 128), + sigma=(4, 2), + minimal_diagonal_length=32**0.5, + generate_keypoint_heatmaps=True, + decode_max_instances=30) + +# model settings +model = dict( + type='BottomupPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256), + multiscale_output=True)), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + neck=dict( + type='FeatureMapProcessor', + 
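# concatenate the four HRNet-w32 output scales (32 + 64 + 128 + 256 channels)
# into a single 480-channel feature map, matching the head's in_channels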
concat=True, + ), + head=dict( + type='DEKRHead', + in_channels=480, + num_keypoints=14, + heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True), + displacement_loss=dict( + type='SoftWeightSmoothL1Loss', + use_target_weight=True, + supervise_empty=False, + beta=1 / 9, + loss_weight=0.004, + ), + decoder=codec, + rescore_cfg=dict( + in_channels=59, + norm_indexes=(0, 1), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/kpt_rescore_crowdpose-300c7efe.pth')), + ), + test_cfg=dict( + multiscale_test=False, + flip_test=True, + nms_dist_thr=0.05, + shift_heatmap=True, + align_corners=False)) + +# enable DDP training when rescore net is used +find_unused_parameters = True + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'bottomup' +data_root = 'data/crowdpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='BottomupRandomAffine', input_size=codec['input_size']), + dict(type='RandomFlip', direction='horizontal'), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs'), +] +val_pipeline = [ + dict(type='LoadImage'), + dict( + type='BottomupResize', + input_size=codec['input_size'], + size_factor=32, + resize_mode='expand'), + dict( + type='PackPoseInputs', + meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape', + 'img_shape', 'input_size', 'input_center', 'input_scale', + 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info', + 'skeleton_links')) +] + +# data loaders +train_dataloader = dict( + batch_size=10, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/mmpose_crowdpose_test.json', + nms_mode='none', + score_mode='keypoint', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py new file mode 100644 index 0000000000000000000000000000000000000000..31d637299a81876481455df33013b4262387a36e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py @@ -0,0 +1,188 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=300, val_interval=20) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=300, + milestones=[200, 260], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size 
+auto_scale_lr = dict(base_batch_size=40) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater')) + +# codec settings +codec = dict( + type='SPR', + input_size=(640, 640), + heatmap_size=(160, 160), + sigma=(4, 2), + minimal_diagonal_length=32**0.5, + generate_keypoint_heatmaps=True, + decode_max_instances=30) + +# model settings +model = dict( + type='BottomupPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384), + multiscale_output=True)), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='DEKRHead', + in_channels=720, + num_keypoints=14, + num_heatmap_filters=48, + heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True), + displacement_loss=dict( + type='SoftWeightSmoothL1Loss', + use_target_weight=True, + supervise_empty=False, + beta=1 / 9, + loss_weight=0.004, + ), + decoder=codec, + rescore_cfg=dict( + in_channels=59, + norm_indexes=(0, 1), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/kpt_rescore_crowdpose-300c7efe.pth')), + ), + test_cfg=dict( + multiscale_test=False, + flip_test=True, + nms_dist_thr=0.05, + shift_heatmap=True, + align_corners=False)) + +# enable DDP training when rescore net is used +find_unused_parameters = True + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'bottomup' +data_root = 'data/crowdpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='BottomupRandomAffine', input_size=codec['input_size']), + dict(type='RandomFlip', direction='horizontal'), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs'), +] +val_pipeline = [ + dict(type='LoadImage'), + dict( + type='BottomupResize', + input_size=codec['input_size'], + size_factor=32, + resize_mode='expand'), + dict( + type='PackPoseInputs', + meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape', + 'img_shape', 'input_size', 'input_center', 'input_scale', + 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info', + 'skeleton_links')) +] + +# data loaders +train_dataloader = dict( + batch_size=5, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_test.json', + 
data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/mmpose_crowdpose_test.json', + nms_mode='none', + score_mode='keypoint', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.md b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.md new file mode 100644 index 0000000000000000000000000000000000000000..ea58d95b7f9a2323c68216baf04c4ee1afc2447b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2104.02300">DEKR (CVPR'2021)</a></summary> + +```bibtex +@inproceedings{geng2021bottom, + title={Bottom-up human pose estimation via disentangled keypoint regression}, + author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={14676--14686}, + year={2021} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Li_CrowdPose_Efficient_Crowded_Scenes_Pose_Estimation_and_a_New_Benchmark_CVPR_2019_paper.html">CrowdPose (CVPR'2019)</a></summary> + +```bibtex +@article{li2018crowdpose, + title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark}, + author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu}, + journal={arXiv preprint arXiv:1812.00324}, + year={2018} +} +``` + +</details> + +Results on CrowdPose test without multi-scale test + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP (E) | AP (M) | AP (H) | ckpt | log | +| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: | +| [HRNet-w32](/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py) | 512x512 | 0.663 | 0.857 | 0.714 | 0.740 | 0.671 | 0.576 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-140e_crowdpose-512x512_147bae97-20221228.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-140e_crowdpose-512x512_20221228.json) | +| [HRNet-w48](/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py) | 640x640 | 0.679 | 0.869 | 0.731 | 0.753 | 0.688 | 0.593 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_4ea6031e-20230128.pth) 
| [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_20230128.json) | diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.yml b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.yml new file mode 100644 index 0000000000000000000000000000000000000000..02312e8cbacc6f8348184e50390cb2ab951965b3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.yml @@ -0,0 +1,37 @@ +Models: +- Config: configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py + In Collection: DEKR + Metadata: + Architecture: &id001 + - DEKR + - HRNet + Training Data: CrowdPose + Name: dekr_hrnet-w32_8xb10-300e_crowdpose-512x512 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.663 + AP@0.5: 0.857 + AP@0.75: 0.714 + AP (E): 0.74 + AP (M): 0.671 + AP (H): 0.576 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-140e_crowdpose-512x512_147bae97-20221228.pth +- Config: configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py + In Collection: DEKR + Metadata: + Architecture: *id001 + Training Data: CrowdPose + Name: dekr_hrnet-w48_8xb5-300e_crowdpose-640x640 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.679 + AP@0.5: 0.869 + AP@0.75: 0.731 + AP (E): 0.753 + AP (M): 0.688 + AP (H): 0.593 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/bottom_up/dekr/hrnet_w48_crowdpose_640x640-ef6b6040_20220930.pth diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/README.md b/mmpose/configs/body_2d_keypoint/integral_regression/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d60eaa1a575686006139a8584851e994b7b29e60 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/README.md @@ -0,0 +1,15 @@ +# Top-down integral-regression-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. At the second stage, integral-regression-based methods apply a simple integral operation that relates and unifies the heatmap and joint-regression formulations in a differentiable way, thus obtaining the keypoint coordinates from the features extracted within the bounding-box area, following the paradigm introduced in [Integral Human Pose Regression](https://arxiv.org/abs/1711.08229).
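The integral operation referenced above is a differentiable soft-argmax: the predicted heatmap is normalized into a probability map, and the keypoint coordinate is read off as the expected value of the pixel grid under that distribution. A minimal sketch of the idea (illustrative function and tensor names, not the actual mmpose implementation):

```python
import torch
import torch.nn.functional as F

def soft_argmax_2d(heatmaps: torch.Tensor) -> torch.Tensor:
    """Integral regression over (B, K, H, W) heatmaps -> (B, K, 2) xy coords."""
    b, k, h, w = heatmaps.shape
    # normalize each heatmap into a probability distribution over pixels
    probs = F.softmax(heatmaps.flatten(2), dim=-1).view(b, k, h, w)
    xs = torch.arange(w, dtype=probs.dtype, device=probs.device)
    ys = torch.arange(h, dtype=probs.dtype, device=probs.device)
    # expected coordinate = probability-weighted sum over the pixel grid
    x = (probs.sum(dim=2) * xs).sum(dim=-1)  # marginalize rows, then E[x]
    y = (probs.sum(dim=3) * ys).sum(dim=-1)  # marginalize cols, then E[y]
    return torch.stack([x, y], dim=-1)
```

Because the expectation is differentiable, a coordinate loss (e.g. the `SmoothL1Loss` terms in the configs below) can be backpropagated through the heatmap, which is what unifies the heatmap and regression views of the task.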
+ +## Results and Models + +### COCO Dataset + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Model | Input Size | AP | AR | Details and Download | +| :------------------: | :--------: | :---: | :---: | :---------------------------------------------------: | +| ResNet-50+Debias-IPR | 256x256 | 0.675 | 0.765 | [resnet_debias_coco.md](./coco/resnet_debias_coco.md) | +| ResNet-50+DSNT | 256x256 | 0.674 | 0.764 | [resnet_dsnt_coco.md](./coco/resnet_dsnt_coco.md) | +| ResNet-50+IPR | 256x256 | 0.633 | 0.730 | [resnet_ipr_coco.md](./coco/resnet_ipr_coco.md) | diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..3dfaeeda8b850fa361eebbf5342ec64842d858e8 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py @@ -0,0 +1,134 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='IntegralRegressionLabel', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2.0, + normalize=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + ), + head=dict( + type='DSNTHead', + in_channels=2048, + in_featuremap_size=(8, 8), + num_joints=17, + loss=dict( + type='MultipleLossWrapper', + losses=[ + dict(type='SmoothL1Loss', use_target_weight=True), + dict(type='KeypointMSELoss', use_target_weight=True) + ]), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + shift_heatmap=True, + ), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth')) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +test_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, 
+ )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..9618c810ea20b0d147f71930034b616f6bed3a97 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py @@ -0,0 +1,136 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='IntegralRegressionLabel', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2.0, + normalize=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + ), + head=dict( + type='DSNTHead', + in_channels=2048, + in_featuremap_size=(8, 8), + num_joints=17, + debias=True, + beta=10., + loss=dict( + type='MultipleLossWrapper', + losses=[ + dict(type='SmoothL1Loss', use_target_weight=True), + dict(type='JSDiscretLoss', use_target_weight=True) + ]), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + shift_heatmap=True, + ), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth')) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +test_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + 
sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..8c3897fce1acd0deabaedacea3b38b08b9138330 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py @@ -0,0 +1,134 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='IntegralRegressionLabel', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2.0, + normalize=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + ), + head=dict( + type='DSNTHead', + in_channels=2048, + in_featuremap_size=(8, 8), + num_joints=17, + loss=dict( + type='MultipleLossWrapper', + losses=[ + dict(type='SmoothL1Loss', use_target_weight=True), + dict(type='JSDiscretLoss', use_target_weight=True) + ]), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + shift_heatmap=True, + ), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth')) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +test_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + 
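# crop the detected person box and warp it to the fixed model input size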
dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..40e3660e4f24f0d67d680d0f0ddc6c9f6c6c1014 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md @@ -0,0 +1,57 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://openaccess.thecvf.com/content/ICCV2021/papers/Gu_Removing_the_Bias_of_Integral_Pose_Regression_ICCV_2021_paper.pdf">Debias IPR (ICCV'2021)</a></summary> + +```bibtex +@inproceedings{gu2021removing, + title={Removing the Bias of Integral Pose Regression}, + author={Gu, Kerui and Yang, Linlin and Yao, Angela}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={11067--11076}, + year={2021} + } +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | 
:-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [debias-ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py) | 256x256 | 0.675 | 0.872 | 0.740 | 0.765 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.yml b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..b965238a5d482ec7212f8f95d0e4abadfe6773ce --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.yml @@ -0,0 +1,25 @@ +Collections: +- Name: DebiasIPR + Paper: + Title: Removing the Bias of Integral Pose Regression + URL: https://openaccess.thecvf.com/content/ICCV2021/papers/Gu_Removing_the_Bias_of_Integral_Pose_Regression_ICCV_2021_paper.pdf + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/debias_ipr.md +Models: +- Config: configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py + In Collection: DebiasIPR + Metadata: + Architecture: &id001 + - Debias + - ResNet + Training Data: COCO + Name: ipr_res50_debias-8xb64-210e_coco-256x256 + Results: + - Dataset: COCO + Metrics: + AP: 0.675 + AP@0.5: 0.872 + AP@0.75: 0.74 + AR: 0.765 + AR@0.5: 0.928 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.pth diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..608974ae82e6f05913c98c35fb8fff3e5220b83a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1801.07372v2">DSNT (2018)</a></summary> + +```bibtex +@article{nibali2018numerical, + title={Numerical Coordinate Regression with Convolutional Neural Networks}, + author={Nibali, Aiden and He, Zhen and Morgan, Stuart and Prendergast, Luke}, + journal={arXiv preprint arXiv:1801.07372}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire,
Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ipr_resnet_50_dsnt](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py) | 256x256 | 0.674 | 0.870 | 0.744 | 0.764 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.yml b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..f34e839c105c514b63f1b434207d56cfa1d57726 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.yml @@ -0,0 +1,25 @@ +Collections: +- Name: DSNT + Paper: + Title: Numerical Coordinate Regression with Convolutional Neural Networks + URL: https://arxiv.org/abs/1801.07372v2 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/dsnt.md +Models: +- Config: configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py + In Collection: DSNT + Metadata: + Architecture: &id001 + - DSNT + - ResNet + Training Data: COCO + Name: ipr_res50_dsnt-8xb64-210e_coco-256x256 + Results: + - Dataset: COCO + Metrics: + AP: 0.674 + AP@0.5: 0.87 + AP@0.75: 0.744 + AR: 0.764 + AR@0.5: 0.928 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.pth diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..ce4fbae5011b451b4fd639ac896de1b150b2592a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md @@ -0,0 +1,57 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1711.08229">IPR (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{sun2018integral, + title={Integral human pose regression}, + author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={529--545}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing 
and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.yml b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..fa22133f3eb3fd4020f4c17ead813fc1908b23cb --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.yml @@ -0,0 +1,25 @@ +Collections: +- Name: IPR + Paper: + Title: Integral human pose regression + URL: https://arxiv.org/abs/1711.08229 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/ipr.md +Models: +- Config: configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py + In Collection: IPR + Metadata: + Architecture: &id001 + - IPR + - ResNet + Training Data: COCO + Name: ipr_res50_8xb64-210e_coco-256x256 + Results: + - Dataset: COCO + Metrics: + AP: 0.633 + AP@0.5: 0.86 + AP@0.75: 0.703 + AR: 0.73 + AR@0.5: 0.919 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.pth diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/README.md b/mmpose/configs/body_2d_keypoint/rtmpose/README.md new file mode 100644 index 0000000000000000000000000000000000000000..303797491770ccaf6de7945035253b23f1e78e74 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/README.md @@ -0,0 +1,39 @@ +# RTMPose + +Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet their application in the industrial community still suffers from heavy model parameters and high latency.
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and inference deployment, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose. +Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries. +To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deployment on a mobile device. + +## Results and Models + +### COCO Dataset + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Model | Input Size | AP | AR | Details and Download | +| :----------------: | :--------: | :---: | :---: | :---------------------------------------: | +| RTMPose-t | 256x192 | 0.682 | 0.736 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-s | 256x192 | 0.716 | 0.768 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-m | 256x192 | 0.746 | 0.795 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-l | 256x192 | 0.758 | 0.806 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-t-aic-coco | 256x192 | 0.685 | 0.738 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-s-aic-coco | 256x192 | 0.722 | 0.772 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-m-aic-coco | 256x192 | 0.758 | 0.806 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-l-aic-coco | 256x192 | 0.765 | 0.813 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-m-aic-coco | 384x288 | 0.770 | 0.816 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | +| RTMPose-l-aic-coco | 384x288 | 0.773 | 0.819 | [rtmpose_coco.md](./coco/rtmpose_coco.md) | + +### MPII Dataset + +| Model | Input Size | PCKh@0.5 | PCKh@0.1 | Details and Download | +| :-------: | :--------: | :------: | :------: | :---------------------------------------: | +| RTMPose-m | 256x256 | 0.907 | 0.348 | [rtmpose_mpii.md](./mpii/rtmpose_mpii.md) | + +### CrowdPose Dataset + +Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector + +| Model | Input Size | AP | AR | Details and Download | +| :-------: | :--------: | :---: | :---: | :------------------------------------------------------: | +| RTMPose-m | 256x192 | 0.706 | 0.788 | [rtmpose_crowdpose.md](./crowdpose/rtmpose_crowdpose.md) | diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..dec6a3615b63ae68e6cdf3d44c28e506fa1755f5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py @@ -0,0 +1,553 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, +
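# linear warm-up: scale the lr from base_lr * 1e-5 up to base_lr over the
# first 1000 iterations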
begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-l_udp-body7_210e-256x192-5e9558ef_20230504.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.5, 1.5], + rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# mapping 
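+# Each (source_index, target_index) pair below moves a keypoint from the source
+# dataset's ordering into the 17-keypoint COCO layout; COCO slots that no pair
+# writes to are left unannotated. A rough sketch of what KeypointConverter does
+# with such a table (hypothetical helper, for illustration only, not part of
+# mmpose):
+#
+#     def remap(kpts_src, mapping, num_keypoints=17):
+#         # kpts_src: per-instance list of (x, y, visibility) tuples
+#         kpts_dst = [(0.0, 0.0, 0.0)] * num_keypoints
+#         for src, dst in mapping:
+#             kpts_dst[dst] = kpts_src[src]
+#         return kpts_dst
+#
+#     remap(aic_keypoints, aic_coco)  # -> 17 COCO-ordered keypoints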
+aic_coco = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), +] + +crowdpose_coco = [ + (0, 5), + (1, 6), + (2, 7), + (3, 8), + (4, 9), + (5, 10), + (6, 11), + (7, 12), + (8, 13), + (9, 14), + (10, 15), + (11, 16), +] + +mpii_coco = [ + (0, 16), + (1, 14), + (2, 12), + (3, 11), + (4, 13), + (5, 15), + (10, 10), + (11, 8), + (12, 6), + (13, 5), + (14, 7), + (15, 9), +] + +jhmdb_coco = [ + (3, 6), + (4, 5), + (5, 12), + (6, 11), + (7, 8), + (8, 7), + (9, 14), + (10, 13), + (11, 10), + (12, 9), + (13, 16), + (14, 15), +] + +halpe_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +ochuman_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +posetrack_coco = [ + (0, 0), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], +) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +dataset_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +dataset_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_train.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +dataset_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_train.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +dataset_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_train_v1.json', + data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +dataset_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_train.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + 
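+        # CombinedDataset concatenates the seven training sources under the
+        # shared COCO metainfo loaded above; each sub-dataset's own pipeline
+        # (its KeypointConverter) runs first, so samples reach the common
+        # train_pipeline already in the 17-keypoint COCO format.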
datasets=[ + dataset_coco, + dataset_aic, + dataset_crowdpose, + dataset_mpii, + dataset_jhmdb, + dataset_halpe, + dataset_posetrack, + ], + pipeline=train_pipeline, + test_mode=False, + )) + +# val datasets +val_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +val_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_val.json', + data_prefix=dict( + img='pose/ai_challenge/ai_challenger_keypoint' + '_validation_20170911/keypoint_validation_images_20170911/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +val_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +val_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_val.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +val_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_test.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +val_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_val_v1.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +val_ochuman = dict( + type='OCHumanDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='ochuman/annotations/' + 'ochuman_coco_format_val_range_0.00_1.00.json', + data_prefix=dict(img='pose/OCHuman/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco) + ], +) + +val_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_val.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + val_coco, + val_aic, + val_crowdpose, + val_mpii, + val_jhmdb, + val_halpe, + val_ochuman, + val_posetrack, + ], + pipeline=val_pipeline, + test_mode=True, + )) + +# hooks +default_hooks = dict( + 
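+    # Rank checkpoints by COCO AP on the validation set ('greater': higher is
+    # better); max_keep_ckpts=1 prunes regular checkpoints down to the most
+    # recent one, while the best-AP checkpoint is kept separately.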
checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) +# default_hooks = dict( +# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = [ + dict(type='PCKAccuracy', thr=0.1), + dict(type='AUC'), + dict(type='EPE'), +] diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..b5c83dd50dd28a0c7775388dc7a3cbab7b0b1b41 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py @@ -0,0 +1,553 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(288, 384), + sigma=(6., 6.93), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-l_udp-body7_210e-384x288-b15bc30d_20230504.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(9, 12), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + 
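+    # Top-down ordering: GetBBoxCenterScale turns the person bbox into a
+    # center/scale pair, the geometric augmentations jitter it, and
+    # TopdownAffine warps the box to codec['input_size']; the color and
+    # dropout transforms then act on the fixed-size crop before SimCC targets
+    # are encoded and packed.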
dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.5, 1.5], + rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# mapping +aic_coco = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), +] + +crowdpose_coco = [ + (0, 5), + (1, 6), + (2, 7), + (3, 8), + (4, 9), + (5, 10), + (6, 11), + (7, 12), + (8, 13), + (9, 14), + (10, 15), + (11, 16), +] + +mpii_coco = [ + (0, 16), + (1, 14), + (2, 12), + (3, 11), + (4, 13), + (5, 15), + (10, 10), + (11, 8), + (12, 6), + (13, 5), + (14, 7), + (15, 9), +] + +jhmdb_coco = [ + (3, 6), + (4, 5), + (5, 12), + (6, 11), + (7, 8), + (8, 7), + (9, 14), + (10, 13), + (11, 10), + (12, 9), + (13, 16), + (14, 15), +] + +halpe_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +ochuman_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +posetrack_coco = [ + (0, 0), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], +) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +dataset_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + 
ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +dataset_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_train.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +dataset_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_train.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +dataset_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_train_v1.json', + data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +dataset_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_train.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + dataset_coco, + dataset_aic, + dataset_crowdpose, + dataset_mpii, + dataset_jhmdb, + dataset_halpe, + dataset_posetrack, + ], + pipeline=train_pipeline, + test_mode=False, + )) + +# val datasets +val_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +val_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_val.json', + data_prefix=dict( + img='pose/ai_challenge/ai_challenger_keypoint' + '_validation_20170911/keypoint_validation_images_20170911/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +val_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +val_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_val.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +val_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_test.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +val_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_val_v1.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], 
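+    # Halpe's first 17 (of 136) keypoints follow the COCO ordering, so the
+    # mapping above is the identity; num_keypoints=17 drops the remaining
+    # whole-body points.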
+) + +val_ochuman = dict( + type='OCHumanDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='ochuman/annotations/' + 'ochuman_coco_format_val_range_0.00_1.00.json', + data_prefix=dict(img='pose/OCHuman/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco) + ], +) + +val_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_val.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + val_coco, + val_aic, + val_crowdpose, + val_mpii, + val_jhmdb, + val_halpe, + val_ochuman, + val_posetrack, + ], + pipeline=val_pipeline, + test_mode=True, + )) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) +# default_hooks = dict( +# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = [ + dict(type='PCKAccuracy', thr=0.1), + dict(type='AUC'), + dict(type='EPE'), +] diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..39621ceb70429f4cbba45c81bc8c9ea05acd4fc5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py @@ -0,0 +1,553 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the 
actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-body7_210e-256x192-e0c9327b_20230504.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0.0, + drop_path=0.0, + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.5, 1.5], + rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# mapping +aic_coco = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), +] + +crowdpose_coco = [ + (0, 5), + (1, 6), + (2, 7), + (3, 8), + (4, 9), + (5, 10), + (6, 11), + (7, 12), + (8, 13), + (9, 14), + (10, 
15), + (11, 16), +] + +mpii_coco = [ + (0, 16), + (1, 14), + (2, 12), + (3, 11), + (4, 13), + (5, 15), + (10, 10), + (11, 8), + (12, 6), + (13, 5), + (14, 7), + (15, 9), +] + +jhmdb_coco = [ + (3, 6), + (4, 5), + (5, 12), + (6, 11), + (7, 8), + (8, 7), + (9, 14), + (10, 13), + (11, 10), + (12, 9), + (13, 16), + (14, 15), +] + +halpe_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +ochuman_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +posetrack_coco = [ + (0, 0), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], +) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +dataset_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +dataset_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_train.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +dataset_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_train.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +dataset_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_train_v1.json', + data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +dataset_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_train.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + dataset_coco, + dataset_aic, + dataset_crowdpose, + dataset_mpii, + dataset_jhmdb, + dataset_halpe, + dataset_posetrack, + ], + pipeline=train_pipeline, + test_mode=False, + )) + +# val datasets +val_coco = dict( + type=dataset_type, + data_root=data_root, + 
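+    # The val_* datasets below mirror the training sources and feed the
+    # CombinedDataset used by test_dataloader for cross-dataset evaluation;
+    # val_dataloader itself evaluates on COCO val2017 alone, using detector
+    # boxes (COCO_val2017_detections_AP_H_56_person.json) rather than
+    # ground-truth boxes.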
data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +val_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_val.json', + data_prefix=dict( + img='pose/ai_challenge/ai_challenger_keypoint' + '_validation_20170911/keypoint_validation_images_20170911/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +val_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +val_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_val.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +val_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_test.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +val_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_val_v1.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +val_ochuman = dict( + type='OCHumanDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='ochuman/annotations/' + 'ochuman_coco_format_val_range_0.00_1.00.json', + data_prefix=dict(img='pose/OCHuman/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco) + ], +) + +val_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_val.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + val_coco, + val_aic, + val_crowdpose, + val_mpii, + val_jhmdb, + val_halpe, + val_ochuman, + val_posetrack, + ], + pipeline=val_pipeline, + test_mode=True, + )) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) +# default_hooks = dict( +# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + 
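+        # Exponential moving average of the model weights (per-update momentum
+        # 0.0002); update_buffers=True below averages BN statistics as well.
+        # The PipelineSwitchHook that follows swaps in the milder
+        # train_pipeline_stage2 for the last stage2_num_epochs = 30 epochs,
+        # i.e. from epoch 180 onward.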
update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = [ + dict(type='PCKAccuracy', thr=0.1), + dict(type='AUC'), + dict(type='EPE') +] diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..42a9355c6a3531d4ab4d013cc74cf523fbb720d9 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py @@ -0,0 +1,553 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(288, 384), + sigma=(6., 6.93), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-body7_210e-384x288-b9bc2b57_20230504.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(9, 12), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0.0, + drop_path=0.0, + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + 
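+    # Stage-1 recipe: PhotometricDistortion plus an always-on CoarseDropout
+    # (p=1.0); train_pipeline_stage2 drops PhotometricDistortion and halves
+    # the dropout probability for the final fine-tuning epochs.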
dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.5, 1.5], + rotate_factor=90), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# mapping +aic_coco = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), +] + +crowdpose_coco = [ + (0, 5), + (1, 6), + (2, 7), + (3, 8), + (4, 9), + (5, 10), + (6, 11), + (7, 12), + (8, 13), + (9, 14), + (10, 15), + (11, 16), +] + +mpii_coco = [ + (0, 16), + (1, 14), + (2, 12), + (3, 11), + (4, 13), + (5, 15), + (10, 10), + (11, 8), + (12, 6), + (13, 5), + (14, 7), + (15, 9), +] + +jhmdb_coco = [ + (3, 6), + (4, 5), + (5, 12), + (6, 11), + (7, 8), + (8, 7), + (9, 14), + (10, 13), + (11, 10), + (12, 9), + (13, 16), + (14, 15), +] + +halpe_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +ochuman_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +posetrack_coco = [ + (0, 0), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], +) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +dataset_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +dataset_mpii = dict( + type='MpiiDataset', + data_root=data_root, + 
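+    # MPII's 16 keypoints map onto the COCO body joints; MPII indices 6-9
+    # (pelvis, thorax, upper neck, head top) have no COCO counterpart and are
+    # dropped, while COCO's five face keypoints stay unannotated.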
data_mode=data_mode, + ann_file='mpii/annotations/mpii_train.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +dataset_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_train.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +dataset_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_train_v1.json', + data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +dataset_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_train.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + dataset_coco, + dataset_aic, + dataset_crowdpose, + dataset_mpii, + dataset_jhmdb, + dataset_halpe, + dataset_posetrack, + ], + pipeline=train_pipeline, + test_mode=False, + )) + +# val datasets +val_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +val_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_val.json', + data_prefix=dict( + img='pose/ai_challenge/ai_challenger_keypoint' + '_validation_20170911/keypoint_validation_images_20170911/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +val_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +val_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_val.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +val_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_test.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +val_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_val_v1.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +val_ochuman = dict( + type='OCHumanDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='ochuman/annotations/' + 'ochuman_coco_format_val_range_0.00_1.00.json', + data_prefix=dict(img='pose/OCHuman/images/'), + pipeline=[ + dict(type='KeypointConverter', 
num_keypoints=17, mapping=ochuman_coco) + ], +) + +val_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_val.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + val_coco, + val_aic, + val_crowdpose, + val_mpii, + val_jhmdb, + val_halpe, + val_ochuman, + val_posetrack, + ], + pipeline=val_pipeline, + test_mode=True, + )) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) +# default_hooks = dict( +# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = [ + dict(type='PCKAccuracy', thr=0.1), + dict(type='AUC'), + dict(type='EPE') +] diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..3e82f07cf6b465e1b19250a96117e4d439061448 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py @@ -0,0 +1,553 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + 
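+    # RTMPose-s variant: CSPNeXt is scaled via deepen_factor/widen_factor
+    # (t: 0.167/0.375, s: 0.33/0.5, m: 0.67/0.75, l: 1.0/1.0), which sets the
+    # head's in_channels to 384/512/768/1024 respectively.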
type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-s_udp-body7_210e-256x192-8c9ccbdb_20230504.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=512, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# mapping +aic_coco = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), +] + +crowdpose_coco = [ + (0, 5), + (1, 6), + (2, 7), + (3, 8), + (4, 9), + (5, 10), + (6, 11), + (7, 12), + (8, 13), + (9, 14), + (10, 15), + (11, 16), +] + +mpii_coco = [ + (0, 16), + (1, 14), + (2, 12), + (3, 11), + (4, 13), + (5, 15), + (10, 10), + (11, 8), + (12, 6), + (13, 5), + (14, 7), + (15, 9), +] + +jhmdb_coco = [ + (3, 6), + (4, 5), + (5, 12), + (6, 11), + (7, 8), + (8, 7), + (9, 14), + (10, 13), + 
(11, 10), + (12, 9), + (13, 16), + (14, 15), +] + +halpe_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +ochuman_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +posetrack_coco = [ + (0, 0), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], +) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +dataset_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +dataset_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_train.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +dataset_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_train.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +dataset_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_train_v1.json', + data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +dataset_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_train.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + dataset_coco, + dataset_aic, + dataset_crowdpose, + dataset_mpii, + dataset_jhmdb, + dataset_halpe, + dataset_posetrack, + ], + pipeline=train_pipeline, + test_mode=False, + )) + +# val datasets +val_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +val_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_val.json', + 
data_prefix=dict( + img='pose/ai_challenge/ai_challenger_keypoint' + '_validation_20170911/keypoint_validation_images_20170911/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +val_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +val_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_val.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +val_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_test.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +val_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_val_v1.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +val_ochuman = dict( + type='OCHumanDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='ochuman/annotations/' + 'ochuman_coco_format_val_range_0.00_1.00.json', + data_prefix=dict(img='pose/OCHuman/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco) + ], +) + +val_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_val.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + val_coco, + val_aic, + val_crowdpose, + val_mpii, + val_jhmdb, + val_halpe, + val_ochuman, + val_posetrack, + ], + pipeline=val_pipeline, + test_mode=True, + )) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) +# default_hooks = dict( +# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 
'coco/annotations/person_keypoints_val2017.json') +test_evaluator = [ + dict(type='PCKAccuracy', thr=0.1), + dict(type='AUC'), + dict(type='EPE') +] diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..fd705251db31acc77d9307aadeb5899688882182 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py @@ -0,0 +1,554 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.167, + widen_factor=0.375, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-tiny_udp-body7_210e-256x192-a3775292_20230504.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=384, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, +
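# float hole sizes are fractions of the image side in Albumentations, so one patch covers 20-40% per side +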
min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# mapping +aic_coco = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), +] + +crowdpose_coco = [ + (0, 5), + (1, 6), + (2, 7), + (3, 8), + (4, 9), + (5, 10), + (6, 11), + (7, 12), + (8, 13), + (9, 14), + (10, 15), + (11, 16), +] + +mpii_coco = [ + (0, 16), + (1, 14), + (2, 12), + (3, 11), + (4, 13), + (5, 15), + (10, 10), + (11, 8), + (12, 6), + (13, 5), + (14, 7), + (15, 9), +] + +jhmdb_coco = [ + (3, 6), + (4, 5), + (5, 12), + (6, 11), + (7, 8), + (8, 7), + (9, 14), + (10, 13), + (11, 10), + (12, 9), + (13, 16), + (14, 15), +] + +halpe_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +ochuman_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +posetrack_coco = [ + (0, 0), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], +) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +dataset_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +dataset_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_train.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +dataset_jhmdb = dict( + type='JhmdbDataset', + 
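# sub-JHMDB joints are remapped to COCO order; COCO joints without a source index stay unannotated +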
data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_train.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +dataset_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_train_v1.json', + data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +dataset_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_train.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + dataset_coco, + dataset_aic, + dataset_crowdpose, + dataset_mpii, + dataset_jhmdb, + dataset_halpe, + dataset_posetrack, + ], + pipeline=train_pipeline, + test_mode=False, + )) + +# val datasets +val_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +val_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_val.json', + data_prefix=dict( + img='pose/ai_challenge/ai_challenger_keypoint' + '_validation_20170911/keypoint_validation_images_20170911/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco) + ], +) + +val_crowdpose = dict( + type='CrowdPoseDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco) + ], +) + +val_mpii = dict( + type='MpiiDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='mpii/annotations/mpii_val.json', + data_prefix=dict(img='pose/MPI/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco) + ], +) + +val_jhmdb = dict( + type='JhmdbDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='jhmdb/annotations/Sub1_test.json', + data_prefix=dict(img='pose/JHMDB/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco) + ], +) + +val_halpe = dict( + type='HalpeDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_val_v1.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco) + ], +) + +val_ochuman = dict( + type='OCHumanDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='ochuman/annotations/' + 'ochuman_coco_format_val_range_0.00_1.00.json', + data_prefix=dict(img='pose/OCHuman/images/'), + pipeline=[ + dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco) + ], +) + +val_posetrack = dict( + type='PoseTrack18Dataset', + data_root=data_root, + data_mode=data_mode, + ann_file='posetrack18/annotations/posetrack18_val.json', + data_prefix=dict(img='pose/PoseChallenge2018/'), + 
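# PoseTrack18's head keypoints (indices 1-2) have no COCO counterpart, so posetrack_coco omits them +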
pipeline=[ + dict( + type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco) + ], +) + +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) + +test_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[ + val_coco, + val_aic, + val_crowdpose, + val_mpii, + val_jhmdb, + val_halpe, + val_ochuman, + val_posetrack, + ], + pipeline=val_pipeline, + test_mode=True, + )) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) +# default_hooks = dict( +# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + # dict( + # type='EMAHook', + # ema_type='ExpMomentumEMA', + # momentum=0.0002, + # update_buffers=True, + # priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') + +test_evaluator = [ + dict(type='PCKAccuracy', thr=0.1), + dict(type='AUC'), + dict(type='EPE') +] diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.md b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.md new file mode 100644 index 0000000000000000000000000000000000000000..a294be844ec1ee9d1e9c72ffba4b375af97efe07 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.md @@ -0,0 +1,76 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2303.07399, + doi = {10.48550/ARXIV.2303.07399}, + url = {https://arxiv.org/abs/2303.07399}, + author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose}, + publisher = {arXiv}, + year = {2023}, + copyright = {Creative Commons Attribution 4.0 International} +} + +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a 
href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +- Results on COCO val2017, obtained with a person detector that scores 56.4 human AP on COCO val2017. +- `*` denotes a model trained on 7 public datasets: + - [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic) + - [MS COCO](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#coco) + - [CrowdPose](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#crowdpose) + - [MPII](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#mpii) + - [sub-JHMDB](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#sub-jhmdb-dataset) + - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe) + - [PoseTrack18](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#posetrack18) +- `Body8` denotes the 7 datasets above plus [OCHuman](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#ochuman), used together for evaluation. + +| Config | Input Size | AP<sup><br>(COCO) | PCK@0.1<sup><br>(Body8) | AUC<sup><br>(Body8) | EPE<sup><br>(Body8) | Params(M) | FLOPs(G) | Download | +| :--------------------------------------------: | :--------: | :---------------: | :---------------------: | :-----------------: | :-----------------: | :-------: | :------: | :-----------------------------------------------: | +| [RTMPose-t\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py) | 256x192 | 65.9 | 91.44 | 63.18 | 19.45 | 3.34 | 0.36 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-t_simcc-body7_pt-body7_420e-256x192-026a1439_20230504.pth) | +| [RTMPose-s\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py) | 256x192 | 69.7 | 92.45 | 65.15 | 17.85 | 5.47 | 0.68 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-body7_pt-body7_420e-256x192-acd4a1ef_20230504.pth) | +| [RTMPose-m\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py) | 256x192 | 74.9 | 94.25 | 68.59 | 15.12 | 13.59 | 1.93 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-256x192-e48f03d0_20230504.pth) | +| [RTMPose-l\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py) | 256x192 | 76.7 | 95.08 | 70.14 | 13.79 | 27.66 | 4.16 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-256x192-4dba18fc_20230504.pth) | +| [RTMPose-m\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py) | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth) | +| [RTMPose-l\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py) | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | 
[Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth) | diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.yml b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.yml new file mode 100644 index 0000000000000000000000000000000000000000..c0f5a90863edbab45cf791a1ca478610d1eb8d55 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.yml @@ -0,0 +1,93 @@ +Collections: +- Name: RTMPose + Paper: + Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose" + URL: https://arxiv.org/abs/2303.07399 + README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md +Models: +- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py + In Collection: RTMPose + Metadata: + Architecture: &id001 + - RTMPose + Training Data: &id002 + - AI Challenger + - COCO + - CrowdPose + - MPII + - sub-JHMDB + - Halpe + - PoseTrack18 + Name: rtmpose-t_8xb256-210e_body8-256x192 + Results: + - Dataset: Body8 + Metrics: + AP: 0.659 + Mean@0.1: 0.914 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-t_simcc-body7_pt-body7_420e-256x192-026a1439_20230504.pth +- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-s_8xb256-210e_body8-256x192 + Results: + - Dataset: Body8 + Metrics: + AP: 0.697 + Mean@0.1: 0.925 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-body7_pt-body7_420e-256x192-acd4a1ef_20230504.pth +- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-m_8xb256-210e_body8-256x192 + Results: + - Dataset: Body8 + Metrics: + AP: 0.749 + Mean@0.1: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-256x192-e48f03d0_20230504.pth +- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-l_8xb256-210e_body8-256x192 + Results: + - Dataset: Body8 + Metrics: + AP: 0.767 + Mean@0.1: 0.951 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-256x192-4dba18fc_20230504.pth +- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-m_8xb256-210e_body8-384x288 + Results: + - Dataset: Body8 + Metrics: + AP: 0.766 + Mean@0.1: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth +- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-l_8xb256-210e_body8-384x288 + Results: + - Dataset: Body8 + Metrics: + AP: 0.783 + Mean@0.1: 0.964 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth diff --git 
a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..5af4dd04a26d2c9bd832bad3dc54d405410d32b9 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py @@ -0,0 +1,272 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + 
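# stage-1 augmentation applies this synthetic occlusion to every sample (p=1.0 below) +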
min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=17, + mapping=[ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + ]) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py new file mode 100644 index 
0000000000000000000000000000000000000000..015c46b70b2e921ef942cd30bc3068e41c99145f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py @@ -0,0 +1,272 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(288, 384), + sigma=(6., 6.93), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(9, 12), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + 
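# evaluation uses deterministic transforms only: affine-crop to the model input size, no augmentation +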
dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=17, + mapping=[ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + ]) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..2459b3417e6e104a5d8fc0a2ca5eb01724b5864e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py @@ -0,0 +1,232 @@ +_base_ = 
['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', 
backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file=f'{data_root}person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..151197d1f11f58ae63394499fbdc02d1bb7791a0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py @@ -0,0 +1,272 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + 
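# sigma: std of the Gaussian smoothing applied to the 1-D SimCC classification targets along x/y +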
simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], + ), + times=3) + +dataset_aic = dict( + 
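# AIC is combined with the 3x-repeated COCO set above; its keypoints are remapped to COCO indices +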
type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=17, + mapping=[ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + ]) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=128 * 2, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..6507ba247b737877293640ba89e23bc238625c24 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py @@ -0,0 +1,272 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(288, 384), + sigma=(6., 6.93), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 
57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(9, 12), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + 
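# each (src, dst) pair maps an AIC keypoint index to its COCO counterpart +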
dict( + type='KeypointConverter', + num_keypoints=17, + mapping=[ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + ]) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=128 * 2, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..7438820418f715683513317a6e4c7f0c0c785ab1 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py @@ -0,0 +1,232 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + 
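# SyncBN synchronizes BatchNorm statistics across GPUs during distributed training +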
act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file=f'{data_root}person_detection_results/' + # 
'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..29c82d78ed8a276125aed42f80aea69dcc5f5b24 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py @@ -0,0 +1,272 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=512, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# 
path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=17, + mapping=[ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + ]) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=128 * 2, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', 
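+        # track validation COCO AP and keep only the single best checkpoint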
rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..1a2f57c95ca8969d98664947951af4a255bbc931 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py @@ -0,0 +1,232 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=512, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + 
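+    # the remaining transforms crop the detected person box, apply flip/half-body/affine and color augmentation, then encode SimCC targets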
dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file=f'{data_root}person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..dd375b06bfd029e61744e48f3ce521369fe48cc5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py @@ -0,0 +1,273 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 
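+# the last stage2_num_epochs epochs switch to the milder train_pipeline_stage2 via the mmdet.PipelineSwitchHook registered in custom_hooks below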
+base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.167, + widen_factor=0.375, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=384, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + 
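+    # stage-2 augmentation is deliberately weaker than stage 1: zero shift, a narrower scale range, less rotation, and CoarseDropout applied with p=0.5 instead of 1.0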
dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=17, + mapping=[ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + ]) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + # Turn off EMA while training the tiny model + # dict( + # type='EMAHook', + # ema_type='ExpMomentumEMA', + # momentum=0.0002, + # update_buffers=True, + # priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..4180d6c7b15295bde1d85d97dee5134c5ef1c807 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py @@ -0,0 +1,233 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 420 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + 
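+    # AdamW for the tiny model; weight decay is disabled entirely here, and paramwise_cfg additionally exempts norm and bias parameters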
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.167, + widen_factor=0.375, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=384, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + 
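+    # TopdownAffine below warps the jittered person box to the fixed network input resolution given by codec['input_size']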
dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file=f'{data_root}person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + # Turn off EMA while training the tiny model + # dict( + # type='EMAHook', + # ema_type='ExpMomentumEMA', + # momentum=0.0002, + # update_buffers=True, + # priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.md b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..d3cc9298df5e723f77cf26a4184c2efc7ca4469b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.md @@ -0,0 +1,71 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2303.07399, + doi = {10.48550/ARXIV.2303.07399}, + url = {https://arxiv.org/abs/2303.07399}, + author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose}, + publisher = {arXiv}, + year = {2023}, + copyright = {Creative Commons Attribution 4.0 International} +} + +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` +
+</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [rtmpose-t](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py) | 256x192 | 0.682 | 0.883 | 0.759 | 0.736 | 0.920 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-coco_pt-aic-coco_420e-256x192-e613ba3f_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-coco_pt-aic-coco_420e-256x192-e613ba3f_20230127.json) | +| [rtmpose-s](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.716 | 0.892 | 0.789 | 0.768 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) | +| [rtmpose-m](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.746 | 0.899 | 0.817 | 0.795 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) | +| [rtmpose-l](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.758 | 0.906 | 0.826 | 0.806 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) | +| [rtmpose-t-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.685 | 0.880 | 0.761 | 0.738 | 0.918 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.json) | +| [rtmpose-s-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.722 | 0.892 | 0.794 | 0.772 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth) | 
[log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.json) | +| [rtmpose-m-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.758 | 0.903 | 0.826 | 0.806 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.json) | +| [rtmpose-l-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.765 | 0.906 | 0.835 | 0.813 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.json) | +| [rtmpose-m-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py) | 384x288 | 0.770 | 0.908 | 0.833 | 0.816 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.json) | +| [rtmpose-l-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py) | 384x288 | 0.773 | 0.907 | 0.835 | 0.819 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.json) | diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.yml b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..bebe64b3b7a1ce4038e58366d0c5e9998cec7b20 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.yml @@ -0,0 +1,171 @@ +Collections: +- Name: RTMPose + Paper: + Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose" + URL: https://arxiv.org/abs/2303.07399 + README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md +Models: +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py + In Collection: RTMPose + Metadata: + Architecture: &id001 + - RTMPose + Training Data: COCO + Name: rtmpose-t_8xb256-420e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.682 + AP@0.5: 0.883 + AP@0.75: 0.759 + AR: 0.736 + AR@0.5: 0.92 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-coco_pt-aic-coco_420e-256x192-e613ba3f_20230127.pth +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: rtmpose-s_8xb256-420e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.716 + AP@0.5: 0.892 + AP@0.75: 0.789 + AR: 0.768 + AR@0.5: 0.929 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth +- Config: 
configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: rtmpose-m_8xb256-420e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.746 + AP@0.5: 0.899 + AP@0.75: 0.817 + AR: 0.795 + AR@0.5: 0.935 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: rtmpose-l_8xb256-420e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.758 + AP@0.5: 0.906 + AP@0.75: 0.826 + AR: 0.806 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: &id002 + - COCO + - AI Challenger + Name: rtmpose-t_8xb256-420e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.685 + AP@0.5: 0.88 + AP@0.75: 0.761 + AR: 0.738 + AR@0.5: 0.918 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-s_8xb256-420e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.722 + AP@0.5: 0.892 + AP@0.75: 0.794 + AR: 0.772 + AR@0.5: 0.929 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py + In Collection: RTMPose + Alias: human + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-m_8xb256-420e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.758 + AP@0.5: 0.903 + AP@0.75: 0.826 + AR: 0.806 + AR@0.5: 0.94 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-l_8xb256-420e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.765 + AP@0.5: 0.906 + AP@0.75: 0.835 + AR: 0.813 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth +- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-m_8xb256-420e_aic-coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.770 + AP@0.5: 0.908 + AP@0.75: 0.833 + AR: 0.816 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth +- Config: 
configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: *id002 + Name: rtmpose-l_8xb256-420e_aic-coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.773 + AP@0.5: 0.907 + AP@0.75: 0.835 + AR: 0.819 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..3660fb43b787b572856fa61c34417008c0c19451 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py @@ -0,0 +1,235 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 5e-4 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=14, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', 
backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='crowdpose/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'crowdpose/annotations/mmpose_crowdpose_test.json', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.md b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.md new file mode 100644 index 0000000000000000000000000000000000000000..42bcf0f65f3b76b453d82e8e24bc040cd14bcb0b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.md @@ -0,0 +1,60 @@ +<!-- [ALGORITHM] --> + 
+<details> +<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2303.07399, + doi = {10.48550/ARXIV.2303.07399}, + url = {https://arxiv.org/abs/2303.07399}, + author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose}, + publisher = {arXiv}, + year = {2023}, + copyright = {Creative Commons Attribution 4.0 International} +} + +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Li_CrowdPose_Efficient_Crowded_Scenes_Pose_Estimation_and_a_New_Benchmark_CVPR_2019_paper.html">CrowdPose (CVPR'2019)</a></summary> + +```bibtex +@article{li2018crowdpose, + title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark}, + author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu}, + journal={arXiv preprint arXiv:1812.00324}, + year={2018} +} +``` + +</details> + +Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP (E) | AP (M) | AP (H) | ckpt | log | +| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: | +| [rtmpose-m](/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.706 | 0.841 | 0.765 | 0.799 | 0.719 | 0.582 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-crowdpose_pt-aic-coco_210e-256x192-e6192cac_20230224.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-crowdpose_pt-aic-coco_210e-256x192-e6192cac_20230224.json) | diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.yml b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.yml new file mode 100644 index 0000000000000000000000000000000000000000..5fb842f56355267e7b22509c76717c97223b2721 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.yml @@ -0,0 +1,19 @@ +Models: +- Config: configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py + In Collection: RTMPose + Metadata: + Architecture: + - RTMPose + Training Data: CrowdPose + Name: rtmpose-m_8xb64-210e_crowdpose-256x192 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.706 + AP@0.5: 0.841 + AP@0.75: 0.765 + AP (E): 0.799 + AP (M): 0.719 + AP (H): 0.582 + Task: Body 2D Keypoint + Weights: 
https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-crowdpose_pt-aic-coco_210e-256x192-e6192cac_20230224.pth diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..d3ee02f215a3ba595f0af1882cea386ef6daa029 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py @@ -0,0 +1,228 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=16, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/', +# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + 
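+            # random blur plus the CoarseDropout below (masking roughly 20-40% of the crop on each side) simulates motion blur and partial occlusion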
dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.md b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..990edb45eb2b882e6ddfe14253562dce5a5adba9 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2303.07399, + doi = {10.48550/ARXIV.2303.07399}, + url = {https://arxiv.org/abs/2303.07399}, + author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose}, + publisher = {arXiv}, + year = {2023}, + copyright = 
{Creative Commons Attribution 4.0 International} +} + +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Bernt Schiele}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean / w. flip | Mean@0.1 | ckpt | log | +| :------------------------------------------------------- | :--------: | :------------: | :------: | :------------------------------------------------------: | :------------------------------------------------------: | +| [rtmpose-m](/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py) | 256x256 | 0.907 | 0.348 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-mpii_pt-aic-coco_210e-256x256-ec4dbec8_20230206.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-mpii_pt-aic-coco_210e-256x256-ec4dbec8_20230206.json) | diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.yml b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..2e1eb28659f49681496cacdbd5bb4f2062e5358b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py + In Collection: RTMPose + Metadata: + Architecture: + - RTMPose + Training Data: MPII + Name: rtmpose-m_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.907 + Mean@0.1: 0.348 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-mpii_pt-aic-coco_210e-256x256-ec4dbec8_20230206.pth diff --git a/mmpose/configs/body_2d_keypoint/simcc/README.md b/mmpose/configs/body_2d_keypoint/simcc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6148c18bf5061743de2eacf531554f567fa50516 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/README.md @@ -0,0 +1,20 @@ +# Top-down SimCC-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given the detected bounding boxes. At the second stage, SimCC-based methods reformulate human pose estimation as two classification tasks, one for the horizontal and one for the vertical coordinate: each pixel is uniformly divided into several bins, and the keypoint coordinates are obtained by classifying over these bins on the features extracted from the bounding box area, following the paradigm introduced in [SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation](https://arxiv.org/abs/2107.03332). 
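+As a toy illustration of this coordinate-classification idea, here is a minimal NumPy sketch of SimCC-style label encoding and decoding. It is not the actual mmpose `SimCCLabel` codec; the constants mirror the configs above, and the helper names (`encode_kpt`, `decode_kpt`) are made up: +
+```python
+import numpy as np
+
+# Illustrative sketch only, not the mmpose SimCCLabel implementation.
+INPUT_W, INPUT_H = 192, 256   # network input size (w, h)
+SPLIT_RATIO = 2.0             # bins per pixel (simcc_split_ratio)
+SIGMA = 5.66                  # Gaussian label smoothing, in bins
+
+def encode_kpt(x, y):
+    """Turn a pixel coordinate into soft 1-D targets over the x and y bins."""
+    bins_x = np.arange(int(INPUT_W * SPLIT_RATIO))
+    bins_y = np.arange(int(INPUT_H * SPLIT_RATIO))
+    tx = np.exp(-((bins_x - x * SPLIT_RATIO) ** 2) / (2 * SIGMA ** 2))
+    ty = np.exp(-((bins_y - y * SPLIT_RATIO) ** 2) / (2 * SIGMA ** 2))
+    return tx, ty
+
+def decode_kpt(scores_x, scores_y):
+    """Arg-max over each axis, mapped back to pixel coordinates."""
+    return (float(np.argmax(scores_x)) / SPLIT_RATIO,
+            float(np.argmax(scores_y)) / SPLIT_RATIO)
+
+tx, ty = encode_kpt(100.0, 60.0)
+print(decode_kpt(tx, ty))  # (100.0, 60.0); sub-pixel bins shrink quantization error
+```
+
+In the configs above, `RTMCCHead` predicts exactly such per-axis bin scores, and `KLDiscretLoss` supervises them against the smoothed targets. 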
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/13503330/189811385-6395d118-055b-4bad-89e8-f84ffa2c2aa6.png"> +</div> + +## Results and Models + +### COCO Dataset + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Model | Input Size | AP | AR | Details and Download | +| :---------------------------: | :--------: | :---: | :---: | :-----------------------------------------------: | +| ResNet-50+SimCC | 384x288 | 0.735 | 0.790 | [resnet_coco.md](./coco/resnet_coco.md) | +| ResNet-50+SimCC | 256x192 | 0.721 | 0.781 | [resnet_coco.md](./coco/resnet_coco.md) | +| S-ViPNAS-MobileNet-V3+SimCC | 256x192 | 0.695 | 0.755 | [vipnas_coco.md](./coco/vipnas_coco.md) | +| MobileNet-V2+SimCC(wo/deconv) | 256x192 | 0.620 | 0.678 | [mobilenetv2_coco.md](./coco/mobilenetv2_coco.md) | diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..42438774bade657bd5c927d08d99353acbcf7f82 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md @@ -0,0 +1,55 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2107.03332">SimCC (ECCV'2022)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2107.03332, + title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation}, + author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao}, + year={2021} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [simcc_mobilenetv2_wo_deconv](/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py) | 256x192 | 0.620 | 0.855 | 0.697 | 0.678 | 0.902 | 
[ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-4b0703bb_20221010.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-4b0703bb_20221010.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.yml b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..00ef5aaecd3bfde35036e309d7a438fd2d9ea219 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.yml @@ -0,0 +1,19 @@ +Models: +- Config: configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py + In Collection: SimCC + Metadata: + Architecture: &id001 + - SimCC + - MobilenetV2 + Training Data: COCO + Name: simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.62 + AP@0.5: 0.855 + AP@0.75: 0.697 + AR: 0.678 + AR@0.5: 0.902 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-4b0703bb_20221010.pth diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.md b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..80592b4db38a306c297b952264dc0e0b51d64fde --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2107.03332">SimCC (ECCV'2022)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2107.03332, + title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation}, + author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao}, + year={2021} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| 
[simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.721 | 0.897 | 0.798 | 0.781 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.log.json) | +| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py) | 384x288 | 0.735 | 0.899 | 0.800 | 0.790 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..1e56c9e477f36576909633a4107f24db5872bbfe --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml @@ -0,0 +1,41 @@ +Collections: +- Name: SimCC + Paper: + Title: A Simple Coordinate Classification Perspective for Human Pose Estimation + URL: https://arxiv.org/abs/2107.03332 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/simcc.md +Models: +- Config: configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py + In Collection: SimCC + Metadata: + Architecture: &id001 + - SimCC + - ResNet + Training Data: COCO + Name: simcc_res50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.721 + AP@0.5: 0.900 + AP@0.75: 0.798 + AR: 0.781 + AR@0.5: 0.937 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.pth +- Config: configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py + In Collection: SimCC + Metadata: + Architecture: *id001 + Training Data: COCO + Name: simcc_res50_8xb32-140e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.735 + AP@0.5: 0.899 + AP@0.75: 0.800 + AR: 0.790 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.pth diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..65101ada88bbc10603d6931848e92be532d1768c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(192, 
256), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + checkpoint='mmcls://mobilenet_v2', + )), + head=dict( + type='SimCCHead', + in_channels=1280, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + deconv_out_channels=None, + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..8ed9586bfb1f733c964b2ad093d0b862d6f3a07a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=140, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[90, 120], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', 
input_size=(288, 384), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='SimCCHead', + in_channels=2048, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(9, 12), + simcc_split_ratio=codec['simcc_split_ratio'], + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +test_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..1e1fe440d183fc9c535d45bba7cbb6856429c760 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py @@ -0,0 +1,114 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict(type='MultiStepLR', milestones=[170, 200], gamma=0.1, by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + 
type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='SimCCHead', + in_channels=2048, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict(flip_test=True)) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +test_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..ea61b0fb4fc49801254ded96d93f37e155796f5d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py @@ -0,0 +1,119 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', 
+ data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ViPNAS_MobileNetV3'), + head=dict( + type='SimCCHead', + in_channels=160, + out_channels=17, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + deconv_type='vipnas', + deconv_out_channels=(160, 160, 160), + deconv_num_groups=(160, 160, 160), + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=data_root + 'person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..a9d8b98fc3a9dbae4128ac9ce3c09a24de0474eb --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md @@ -0,0 +1,54 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2107.03332">SimCC (ECCV'2022)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2107.03332, + title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation}, + author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao}, + year={2021} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2105.10154">ViPNAS (CVPR'2021)</a></summary> + +```bibtex +@article{xu2021vipnas, + title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search}, + author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, 
Xiaogang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + year={2021} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [simcc_S-ViPNAS-MobileNetV3](/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py) | 256x192 | 0.695 | 0.883 | 0.772 | 0.755 | 0.927 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.yml b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..95077c05c658a42e767833d2102b2e0603288f72 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.yml @@ -0,0 +1,19 @@ +Models: +- Config: configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py + In Collection: SimCC + Metadata: + Architecture: &id001 + - SimCC + - ViPNAS + Training Data: COCO + Name: simcc_vipnas-mbv3_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.695 + AP@0.5: 0.883 + AP@0.75: 0.772 + AR: 0.755 + AR@0.5: 0.927 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.pth diff --git a/mmpose/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..965fda71e6a01b00f86669e1b7ba1b8bc57f2831 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', input_size=(256, 256), 
sigma=6.0, simcc_split_ratio=2.0) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='SimCCHead', + in_channels=2048, + out_channels=16, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + deconv_out_channels=None, + loss=dict(type='KLDiscretLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9e23b874bc6211eb51a7305c7913f8d1b46e3e4e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/README.md @@ -0,0 +1,117 @@ +# Top-down heatmap-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator produces one heatmap per keypoint, in which each value represents the likelihood that the corresponding image location is that keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
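As a concrete illustration of this paradigm, the sketch below renders a Gaussian heatmap target for a single keypoint and decodes a prediction by taking the argmax. It is a simplification, not mmpose's `MSRAHeatmap` codec (which also handles target weights, multi-keypoint batches and flip/shift test-time tricks); the `input_size=(192, 256)`, `heatmap_size=(48, 64)` and `sigma=2` values mirror the codec settings used by several configs in this diff.

```python
import numpy as np

def make_target(kpt, input_size=(192, 256), heatmap_size=(48, 64), sigma=2.0):
    """Render a Gaussian peak for one keypoint on the downscaled heatmap grid."""
    stride_x = input_size[0] / heatmap_size[0]  # 4 px per heatmap cell
    stride_y = input_size[1] / heatmap_size[1]  # 4 px per heatmap cell
    cx, cy = kpt[0] / stride_x, kpt[1] / stride_y
    xs, ys = np.meshgrid(np.arange(heatmap_size[0]), np.arange(heatmap_size[1]))
    return np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))

def decode(heatmap, input_size=(192, 256)):
    """Take the argmax and map it back to input-image coordinates."""
    row, col = np.unravel_index(np.argmax(heatmap), heatmap.shape)
    return (col * input_size[0] / heatmap.shape[1],
            row * input_size[1] / heatmap.shape[0])

target = make_target((96, 128))
print(decode(target))  # (96.0, 128.0): exact because 96 and 128 sit on the grid
```

Because decoding happens on a 4x-downscaled grid, a plain argmax quantizes coordinates to 4-pixel steps; that quantization error is what sub-pixel refinements such as DARK and UDP (both listed in the tables below) are designed to remove.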
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146522977-5f355832-e9c1-442f-a34f-9d24fb0aefa8.png" height=400> +</div> + +## Results and Models + +### COCO Dataset + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Model | Input Size | AP | AR | Details and Download | +| :-------------: | :--------: | :---: | :---: | :-------------------------------------------------: | +| ViTPose-h | 256x192 | 0.790 | 0.840 | [vitpose_coco.md](./coco/vitpose_coco.md) | +| HRNet-w48+UDP | 256x192 | 0.768 | 0.817 | [hrnet_udp_coco.md](./coco/hrnet_udp_coco.md) | +| MSPN 4-stg | 256x192 | 0.765 | 0.826 | [mspn_coco.md](./coco/mspn_coco.md) | +| HRNet-w48+Dark | 256x192 | 0.764 | 0.814 | [hrnet_dark_coco.md](./coco/hrnet_dark_coco.md) | +| HRNet-w48 | 256x192 | 0.756 | 0.809 | [hrnet_coco.md](./coco/hrnet_coco.md) | +| HRFormer-B | 256x192 | 0.754 | 0.807 | [hrformer_coco.md](./coco/hrformer_coco.md) | +| RSN-50-3x | 256x192 | 0.750 | 0.814 | [rsn_coco.md](./coco/rsn_coco.md) | +| CSPNeXt-l | 256x192 | 0.750 | 0.800 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) | +| HRNet-w32 | 256x192 | 0.749 | 0.804 | [hrnet_coco.md](./coco/hrnet_coco.md) | +| Swin-L | 256x192 | 0.743 | 0.798 | [swin_coco.md](./coco/swin_coco.md) | +| ViTPose-s | 256x192 | 0.739 | 0.792 | [vitpose_coco.md](./coco/vitpose_coco.md) | +| HRFormer-S | 256x192 | 0.738 | 0.793 | [hrformer_coco.md](./coco/hrformer_coco.md) | +| Swin-B | 256x192 | 0.737 | 0.794 | [swin_coco.md](./coco/swin_coco.md) | +| SEResNet-101 | 256x192 | 0.734 | 0.790 | [seresnet_coco.md](./coco/seresnet_coco.md) | +| SCNet-101 | 256x192 | 0.733 | 0.789 | [scnet_coco.md](./coco/scnet_coco.md) | +| ResNet-101+Dark | 256x192 | 0.733 | 0.786 | [resnet_dark_coco.md](./coco/resnet_dark_coco.md) | +| CSPNeXt-m | 256x192 | 0.732 | 0.785 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) | +| ResNetV1d-101 | 256x192 | 0.732 | 0.785 | [resnetv1d_coco.md](./coco/resnetv1d_coco.md) | +| SEResNet-50 | 256x192 | 0.729 | 0.784 | [seresnet_coco.md](./coco/seresnet_coco.md) | +| SCNet-50 | 256x192 | 0.728 | 0.784 | [scnet_coco.md](./coco/scnet_coco.md) | +| ResNet-101 | 256x192 | 0.726 | 0.783 | [resnet_coco.md](./coco/resnet_coco.md) | +| ResNeXt-101 | 256x192 | 0.726 | 0.781 | [resnext_coco.md](./coco/resnext_coco.md) | +| HourglassNet | 256x256 | 0.726 | 0.780 | [hourglass_coco.md](./coco/hourglass_coco.md) | +| ResNeSt-101 | 256x192 | 0.725 | 0.781 | [resnest_coco.md](./coco/resnest_coco.md) | +| RSN-50 | 256x192 | 0.724 | 0.790 | [rsn_coco.md](./coco/rsn_coco.md) | +| Swin-T | 256x192 | 0.724 | 0.782 | [swin_coco.md](./coco/swin_coco.md) | +| MSPN 1-stg | 256x192 | 0.723 | 0.788 | [mspn_coco.md](./coco/mspn_coco.md) | +| ResNetV1d-50 | 256x192 | 0.722 | 0.777 | [resnetv1d_coco.md](./coco/resnetv1d_coco.md) | +| ResNeSt-50 | 256x192 | 0.720 | 0.775 | [resnest_coco.md](./coco/resnest_coco.md) | +| ResNet-50 | 256x192 | 0.718 | 0.774 | [resnet_coco.md](./coco/resnet_coco.md) | +| ResNeXt-50 | 256x192 | 0.715 | 0.771 | [resnext_coco.md](./coco/resnext_coco.md) | +| PVT-S | 256x192 | 0.714 | 0.773 | [pvt_coco.md](./coco/pvt_coco.md) | +| CSPNeXt-s | 256x192 | 0.697 | 0.753 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) | +| LiteHRNet-30 | 256x192 | 0.676 | 0.736 | [litehrnet_coco.md](./coco/litehrnet_coco.md) | +| CSPNeXt-tiny | 256x192 | 0.665 | 0.723 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) | +| MobileNet-v2 | 256x192 | 0.648 | 0.709 | [mobilenetv2_coco.md](./coco/mobilenetv2_coco.md) | 
+| LiteHRNet-18 | 256x192 | 0.642 | 0.705 | [litehrnet_coco.md](./coco/litehrnet_coco.md) | +| CPM | 256x192 | 0.627 | 0.689 | [cpm_coco.md](./coco/cpm_coco.md) | +| ShuffleNet-v2 | 256x192 | 0.602 | 0.668 | [shufflenetv2_coco.md](./coco/shufflenetv2_coco.md) | +| ShuffleNet-v1 | 256x192 | 0.587 | 0.654 | [shufflenetv1_coco.md](./coco/shufflenetv1_coco.md) | +| AlexNet | 256x192 | 0.448 | 0.521 | [alexnet_coco.md](./coco/alexnet_coco.md) | + +### MPII Dataset + +| Model | Input Size | PCKh@0.5 | PCKh@0.1 | Details and Download | +| :------------: | :--------: | :------: | :------: | :-------------------------------------------------: | +| HRNet-w48+Dark | 256x256 | 0.905 | 0.360 | [hrnet_dark_mpii.md](./mpii/hrnet_dark_mpii.md) | +| CSPNeXt-m | 256x256 | 0.902 | 0.303 | [cspnext_udp_mpii.md](./mpii/cspnext_udp_mpii.md) | +| HRNet-w48 | 256x256 | 0.901 | 0.337 | [hrnet_mpii.md](./mpii/hrnet_mpii.md) | +| HRNet-w32 | 256x256 | 0.900 | 0.334 | [hrnet_mpii.md](./mpii/hrnet_mpii.md) | +| HourglassNet | 256x256 | 0.889 | 0.317 | [hourglass_mpii.md](./mpii/hourglass_mpii.md) | +| ResNet-152 | 256x256 | 0.889 | 0.303 | [resnet_mpii.md](./mpii/resnet_mpii.md) | +| ResNetV1d-152 | 256x256 | 0.888 | 0.300 | [resnetv1d_mpii.md](./mpii/resnetv1d_mpii.md) | +| SCNet-50 | 256x256 | 0.888 | 0.290 | [scnet_mpii.md](./mpii/scnet_mpii.md) | +| ResNeXt-152 | 256x256 | 0.887 | 0.294 | [resnext_mpii.md](./mpii/resnext_mpii.md) | +| SEResNet-50 | 256x256 | 0.884 | 0.292 | [seresnet_mpii.md](./mpii/seresnet_mpii.md) | +| ResNet-50 | 256x256 | 0.882 | 0.286 | [resnet_mpii.md](./mpii/resnet_mpii.md) | +| ResNetV1d-50 | 256x256 | 0.881 | 0.290 | [resnetv1d_mpii.md](./mpii/resnetv1d_mpii.md) | +| CPM | 368x368\* | 0.876 | 0.285 | [cpm_mpii.md](./mpii/cpm_mpii.md) | +| LiteHRNet-30 | 256x256 | 0.869 | 0.271 | [litehrnet_mpii.md](./mpii/litehrnet_mpii.md) | +| LiteHRNet-18 | 256x256 | 0.859 | 0.260 | [litehrnet_mpii.md](./mpii/litehrnet_mpii.md) | +| MobileNet-v2 | 256x256 | 0.854 | 0.234 | [mobilenetv2_mpii.md](./mpii/mobilenetv2_mpii.md) | +| ShuffleNet-v2 | 256x256 | 0.828 | 0.205 | [shufflenetv2_mpii.md](./mpii/shufflenetv2_mpii.md) | +| ShuffleNet-v1 | 256x256 | 0.824 | 0.195 | [shufflenetv1_mpii.md](./mpii/shufflenetv1_mpii.md) | + +### CrowdPose Dataset + +Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector + +| Model | Input Size | AP | AR | Details and Download | +| :--------: | :--------: | :---: | :---: | :--------------------------------------------------------: | +| HRNet-w32 | 256x192 | 0.675 | 0.816 | [hrnet_crowdpose.md](./crowdpose/hrnet_crowdpose.md) | +| CSPNeXt-m | 256x192 | 0.662 | 0.755 | [cspnext_udp_crowdpose.md](./crowdpose/cspnext_udp_crowdpose.md) | +| ResNet-101 | 256x192 | 0.647 | 0.800 | [resnet_crowdpose.md](./crowdpose/resnet_crowdpose.md) | +| ResNet-50 | 256x192 | 0.637 | 0.785 | [resnet_crowdpose.md](./crowdpose/resnet_crowdpose.md) | + +### AIC Dataset + +Results on AIC val set with ground-truth bounding boxes. + +| Model | Input Size | AP | AR | Details and Download | +| :--------: | :--------: | :---: | :---: | :----------------------------------: | +| HRNet-w32 | 256x192 | 0.323 | 0.366 | [hrnet_aic.md](./aic/hrnet_aic.md) | +| ResNet-101 | 256x192 | 0.294 | 0.337 | [resnet_aic.md](./aic/resnet_aic.md) | + +### JHMDB Dataset + +| Model | Input Size | PCK (norm. by person size) | PCK (norm.
by torso size) | Details and Download | +| :-------: | :--------: | :-----------------------: | :-----------------------: | :----------------------------------------: | +| ResNet-50 | 256x256 | 96.0 | 80.1 | [resnet_jhmdb.md](./jhmdb/resnet_jhmdb.md) | +| CPM | 368x368 | 89.8 | 65.7 | [cpm_jhmdb.md](./jhmdb/cpm_jhmdb.md) | + +### PoseTrack2018 Dataset + +Results on PoseTrack2018 val with ground-truth bounding boxes. + +| Model | Input Size | AP | Details and Download | +| :-------: | :--------: | :--: | :----------------------------------------------------------: | +| HRNet-w48 | 256x192 | 84.6 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) | +| HRNet-w32 | 256x192 | 83.4 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) | +| ResNet-50 | 256x192 | 81.2 | [resnet_posetrack18.md](./posetrack18/resnet_posetrack18.md) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.md new file mode 100644 index 0000000000000000000000000000000000000000..4b2cefcdcbe8909a7134a556f7da48881c6333c3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.md @@ -0,0 +1,38 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1711.06475">AI Challenger (ArXiv'2017)</a></summary> + +```bibtex +@article{wu2017ai, + title={Ai challenger: A large-scale dataset for going deeper in image understanding}, + author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others}, + journal={arXiv preprint arXiv:1711.06475}, + year={2017} +} +``` + +</details> + +Results on AIC val set with ground-truth bounding boxes + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py) | 256x192 | 0.323 | 0.761 | 0.218 | 0.366 | 0.789 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192-30a4e465_20200826.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192_20200826.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.yml new file mode 100644 index 0000000000000000000000000000000000000000..0bbc52ccb8d3fc439887937333898f84ca40b167 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.yml @@ -0,0 +1,18 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py + In 
Collection: HRNet + Metadata: + Architecture: + - HRNet + Training Data: AI Challenger + Name: td-hm_hrnet-w32_8xb64-210e_aic-256x192 + Results: + - Dataset: AI Challenger + Metrics: + AP: 0.323 + AP@0.5: 0.761 + AP@0.75: 0.218 + AR: 0.366 + AR@0.5: 0.789 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192-30a4e465_20200826.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.md new file mode 100644 index 0000000000000000000000000000000000000000..1cb0f57eb38ac56e2aaaeef20ad7bda5cb240e96 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.md @@ -0,0 +1,55 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1711.06475">AI Challenger (ArXiv'2017)</a></summary> + +```bibtex +@article{wu2017ai, + title={Ai challenger: A large-scale dataset for going deeper in image understanding}, + author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others}, + journal={arXiv preprint arXiv:1711.06475}, + year={2017} +} +``` + +</details> + +Results on AIC val set with ground-truth bounding boxes + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py) | 256x192 | 0.294 | 0.736 | 0.172 | 0.337 | 0.762 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192-79b35445_20200826.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192_20200826.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.yml new file mode 100644 index 0000000000000000000000000000000000000000..e320056858565d88ac3b1a4e3e4960019be02ffb --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.yml @@ -0,0 +1,19 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py + In Collection: 
SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ResNet + Training Data: AI Challenger + Name: td-hm_res101_8xb64-210e_aic-256x192 + Results: + - Dataset: AI Challenger + Metrics: + AP: 0.294 + AP@0.5: 0.736 + AP@0.75: 0.172 + AR: 0.337 + AR@0.5: 0.762 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192-79b35445_20200826.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..4d4c504d388fbe627ef7f62393e5135604403110 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py @@ -0,0 +1,151 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=14, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AicDataset' +data_mode = 'topdown' +data_root = 'data/aic/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', 
shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/aic_train.json', + data_prefix=dict(img='ai_challenger_keypoint_train_20170902/' + 'keypoint_train_images_20170902/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/aic_val.json', + data_prefix=dict(img='ai_challenger_keypoint_validation_20170911/' + 'keypoint_validation_images_20170911/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/aic_val.json', + use_area=False) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..e61da3a5c4b6cbb89e78576c34ca8040f0fcca05 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=14, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AicDataset' +data_mode = 'topdown' +data_root = 'data/aic/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + 
data_mode=data_mode, + ann_file='annotations/aic_train.json', + data_prefix=dict(img='ai_challenger_keypoint_train_20170902/' + 'keypoint_train_images_20170902/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/aic_val.json', + data_prefix=dict(img='ai_challenger_keypoint_validation_20170911/' + 'keypoint_validation_images_20170911/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/aic_val.json', + use_area=False) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..6f82685ba81d28da32bfd9e578df397c9690ef5f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.md @@ -0,0 +1,40 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://proceedings.neurips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf">AlexNet (NeurIPS'2012)</a></summary> + +```bibtex +@inproceedings{krizhevsky2012imagenet, + title={Imagenet classification with deep convolutional neural networks}, + author={Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E}, + booktitle={Advances in neural information processing systems}, + pages={1097--1105}, + year={2012} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_alexnet](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py) | 256x192 | 0.448 | 0.767 | 0.461 | 0.521 | 0.829 | [ckpt](https://download.openmmlab.com/mmpose/top_down/alexnet/alexnet_coco_256x192-a7b1fd15_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/alexnet/alexnet_coco_256x192_20200727.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..0c851c3c793f7360617313a2b4f09e49ebc87484 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.yml @@ -0,0 +1,19 @@ +Models: +- Config: 
configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - AlexNet + Training Data: COCO + Name: td-hm_alexnet_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.448 + AP@0.5: 0.767 + AP@0.75: 0.461 + AR: 0.521 + AR@0.5: 0.829 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/alexnet/alexnet_coco_256x192-a7b1fd15_20200727.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..3d4453a36986ccca8511e3f589dfac39cc3185ec --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.md @@ -0,0 +1,41 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html">CPM (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{wei2016convolutional, + title={Convolutional pose machines}, + author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser}, + booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition}, + pages={4724--4732}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [cpm](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py) | 256x192 | 0.627 | 0.862 | 0.709 | 0.689 | 0.906 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192-0e978875_20220920.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192_20220920.log) | +| [cpm](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py) | 384x288 | 0.652 | 0.865 | 0.730 | 0.710 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288-165487b8_20221011.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288_20221011.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..2c1cad9713c6a8be51e59ec67047267c8a425e1f --- /dev/null +++ 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.yml @@ -0,0 +1,40 @@ +Collections: +- Name: CPM + Paper: + Title: Convolutional pose machines + URL: http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/cpm.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py + In Collection: CPM + Metadata: + Architecture: &id001 + - CPM + Training Data: COCO + Name: td-hm_cpm_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.627 + AP@0.5: 0.862 + AP@0.75: 0.709 + AR: 0.689 + AR@0.5: 0.906 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192-0e978875_20220920.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py + In Collection: CPM + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_cpm_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.652 + AP@0.5: 0.865 + AP@0.75: 0.730 + AR: 0.710 + AR@0.5: 0.907 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288-165487b8_20221011.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..fc1eb0d36c8b185369c8a722522f527fa37e0f8c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py @@ -0,0 +1,284 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# keypoint mappings +keypoint_mapping_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +keypoint_mapping_aic = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + (12, 17), + (13, 18), +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + 
deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth')), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=19, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + output_keypoint_indices=[ + target for _, target in keypoint_mapping_coco + ])) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_coco) + ], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_aic) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', 
shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6cce193544c775b5f4c749e3ca9c81ff547a507e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py @@ -0,0 +1,214 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth')), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + 
decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 
'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..096bf307859ee2946e8d42c66dc10ed23dbfe545 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py @@ -0,0 +1,284 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# keypoint mappings +keypoint_mapping_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +keypoint_mapping_aic = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + (12, 17), + (13, 18), +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=19, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + output_keypoint_indices=[ + target for _, target in keypoint_mapping_coco + ])) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + 
dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_coco) + ], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_aic) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator 
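The aic-coco configs above train one 19-channel head on two differently annotated datasets: `KeypointConverter` scatters COCO's 17 keypoints into slots 0-16 and AIC's 14 keypoints into the matching COCO slots plus two extra slots (17 and 18, head top and neck in the AIC layout), and at test time `output_keypoint_indices` slices the prediction back to COCO order. Below is a minimal NumPy sketch of that index-scatter step, assuming `(N, K, 2)` keypoint arrays; `convert_keypoints` is a hypothetical helper, not mmpose's actual `KeypointConverter` implementation.

```python
import numpy as np

def convert_keypoints(kpts, visible, mapping, num_keypoints=19):
    """Scatter (N, K_src, 2) keypoints into an (N, 19, 2) superset layout.

    `mapping` holds (src, dst) index pairs such as `keypoint_mapping_aic`
    above; slots that no pair writes to keep visibility 0, so the MSE loss
    ignores them via `use_target_weight=True`.
    """
    n = kpts.shape[0]
    out_kpts = np.zeros((n, num_keypoints, 2), dtype=kpts.dtype)
    out_vis = np.zeros((n, num_keypoints), dtype=visible.dtype)
    for src, dst in mapping:
        out_kpts[:, dst] = kpts[:, src]
        out_vis[:, dst] = visible[:, src]
    return out_kpts, out_vis

# AIC keypoint 0 (right shoulder) lands in slot 6, COCO's right-shoulder
# index, so both datasets supervise the same output channel.
aic_mapping = [(0, 6), (1, 8), (2, 10), (3, 5), (4, 7), (5, 9), (6, 12),
               (7, 14), (8, 16), (9, 11), (10, 13), (11, 15), (12, 17),
               (13, 18)]
kpts, vis = convert_keypoints(np.random.rand(4, 14, 2), np.ones((4, 14)),
                              aic_mapping)
assert kpts.shape == (4, 19, 2) and vis[:, 0].sum() == 0  # face slots unset
```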
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..f86e9a8d609c2f200c888ad183f3cd890f35c388 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py @@ -0,0 +1,214 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + 
dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..94cc7d02d2789fd5a82ff9d352063f5afe99aaf0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py @@ -0,0 +1,284 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically 
scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# keypoint mappings +keypoint_mapping_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +keypoint_mapping_aic = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + (12, 17), + (13, 18), +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-s_imagenet_600e-ea671761.pth')), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=19, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + output_keypoint_indices=[ + target for _, target in keypoint_mapping_coco + ])) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + 
dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_coco) + ], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_aic) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6f50542e5bceb86c652bf4d8ab893386197217ef --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py @@ -0,0 +1,214 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = 
dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.33, + widen_factor=0.5, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-s_imagenet_600e-ea671761.pth')), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + 
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..cef1b204501573d4e0d3228c36595eb784fdc83b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py @@ -0,0 +1,284 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# keypoint mappings +keypoint_mapping_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +keypoint_mapping_aic = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + (12, 17), + (13, 18), +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.167, + widen_factor=0.375, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-tiny_imagenet_600e-3a2dd350.pth')), + head=dict( + type='HeatmapHead', + in_channels=384, 
+ out_channels=19, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + output_keypoint_indices=[ + target for _, target in keypoint_mapping_coco + ])) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type='RepeatDataset', + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_train2017.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_coco) + ], + ), + times=3) + +dataset_aic = dict( + type='AicDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='aic/annotations/aic_train.json', + data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint' + '_train_20170902/keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_aic) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + 
type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='detection/coco/val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + # dict( + # type='EMAHook', + # ema_type='ExpMomentumEMA', + # momentum=0.0002, + # update_buffers=True, + # priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..7ec0bb2be7dbd59be7401cca1d4995d7741ee2b6 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py @@ -0,0 +1,214 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.167, + widen_factor=0.375, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-tiny_imagenet_600e-3a2dd350.pth')), + head=dict( + type='HeatmapHead', + in_channels=384, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 
's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + # bbox_file='data/coco/person_detection_results/' + # 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + # dict( + # type='EMAHook', + # ema_type='ExpMomentumEMA', + # momentum=0.0002, + # update_buffers=True, + # priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..7aad2bf6b31428bb8ff52149d0e8cb9f85820709 --- /dev/null +++ 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.md @@ -0,0 +1,69 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with a detector having a human AP of 56.4 on COCO val2017 + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_cspnext_t_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.665 | 0.874 | 0.723 | 0.723 | 0.917 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-coco_pt-in1k_210e-256x192-0908dd2d_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-coco_pt-in1k_210e-256x192-0908dd2d_20230123.json) | +| [pose_cspnext_s_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.697 | 0.886 | 0.776 | 0.753 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-coco_pt-in1k_210e-256x192-92dbfc1d_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-coco_pt-in1k_210e-256x192-92dbfc1d_20230123.json) | +| [pose_cspnext_m_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.732 | 0.896 | 0.806 | 0.785 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco_pt-in1k_210e-256x192-95f5967e_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco_pt-in1k_210e-256x192-95f5967e_20230123.json) | +| 
[pose_cspnext_l_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.750 | 0.904 | 0.822 | 0.800 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-coco_pt-in1k_210e-256x192-661cdd8c_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-coco_pt-in1k_210e-256x192-661cdd8c_20230123.json) | +| [pose_cspnext_t_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.655 | 0.884 | 0.731 | 0.689 | 0.890 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.json) | +| [pose_cspnext_s_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.700 | 0.905 | 0.783 | 0.733 | 0.918 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.json) | +| [pose_cspnext_m_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.748 | 0.925 | 0.818 | 0.777 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.json) | +| [pose_cspnext_l_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.772 | 0.936 | 0.839 | 0.799 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.json) | + +Note that UDP also adopts the unbiased encoding/decoding algorithm of [DARK](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/techniques.html#darkpose-cvpr-2020). + +Neither the flip test nor a person detector is used in the aic-coco results.
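The flip test mentioned in the note is the `test_cfg=dict(flip_test=True, flip_mode='heatmap', shift_heatmap=False)` setting used by the COCO-only configs above: the model runs on the image and its horizontal mirror, the mirrored heatmaps are flipped back with left/right channels swapped, and the two are averaged. Here is a minimal sketch of the idea, assuming a `model` that maps `(N, 3, H, W)` images to `(N, 17, h, w)` COCO heatmaps; `FLIP_PAIRS` and `flip_test_heatmaps` are illustrative names rather than mmpose's API.

```python
import torch

# COCO left/right keypoint index pairs that swap under a horizontal flip
# (eyes, ears, shoulders, elbows, wrists, hips, knees, ankles).
FLIP_PAIRS = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10),
              (11, 12), (13, 14), (15, 16)]

def flip_test_heatmaps(model, img):
    """Average heatmaps predicted on an image and its horizontal mirror."""
    heatmaps = model(img)
    flipped = model(torch.flip(img, dims=[3]))  # run on the mirrored input
    flipped = torch.flip(flipped, dims=[3])     # mirror the output back
    perm = list(range(flipped.shape[1]))
    for left, right in FLIP_PAIRS:              # swap left/right channels
        perm[left], perm[right] = perm[right], perm[left]
    # shift_heatmap=False in these configs: no extra 1-pixel shift is
    # applied before averaging.
    return 0.5 * (heatmaps + flipped[:, perm])
```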
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..aab5c44e1b651eb86335f0afee53872e5a5c5c34 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.yml @@ -0,0 +1,139 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py + In Collection: UDP + Metadata: + Architecture: &id001 + - CSPNeXt + - UDP + Training Data: COCO + Name: cspnext-tiny_udp_8xb256-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.665 + AP@0.5: 0.874 + AP@0.75: 0.723 + AR: 0.723 + AR@0.5: 0.917 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-coco_pt-in1k_210e-256x192-0908dd2d_20230123.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: COCO + Name: cspnext-s_udp_8xb256-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.697 + AP@0.5: 0.886 + AP@0.75: 0.776 + AR: 0.753 + AR@0.5: 0.929 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-coco_pt-in1k_210e-256x192-92dbfc1d_20230123.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: COCO + Name: cspnext-m_udp_8xb256-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.732 + AP@0.5: 0.896 + AP@0.75: 0.806 + AR: 0.785 + AR@0.5: 0.937 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco_pt-in1k_210e-256x192-95f5967e_20230123.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: COCO + Name: cspnext-l_udp_8xb256-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.750 + AP@0.5: 0.904 + AP@0.75: 0.822 + AR: 0.8 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-coco_pt-in1k_210e-256x192-661cdd8c_20230123.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: + - COCO + - AIC + Name: cspnext-tiny_udp_8xb256-210e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.655 + AP@0.5: 0.884 + AP@0.75: 0.731 + AR: 0.689 + AR@0.5: 0.89 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: + - COCO + - AIC + Name: cspnext-s_udp_8xb256-210e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.7 + AP@0.5: 0.905 + AP@0.75: 0.783 + AR: 0.733 + AR@0.5: 0.918 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py + In Collection: UDP + 
Metadata: + Architecture: *id001 + Training Data: + - COCO + - AIC + Name: cspnext-m_udp_8xb256-210e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.748 + AP@0.5: 0.925 + AP@0.75: 0.818 + AR: 0.777 + AR@0.5: 0.933 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: + - COCO + - AIC + Name: cspnext-l_udp_8xb256-210e_aic-coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.772 + AP@0.5: 0.936 + AP@0.75: 0.839 + AR: 0.799 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..dc7dee47c3ec6917f3fffb034e36df3a5a226504 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.md @@ -0,0 +1,42 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29">Hourglass (ECCV'2016)</a></summary> + +```bibtex +@inproceedings{newell2016stacked, + title={Stacked hourglass networks for human pose estimation}, + author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia}, + booktitle={European conference on computer vision}, + pages={483--499}, + year={2016}, + organization={Springer} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py) | 256x256 | 0.726 | 0.896 | 0.799 | 0.780 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256-4ec713ba_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256_20200709.log.json) | +| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py) | 384x384 | 0.746 | 0.900 | 0.812 | 0.797 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384-be91ba2b_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384_20200812.log.json) | diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..6d9cfd91e99a8d53a437f4d71e318fb1226b18e6 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.yml @@ -0,0 +1,40 @@ +Collections: +- Name: Hourglass + Paper: + Title: Stacked hourglass networks for human pose estimation + URL: https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hourglass.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py + In Collection: Hourglass + Metadata: + Architecture: &id001 + - Hourglass + Training Data: COCO + Name: td-hm_hourglass52_8xb32-210e_coco-256x256 + Results: + - Dataset: COCO + Metrics: + AP: 0.726 + AP@0.5: 0.896 + AP@0.75: 0.799 + AR: 0.780 + AR@0.5: 0.934 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256-4ec713ba_20200709.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py + In Collection: Hourglass + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hourglass52_8xb32-210e_coco-384x384 + Results: + - Dataset: COCO + Metrics: + AP: 0.746 + AP@0.5: 0.900 + AP@0.75: 0.812 + AR: 0.797 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384-be91ba2b_20200812.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..87309d2e7ceab17aa055a121379e7ef5c62d3c66 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://proceedings.neurips.cc/paper/2021/hash/3bbfdde8842a5c44a0323518eec97cbe-Abstract.html">HRFormer (NIPS'2021)</a></summary> + +```bibtex +@article{yuan2021hrformer, + title={HRFormer: High-Resolution Transformer for Dense Prediction}, + author={Yuan, Yuhui and Fu, Rao and Huang, Lang and Lin, Weihong and Zhang, Chao and Chen, Xilin and Wang, Jingdong}, + journal={Advances in Neural Information Processing Systems}, + volume={34}, + year={2021} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| 
[pose_hrformer_small](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py) | 256x192 | 0.738 | 0.904 | 0.812 | 0.793 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192-5310d898_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192_20220316.log.json) | +| [pose_hrformer_small](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py) | 384x288 | 0.757 | 0.905 | 0.824 | 0.807 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288-98d237ed_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288_20220316.log.json) | +| [pose_hrformer_base](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py) | 256x192 | 0.754 | 0.906 | 0.827 | 0.807 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192-6f5f1169_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192_20220316.log.json) | +| [pose_hrformer_base](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py) | 384x288 | 0.774 | 0.909 | 0.842 | 0.823 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_384x288-ecf0758d_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192_20220316.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ac7dc3636a7a020a396db422d8de1521a172cd1 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.yml @@ -0,0 +1,72 @@ +Collections: +- Name: HRFormer + Paper: + Title: 'HRFormer: High-Resolution Transformer for Dense Prediction' + URL: https://proceedings.neurips.cc/paper/2021/hash/3bbfdde8842a5c44a0323518eec97cbe-Abstract.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hrformer.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py + In Collection: HRFormer + Metadata: + Architecture: &id001 + - HRFormer + Training Data: COCO + Name: td-hm_hrformer-small_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.738 + AP@0.5: 0.904 + AP@0.75: 0.812 + AR: 0.793 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192-5310d898_20220316.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py + In Collection: HRFormer + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrformer-small_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.757 + AP@0.5: 0.905 + AP@0.75: 0.824 + AR: 0.807 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288-98d237ed_20220316.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py + In Collection: HRFormer + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrformer-base_8xb32-210e_coco-256x192 + Results: + - Dataset: 
COCO + Metrics: + AP: 0.754 + AP@0.5: 0.906 + AP@0.75: 0.827 + AR: 0.807 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192-6f5f1169_20220316.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py + In Collection: HRFormer + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrformer-base_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.774 + AP@0.5: 0.909 + AP@0.75: 0.842 + AR: 0.823 + AR@0.5: 0.945 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_384x288-ecf0758d_20220316.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..efe9cd27b91e2189760dec73682b997e2be58b95 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.md @@ -0,0 +1,62 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [OTHERS] --> + +<details> +<summary align="right"><a href="https://www.mdpi.com/649002">Albumentations (Information'2020)</a></summary> + +```bibtex +@article{buslaev2020albumentations, + title={Albumentations: fast and flexible image augmentations}, + author={Buslaev, Alexander and Iglovikov, Vladimir I and Khvedchenya, Eugene and Parinov, Alex and Druzhinin, Mikhail and Kalinin, Alexandr A}, + journal={Information}, + volume={11}, + number={2}, + pages={125}, + year={2020}, + publisher={Multidisciplinary Digital Publishing Institute} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [coarsedropout](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py) | 256x192 | 0.753 | 0.908 | 0.822 | 0.805 | 0.944 | 
[ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout-0f16a0ce_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout_20210320.log.json) | +| [gridmask](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py) | 256x192 | 0.752 | 0.906 | 0.825 | 0.804 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask-868180df_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask_20210320.log.json) | +| [photometric](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py) | 256x192 | 0.754 | 0.908 | 0.825 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric-308cf591_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric_20210320.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..7a29de4f64a702fddb2f42d9527c1762879f89b1 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.yml @@ -0,0 +1,56 @@ +Collections: +- Name: Albumentations + Paper: + Title: 'Albumentations: fast and flexible image augmentations' + URL: https://www.mdpi.com/649002 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/albumentations.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py + In Collection: Albumentations + Metadata: + Architecture: &id001 + - HRNet + Training Data: COCO + Name: td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.753 + AP@0.5: 0.908 + AP@0.75: 0.822 + AR: 0.805 + AR@0.5: 0.944 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout-0f16a0ce_20210320.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py + In Collection: Albumentations + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.752 + AP@0.5: 0.906 + AP@0.75: 0.825 + AR: 0.804 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask-868180df_20210320.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py + In Collection: Albumentations + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.754 + AP@0.5: 0.908 + AP@0.75: 0.825 + AR: 0.805 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric-308cf591_20210320.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.md new file mode 100644 index 
0000000000000000000000000000000000000000..51fbf1322e474f8dd5cb8ca46de7c14a9bb26540 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py) | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192_20220909.log) | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py) | 384x288 | 0.761 | 0.908 | 0.826 | 0.811 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288-ca5956af_20220909.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288_20220909.log) | +| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py) | 256x192 | 0.756 | 0.908 | 0.826 | 0.809 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192_20220913.log) | +| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py) | 384x288 | 0.767 | 0.911 | 0.832 | 0.817 | 0.947 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288-c161b7de_20220915.pth) | 
[log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288_20220915.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..a0e5debe859ccc059e892b28d85aadb11f1a1857 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml @@ -0,0 +1,124 @@ +Collections: +- Name: HRNet + Paper: + Title: Deep high-resolution representation learning for human pose estimation + URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hrnet.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py + In Collection: HRNet + Metadata: + Architecture: &id001 + - HRNet + Training Data: COCO + Name: td-hm_hrnet-w32_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.746 + AP@0.5: 0.904 + AP@0.75: 0.819 + AR: 0.799 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w32_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.76 + AP@0.5: 0.906 + AP@0.75: 0.83 + AR: 0.81 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288-ca5956af_20220909.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w48_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.756 + AP@0.5: 0.907 + AP@0.75: 0.825 + AR: 0.806 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w48_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.767 + AP@0.5: 0.91 + AP@0.75: 0.831 + AR: 0.816 + AR@0.5: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288-c161b7de_20220915.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: + - COCO + - AI Challenger + Name: td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge + Results: + - Dataset: COCO + Metrics: + AP: 0.757 + AP@0.5: 0.907 + AP@0.75: 0.829 + AR: 0.809 + AR@0.5: 0.944 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge-b05435b9_20221025.pth +- Config: 
configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: + - COCO + - AI Challenger + Name: td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine + Results: + - Dataset: COCO + Metrics: + AP: 0.756 + AP@0.5: 0.906 + AP@0.75: 0.826 + AR: 0.807 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine-4ce66880_20221026.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.749 + AP@0.5: 0.907 + AP@0.75: 0.822 + AR: 0.802 + AR@0.5: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192-f1e84e3b_20220914.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco_aic.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco_aic.md new file mode 100644 index 0000000000000000000000000000000000000000..fd88e25e64c5aec1d0e354ecb5d426e13de02ffa --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco_aic.md @@ -0,0 +1,61 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1711.06475">AI Challenger (ArXiv'2017)</a></summary> + +```bibtex +@article{wu2017ai, + title={Ai challenger: A large-scale dataset for going deeper in image understanding}, + author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others}, + journal={arXiv preprint arXiv:1711.06475}, + year={2017} +} +``` + +</details> + +MMPose supports training models with combined datasets. [coco-aic-merge](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py) and [coco-aic-combine](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py) are two examples, described in the list below. 
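As a rough sketch of the mechanism behind these two configs: the `CombinedDataset` and `KeypointConverter` types below follow the mmpose 1.x dataset API, but the data paths and the deliberately truncated AIC-to-COCO keypoint mapping are illustrative assumptions, not values copied from the actual configs.

```python
# Sketch of the "merge" strategy: AIC annotations are remapped onto COCO's
# 17-keypoint layout, then both datasets are wrapped in a CombinedDataset.
train_pipeline = []  # placeholder; a real config defines the loading/augment steps here

dataset_coco = dict(
    type='CocoDataset',
    data_root='data/coco/',
    ann_file='annotations/person_keypoints_train2017.json',
    data_prefix=dict(img='train2017/'),
    pipeline=[])  # per-dataset pipeline left empty; the shared one is applied below

dataset_aic = dict(
    type='AicDataset',
    data_root='data/aic/',
    ann_file='annotations/aic_train.json',
    data_prefix=dict(img='train_images/'),
    pipeline=[
        dict(
            type='KeypointConverter',   # remap AIC joints to COCO indices
            num_keypoints=17,
            mapping=[(0, 6), (1, 8)]),  # truncated; a real config maps every shared joint
    ])

train_dataset = dict(
    type='CombinedDataset',
    metainfo=dict(from_file='configs/_base_/datasets/coco.py'),  # COCO skeleton metainfo
    datasets=[dataset_coco, dataset_aic],
    pipeline=train_pipeline)  # shared pipeline applied to both datasets
```

The combine variant differs in that the target keypoint set is the union of the COCO and AIC joints, so both datasets are converted into the enlarged layout instead of AIC being projected onto COCO's.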
+ +- [coco-aic-merge](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py) leverages AIC data with partial keypoints as auxiliary data to train a COCO model. +- [coco-aic-combine](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py) constructs a combined dataset whose keypoints are the union of COCO and AIC keypoints to train a model that predicts keypoints of both datasets. + +Evaluation results on COCO val2017 of models trained solely on the COCO dataset and on the combined datasets are shown below. These models are evaluated with a detector having human AP of 56.4 on COCO val2017. + +| Train Set | Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :------------------------------------------- | :------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------: | :------------------------------------: | +| [coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py) | pose_hrnet_w32 | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192_20220909.log) | +| [coco-aic-merge](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py) | pose_hrnet_w32 | 256x192 | 0.757 | 0.907 | 0.829 | 0.809 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge-b05435b9_20221025.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge_20221025.log) | +| [coco-aic-combine](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py) | pose_hrnet_w32 | 256x192 | 0.756 | 0.906 | 0.826 | 0.807 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine-4ce66880_20221026.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine_20221026.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..c18382ec68035c0f7d560d782bbb44b5af2d5024 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.md @@ -0,0 +1,60 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a 
href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_hrnet_w32_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py) | 256x192 | 0.757 | 0.907 | 0.825 | 0.807 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192-0e00bf12_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192_20220914.log) | +| [pose_hrnet_w32_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py) | 384x288 | 0.766 | 0.907 | 0.829 | 0.815 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288-9bab4c9b_20220917.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288_20220917.log) | +| [pose_hrnet_w48_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py) | 256x192 | 0.764 | 0.907 | 0.831 | 0.814 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192-e1ebdd6f_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192_20220913.log) | +| [pose_hrnet_w48_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py) | 384x288 | 0.772 | 0.911 | 0.833 | 0.821 | 0.948 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288-39c3c381_20220916.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288_20220916.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.yml 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f14e9ffad78d3d36f5f89f2166c6b3cda7ab2ff --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.yml @@ -0,0 +1,73 @@ +Collections: +- Name: DarkPose + Paper: + Title: Distribution-aware coordinate representation for human pose estimation + URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/dark.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py + In Collection: DarkPose + Metadata: + Architecture: &id001 + - HRNet + - DarkPose + Training Data: COCO + Name: td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.757 + AP@0.5: 0.907 + AP@0.75: 0.825 + AR: 0.807 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192-0e00bf12_20220914.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.766 + AP@0.5: 0.907 + AP@0.75: 0.829 + AR: 0.815 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288-9bab4c9b_20220917.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.764 + AP@0.5: 0.907 + AP@0.75: 0.831 + AR: 0.814 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192-e1ebdd6f_20220913.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.772 + AP@0.5: 0.911 + AP@0.75: 0.833 + AR: 0.821 + AR@0.5: 0.948 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288-39c3c381_20220916.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_fp16_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_fp16_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..3e52624dc760eaf8bc4d6c7f75a29c4e1747a6e0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_fp16_coco.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation 
learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [OTHERS] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1710.03740">FP16 (ArXiv'2017)</a></summary> + +```bibtex +@article{micikevicius2017mixed, + title={Mixed precision training}, + author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others}, + journal={arXiv preprint arXiv:1710.03740}, + year={2017} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_hrnet_w32_fp16](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py) | 256x192 | 0.749 | 0.907 | 0.822 | 0.802 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192-f1e84e3b_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192_20220914.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..2b85d85a25125cb8eef083a87e597591850a1402 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.md @@ -0,0 +1,63 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + 
title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_hrnet_w32_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py) | 256x192 | 0.762 | 0.907 | 0.829 | 0.810 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192-73ede547_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192_20220914.log) | +| [pose_hrnet_w32_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py) | 384x288 | 0.768 | 0.909 | 0.832 | 0.815 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288-9a3f7c85_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288_20220914.log) | +| [pose_hrnet_w48_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py) | 256x192 | 0.768 | 0.908 | 0.833 | 0.817 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192-3feaef8f_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192_20220913.log) | +| [pose_hrnet_w48_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py) | 384x288 | 0.773 | 0.911 | 0.836 | 0.821 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288-70d7ab01_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288_20220913.log) | +| [pose_hrnet_w32_udp_regress](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py) | 256x192 | 0.759 | 0.907 | 0.827 | 0.813 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192-9c0b77b4_20220926.pth) | 
[log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192_20220226.log) | + +Note that UDP also adopts the unbiased encoding/decoding algorithm of [DARK](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/techniques.html#darkpose-cvpr-2020). diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..01cba761ec4a97c8d84273de7f2cf720de62ed5b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.yml @@ -0,0 +1,90 @@ +Collections: +- Name: UDP + Paper: + Title: 'The Devil Is in the Details: Delving Into Unbiased Data Processing for + Human Pose Estimation' + URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/udp.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py + In Collection: UDP + Metadata: + Architecture: &id001 + - HRNet + - UDP + Training Data: COCO + Name: td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.762 + AP@0.5: 0.907 + AP@0.75: 0.829 + AR: 0.810 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192-73ede547_20220914.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.768 + AP@0.5: 0.909 + AP@0.75: 0.832 + AR: 0.815 + AR@0.5: 0.945 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288-9a3f7c85_20220914.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.768 + AP@0.5: 0.908 + AP@0.75: 0.833 + AR: 0.817 + AR@0.5: 0.945 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192-3feaef8f_20220913.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.773 + AP@0.5: 0.911 + AP@0.75: 0.836 + AR: 0.821 + AR@0.5: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288-70d7ab01_20220913.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py + In Collection: UDP + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.759 + AP@0.5: 0.907 + AP@0.75: 0.827 
+ AR: 0.813 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192-9c0b77b4_20220926.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..28f608d54af3d7098886a077dbdfd5b7f4e50b4f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.md @@ -0,0 +1,42 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2104.06403">LiteHRNet (CVPR'2021)</a></summary> + +```bibtex +@inproceedings{Yulitehrnet21, + title={Lite-HRNet: A Lightweight High-Resolution Network}, + author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong}, + booktitle={CVPR}, + year={2021} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [LiteHRNet-18](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py) | 256x192 | 0.642 | 0.867 | 0.719 | 0.705 | 0.911 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192-6bace359_20211230.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192_20211230.log.json) | +| [LiteHRNet-18](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py) | 384x288 | 0.676 | 0.876 | 0.746 | 0.735 | 0.919 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288-8d4dac48_20211230.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288_20211230.log.json) | +| [LiteHRNet-30](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py) | 256x192 | 0.676 | 0.880 | 0.756 | 0.736 | 0.922 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192-4176555b_20210626.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192_20210626.log.json) | +| [LiteHRNet-30](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py) | 384x288 | 0.700 | 0.883 | 0.776 | 0.758 | 0.926 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288-a3aef5c4_20210626.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288_20210626.log.json) | diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..f92360587237823bde9ce0f042c08c8f5915ca3f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.yml @@ -0,0 +1,72 @@ +Collections: +- Name: LiteHRNet + Paper: + Title: 'Lite-HRNet: A Lightweight High-Resolution Network' + URL: https://arxiv.org/abs/2104.06403 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/litehrnet.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py + In Collection: LiteHRNet + Metadata: + Architecture: &id001 + - LiteHRNet + Training Data: COCO + Name: td-hm_litehrnet-18_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.642 + AP@0.5: 0.867 + AP@0.75: 0.719 + AR: 0.705 + AR@0.5: 0.911 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192-6bace359_20211230.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py + In Collection: LiteHRNet + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_litehrnet-18_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.676 + AP@0.5: 0.876 + AP@0.75: 0.746 + AR: 0.735 + AR@0.5: 0.919 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288-8d4dac48_20211230.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py + In Collection: LiteHRNet + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_litehrnet-30_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.676 + AP@0.5: 0.88 + AP@0.75: 0.756 + AR: 0.736 + AR@0.5: 0.922 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192-4176555b_20210626.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py + In Collection: LiteHRNet + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_litehrnet-30_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.7 + AP@0.5: 0.883 + AP@0.75: 0.776 + AR: 0.758 + AR@0.5: 0.926 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288-a3aef5c4_20210626.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..aed9fd0246bf4d6f0d3379d7317478ab9013eef2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.md @@ -0,0 +1,41 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + 
+</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_mobilenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py) | 256x192 | 0.648 | 0.874 | 0.725 | 0.709 | 0.918 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192-55a04c35_20221016.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192_20221016.log) | +| [pose_mobilenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py) | 384x288 | 0.677 | 0.882 | 0.746 | 0.734 | 0.920 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288-d3ab1457_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288_20221013.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..c7993fe516dd6a79895ed08f50c624d23c4ee0aa --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.yml @@ -0,0 +1,35 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - MobilenetV2 + Training Data: COCO + Name: td-hm_mobilenetv2_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.648 + AP@0.5: 0.874 + AP@0.75: 0.725 + AR: 0.709 + AR@0.5: 0.918 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192-55a04c35_20221016.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_mobilenetv2_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.677 + AP@0.5: 0.882 + AP@0.75: 0.746 + AR: 0.734 + AR@0.5: 0.920 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288-d3ab1457_20221013.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.md 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..d86bc2c2ada7c560e1e2770a1e864c27a8417d3c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.md @@ -0,0 +1,42 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1901.00148">MSPN (ArXiv'2019)</a></summary> + +```bibtex +@article{li2019rethinking, + title={Rethinking on Multi-Stage Networks for Human Pose Estimation}, + author={Li, Wenbo and Wang, Zhicheng and Yin, Binyi and Peng, Qixiang and Du, Yuming and Xiao, Tianzi and Yu, Gang and Lu, Hongtao and Wei, Yichen and Sun, Jian}, + journal={arXiv preprint arXiv:1901.00148}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [mspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.723 | 0.895 | 0.794 | 0.788 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192-8fbfb5d0_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192_20201123.log.json) | +| [2xmspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.754 | 0.903 | 0.826 | 0.816 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192-c8765a5c_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192_20201123.log.json) | +| [3xmspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.758 | 0.904 | 0.830 | 0.821 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192-e348f18e_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192_20201123.log.json) | +| [4xmspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.765 | 0.906 | 0.835 | 0.826 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192-7b837afb_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192_20201123.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..77eca18b6f50220650671a6f2b88eabd06a14baf --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.yml 
@@ -0,0 +1,72 @@ +Collections: +- Name: MSPN + Paper: + Title: Rethinking on Multi-Stage Networks for Human Pose Estimation + URL: https://arxiv.org/abs/1901.00148 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/mspn.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py + In Collection: MSPN + Metadata: + Architecture: &id001 + - MSPN + Training Data: COCO + Name: td-hm_mspn50_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.723 + AP@0.5: 0.895 + AP@0.75: 0.794 + AR: 0.788 + AR@0.5: 0.934 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192-8fbfb5d0_20201123.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py + In Collection: MSPN + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_2xmspn50_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.754 + AP@0.5: 0.903 + AP@0.75: 0.826 + AR: 0.816 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192-c8765a5c_20201123.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py + In Collection: MSPN + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_3xmspn50_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.758 + AP@0.5: 0.904 + AP@0.75: 0.83 + AR: 0.821 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192-e348f18e_20201123.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py + In Collection: MSPN + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_4xmspn50_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.765 + AP@0.5: 0.906 + AP@0.75: 0.835 + AR: 0.826 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192-7b837afb_20201123.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..8a375a4c2022442f0ce1cb19820da0cea5a1e802 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.md @@ -0,0 +1,57 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2102.12122">PVT (ICCV'2021)</a></summary> + +```bibtex +@inproceedings{wang2021pyramid, + title={Pyramid vision transformer: A versatile backbone for dense prediction without convolutions}, + author={Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={568--578}, + year={2021} +} +``` + +</details> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2106.13797">PVTV2 (CVMJ'2022)</a></summary> + +```bibtex +@article{wang2022pvt, + title={PVT v2: Improved baselines with Pyramid Vision Transformer}, + author={Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling}, + journal={Computational Visual Media}, + pages={1--10}, + year={2022}, + publisher={Springer} +} +``` + +</details> 
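+
+A minimal usage sketch for the checkpoints listed in the results table at the end of this page (assumptions: a working `mmpose` installation, and `demo.jpg` as a placeholder input image; the config path and weight URL are the PVT-S entry copied from that table):
+
+```python
+from mmpose.apis import MMPoseInferencer
+
+# Sketch only: top-down 2D keypoint inference with the PVT-S checkpoint
+# from the results table below. 'demo.jpg' is a placeholder image path.
+inferencer = MMPoseInferencer(
+    pose2d='configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py',
+    pose2d_weights='https://download.openmmlab.com/mmpose/top_down/pvt/pvt_small_coco_256x192-4324a49d_20220501.pth',
+    scope='mmpose',
+)
+# The inferencer yields one result dict per input; take the first.
+result = next(inferencer('demo.jpg'))
+```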
+ +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_pvt-s](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py) | 256x192 | 0.714 | 0.896 | 0.794 | 0.773 | 0.936 | [ckpt](https://download.openmmlab.com/mmpose/top_down/pvt/pvt_small_coco_256x192-4324a49d_20220501.pth) | [log](https://download.openmmlab.com/mmpose/top_down/pvt/pvt_small_coco_256x192_20220501.log.json) | +| [pose_pvtv2-b2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py) | 256x192 | 0.737 | 0.905 | 0.812 | 0.791 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/pvt/pvtv2_b2_coco_256x192-b4212737_20220501.pth) | [log](https://download.openmmlab.com/mmpose/top_down/pvt/pvtv2_b2_coco_256x192_20220501.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..2b4303d7040486806f98e01edb6d296538c3089f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.yml @@ -0,0 +1,35 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - PVT + Training Data: COCO + Name: td-hm_pvt-s_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.714 + AP@0.5: 0.896 + AP@0.75: 0.794 + AR: 0.773 + AR@0.5: 0.936 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/pvt/pvt_small_coco_256x192-4324a49d_20220501.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_pvtv2-b2_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.737 + AP@0.5: 0.905 + AP@0.75: 0.812 + AR: 0.791 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/pvt/pvtv2_b2_coco_256x192-b4212737_20220501.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..cb7ada4d6b72dfea2a029773148ed852ba00b1a8 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.md @@ -0,0 +1,46 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2004.08955">ResNeSt 
(ArXiv'2020)</a></summary> + +```bibtex +@article{zhang2020resnest, + title={ResNeSt: Split-Attention Networks}, + author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, + journal={arXiv preprint arXiv:2004.08955}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnest_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py) | 256x192 | 0.720 | 0.899 | 0.800 | 0.775 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192-6e65eece_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192_20210320.log.json) | +| [pose_resnest_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py) | 384x288 | 0.737 | 0.900 | 0.811 | 0.789 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288-dcd20436_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288_20210320.log.json) | +| [pose_resnest_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py) | 256x192 | 0.725 | 0.900 | 0.807 | 0.781 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192-2ffcdc9d_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192_20210320.log.json) | +| [pose_resnest_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py) | 384x288 | 0.745 | 0.905 | 0.818 | 0.798 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288-80660658_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288_20210320.log.json) | +| [pose_resnest_200](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py) | 256x192 | 0.731 | 0.905 | 0.812 | 0.787 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192-db007a48_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192_20210517.log.json) | +| [pose_resnest_200](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py) | 384x288 | 0.753 | 0.907 | 0.827 | 0.805 | 0.943 | 
[ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288-b5bb76cb_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288_20210517.log.json) | +| [pose_resnest_269](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py) | 256x192 | 0.737 | 0.907 | 0.819 | 0.792 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192-2a7882ac_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192_20210517.log.json) | +| [pose_resnest_269](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py) | 384x288 | 0.754 | 0.908 | 0.828 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288-b142b9fb_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288_20210517.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..082c6a0aa278868876a472edbb747813b95281ac --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.yml @@ -0,0 +1,131 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNeSt + Training Data: COCO + Name: td-hm_resnest50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.720 + AP@0.5: 0.899 + AP@0.75: 0.8 + AR: 0.775 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192-6e65eece_20210320.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnest50_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.737 + AP@0.5: 0.9 + AP@0.75: 0.811 + AR: 0.789 + AR@0.5: 0.937 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288-dcd20436_20210320.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnest101_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.725 + AP@0.5: 0.9 + AP@0.75: 0.807 + AR: 0.781 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192-2ffcdc9d_20210320.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnest101_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.745 + AP@0.5: 0.905 + AP@0.75: 0.818 + AR: 0.798 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288-80660658_20210320.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: 
td-hm_resnest200_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.731 + AP@0.5: 0.905 + AP@0.75: 0.812 + AR: 0.787 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192-db007a48_20210517.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnest200_8xb16-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.753 + AP@0.5: 0.907 + AP@0.75: 0.827 + AR: 0.805 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288-b5bb76cb_20210517.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnest269_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.737 + AP@0.5: 0.907 + AP@0.75: 0.819 + AR: 0.792 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192-2a7882ac_20210517.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnest269_8xb16-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.754 + AP@0.5: 0.908 + AP@0.75: 0.828 + AR: 0.805 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288-b142b9fb_20210517.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..4ce6da38c6728ae80f1733743977ef641511e1b2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.md @@ -0,0 +1,62 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and 
Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.718 | 0.898 | 0.796 | 0.774 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192-04af38ce_20220923.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192_20220923.log) | +| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py) | 384x288 | 0.731 | 0.900 | 0.799 | 0.782 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288-7b8db90e_20220923.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288_20220923.log) | +| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py) | 256x192 | 0.728 | 0.904 | 0.809 | 0.783 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192_20220926.log) | +| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py) | 384x288 | 0.749 | 0.906 | 0.817 | 0.799 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192_20220926.log) | +| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py) | 256x192 | 0.736 | 0.904 | 0.818 | 0.791 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192-0345f330_20220928.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192_20220928.log) | +| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py) | 384x288 | 0.750 | 0.908 | 0.821 | 0.800 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288-7fbb906f_20220927.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288_20220927.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..296be8898bc73d00923eef74d7275802ae9f7c9e --- /dev/null 
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.yml @@ -0,0 +1,121 @@ +Collections: +- Name: SimpleBaseline2D + Paper: + Title: Simple baselines for human pose estimation and tracking + URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/simplebaseline2d.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: COCO + Name: td-hm_res50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.718 + AP@0.5: 0.898 + AP@0.75: 0.796 + AR: 0.774 + AR@0.5: 0.934 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192-04af38ce_20220923.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res50_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.731 + AP@0.5: 0.9 + AP@0.75: 0.799 + AR: 0.782 + AR@0.5: 0.937 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288-7b8db90e_20220923.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res101_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.728 + AP@0.5: 0.904 + AP@0.75: 0.809 + AR: 0.783 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res101_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.749 + AP@0.5: 0.906 + AP@0.75: 0.817 + AR: 0.799 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res152_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.736 + AP@0.5: 0.904 + AP@0.75: 0.818 + AR: 0.791 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192-0345f330_20220928.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res152_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.75 + AP@0.5: 0.908 + AP@0.75: 0.821 + AR: 0.8 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: 
https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288-7fbb906f_20220927.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res50_fp16-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.716 + AP@0.5: 0.898 + AP@0.75: 0.798 + AR: 0.772 + AR@0.5: 0.937 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192-463da051_20220927.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..6f1b0107f30e336df3004788a174d4bfd2f7aef0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.md @@ -0,0 +1,79 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | 
:-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnet_50_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py) | 256x192 | 0.724 | 0.897 | 0.797 | 0.777 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192-c129dcb6_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192_20220926.log) | +| [pose_resnet_50_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py) | 384x288 | 0.735 | 0.902 | 0.801 | 0.786 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288-8b90b538_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288_20220926.log) | +| [pose_resnet_101_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py) | 256x192 | 0.733 | 0.900 | 0.810 | 0.786 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192-528ec248_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192_20220926.log) | +| [pose_resnet_101_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py) | 384x288 | 0.749 | 0.905 | 0.818 | 0.799 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288-487d40a4_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288_20220926.log) | +| [pose_resnet_152_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py) | 256x192 | 0.743 | 0.906 | 0.819 | 0.796 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192-f754df5f_20221031.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192_20221031.log) | +| [pose_resnet_152_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py) | 384x288 | 0.755 | 0.907 | 0.825 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288-329f8454_20221031.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288_20221031.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..02e4a7f43f61b709a6ede2f3a42ab5ac91e56cd8 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.yml @@ -0,0 +1,100 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py + In Collection: DarkPose + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + - DarkPose + 
Training Data: COCO + Name: td-hm_res50_dark-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.724 + AP@0.5: 0.897 + AP@0.75: 0.797 + AR: 0.777 + AR@0.5: 0.934 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192-c129dcb6_20220926.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res50_dark-8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.735 + AP@0.5: 0.902 + AP@0.75: 0.801 + AR: 0.786 + AR@0.5: 0.938 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288-8b90b538_20220926.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res101_dark-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.733 + AP@0.5: 0.9 + AP@0.75: 0.81 + AR: 0.786 + AR@0.5: 0.938 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192-528ec248_20220926.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res101_dark-8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.749 + AP@0.5: 0.905 + AP@0.75: 0.818 + AR: 0.799 + AR@0.5: 0.94 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288-487d40a4_20220926.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res152_dark-8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.743 + AP@0.5: 0.906 + AP@0.75: 0.819 + AR: 0.796 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192-f754df5f_20221031.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_res152_dark-8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.757 + AP@0.5: 0.907 + AP@0.75: 0.825 + AR: 0.805 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288-329f8454_20221031.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_fp16_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_fp16_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..2731ca8534b509b694f6d5f6958ad6f080c171c1 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_fp16_coco.md @@ -0,0 +1,73 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a 
href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [OTHERS] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1710.03740">FP16 (ArXiv'2017)</a></summary> + +```bibtex +@article{micikevicius2017mixed, + title={Mixed precision training}, + author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others}, + journal={arXiv preprint arXiv:1710.03740}, + year={2017} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnet_50_fp16](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py) | 256x192 | 0.716 | 0.898 | 0.798 | 0.772 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192-463da051_20220927.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192_20220927.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..106720153251651f5ae1a53ace2a7333c5882898 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.md @@ -0,0 +1,45 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/He_Bag_of_Tricks_for_Image_Classification_with_Convolutional_Neural_Networks_CVPR_2019_paper.html">ResNetV1D 
(CVPR'2019)</a></summary> + +```bibtex +@inproceedings{he2019bag, + title={Bag of tricks for image classification with convolutional neural networks}, + author={He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={558--567}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnetv1d_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py) | 256x192 | 0.722 | 0.897 | 0.796 | 0.777 | 0.936 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192-27545d63_20221020.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192_20221020.log) | +| [pose_resnetv1d_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py) | 384x288 | 0.730 | 0.899 | 0.800 | 0.782 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288-0646b46e_20221020.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288_20221020.log) | +| [pose_resnetv1d_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py) | 256x192 | 0.732 | 0.901 | 0.808 | 0.785 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192-ee9e7212_20221021.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192_20221021.log) | +| [pose_resnetv1d_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py) | 384x288 | 0.748 | 0.906 | 0.817 | 0.798 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288-d0b5875f_20221028.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288_20221028.log) | +| [pose_resnetv1d_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py) | 256x192 | 0.737 | 0.904 | 0.814 | 0.790 | 0.940 | 
[ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192-fd49f947_20221021.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192_20221021.log) | +| [pose_resnetv1d_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py) | 384x288 | 0.751 | 0.907 | 0.821 | 0.801 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288-b9a99602_20221022.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288_20221022.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..765c8aaabc2fa08b5ca343a5f0bd8ac6a94c764b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.yml @@ -0,0 +1,99 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNetV1D + Training Data: COCO + Name: td-hm_resnetv1d50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.722 + AP@0.5: 0.897 + AP@0.75: 0.796 + AR: 0.777 + AR@0.5: 0.936 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192-27545d63_20221020.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnetv1d50_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.73 + AP@0.5: 0.899 + AP@0.75: 0.8 + AR: 0.782 + AR@0.5: 0.935 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288-0646b46e_20221020.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnetv1d101_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.732 + AP@0.5: 0.901 + AP@0.75: 0.808 + AR: 0.785 + AR@0.5: 0.940 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192-ee9e7212_20221021.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnetv1d101_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.748 + AP@0.5: 0.906 + AP@0.75: 0.817 + AR: 0.798 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288-d0b5875f_20221028.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: 
td-hm_resnetv1d152_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.737 + AP@0.5: 0.904 + AP@0.75: 0.814 + AR: 0.790 + AR@0.5: 0.94 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192-fd49f947_20221021.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnetv1d152_8xb48-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.751 + AP@0.5: 0.907 + AP@0.75: 0.821 + AR: 0.801 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288-b9a99602_20221022.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..8862fddf6ca5c3ffe0c9df407787a1d7a0312c36 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.md @@ -0,0 +1,45 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2017/html/Xie_Aggregated_Residual_Transformations_CVPR_2017_paper.html">ResNext (CVPR'2017)</a></summary> + +```bibtex +@inproceedings{xie2017aggregated, + title={Aggregated residual transformations for deep neural networks}, + author={Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1492--1500}, + year={2017} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_resnext_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py) | 256x192 | 0.715 | 0.897 | 0.791 | 0.771 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192-dcff15f6_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192_20200727.log.json) | +| [pose_resnext_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py) | 384x288 | 0.724 | 0.899 | 0.794 | 0.777 | 0.936 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288-412c848f_20200727.pth) | 
[log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288_20200727.log.json) | +| [pose_resnext_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py) | 256x192 | 0.726 | 0.900 | 0.801 | 0.781 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192-c7eba365_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192_20200727.log.json) | +| [pose_resnext_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py) | 384x288 | 0.744 | 0.903 | 0.815 | 0.794 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288-f5eabcd6_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288_20200727.log.json) | +| [pose_resnext_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py) | 256x192 | 0.730 | 0.903 | 0.808 | 0.785 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192-102449aa_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192_20200727.log.json) | +| [pose_resnext_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py) | 384x288 | 0.742 | 0.904 | 0.810 | 0.794 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288-806176df_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288_20200727.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ebb616ecdcbfc4f94c2a0ded053e9ef18e66c45 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.yml @@ -0,0 +1,99 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNext + Training Data: COCO + Name: td-hm_resnext50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.715 + AP@0.5: 0.897 + AP@0.75: 0.791 + AR: 0.771 + AR@0.5: 0.935 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192-dcff15f6_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnext50_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.724 + AP@0.5: 0.899 + AP@0.75: 0.794 + AR: 0.777 + AR@0.5: 0.936 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288-412c848f_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnext101_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.726 + AP@0.5: 0.9 + AP@0.75: 0.801 + AR: 0.781 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192-c7eba365_20200727.pth +- Config: 
configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnext101_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.744 + AP@0.5: 0.903 + AP@0.75: 0.815 + AR: 0.794 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288-f5eabcd6_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnext152_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.73 + AP@0.5: 0.903 + AP@0.75: 0.808 + AR: 0.785 + AR@0.5: 0.94 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192-102449aa_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_resnext152_8xb48-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.742 + AP@0.5: 0.904 + AP@0.75: 0.81 + AR: 0.794 + AR@0.5: 0.94 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288-806176df_20200727.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..40f570c3c142266e13024c30ebd35ca2fcc1d00f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.md @@ -0,0 +1,44 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58580-8_27">RSN (ECCV'2020)</a></summary> + +```bibtex +@misc{cai2020learning, + title={Learning Delicate Local Representations for Multi-Person Pose Estimation}, + author={Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun}, + year={2020}, + eprint={2003.04030}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [rsn_18](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py) | 256x192 | 0.704 | 0.887 | 0.781 | 0.773 | 0.927 | 
[ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192-9049ed09_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192_20221013.log) | +| [rsn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.724 | 0.894 | 0.799 | 0.790 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192-c35901d5_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192_20221013.log) | +| [2xrsn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.748 | 0.900 | 0.821 | 0.810 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192-9ede341e_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192_20221013.log) | +| [3xrsn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.750 | 0.900 | 0.824 | 0.814 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192-c3e3c4fe_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192_20221013.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..2974aaf2c0d84117178ad5c017ba0acbea6b024f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.yml @@ -0,0 +1,72 @@ +Collections: +- Name: RSN + Paper: + Title: Learning Delicate Local Representations for Multi-Person Pose Estimation + URL: https://link.springer.com/chapter/10.1007/978-3-030-58580-8_27 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/rsn.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py + In Collection: RSN + Metadata: + Architecture: &id001 + - RSN + Training Data: COCO + Name: td-hm_rsn18_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.704 + AP@0.5: 0.887 + AP@0.75: 0.781 + AR: 0.773 + AR@0.5: 0.927 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192-9049ed09_20221013.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py + In Collection: RSN + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_rsn50_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.724 + AP@0.5: 0.894 + AP@0.75: 0.799 + AR: 0.79 + AR@0.5: 0.935 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192-c35901d5_20221013.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py + In Collection: RSN + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_2xrsn50_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.748 + AP@0.5: 0.9 
+ AP@0.75: 0.821 + AR: 0.81 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192-9ede341e_20221013.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py + In Collection: RSN + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_3xrsn50_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.75 + AP@0.5: 0.9 + AP@0.75: 0.824 + AR: 0.814 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192-c3e3c4fe_20221013.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..5fb5833e236c1136456a0f8cf4ada0ad47b3caa9 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Liu_Improving_Convolutional_Networks_With_Self-Calibrated_Convolutions_CVPR_2020_paper.html">SCNet (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{liu2020improving, + title={Improving Convolutional Networks with Self-Calibrated Convolutions}, + author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={10096--10105}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_scnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py) | 256x192 | 0.728 | 0.899 | 0.807 | 0.784 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192-6920f829_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192_20200709.log.json) | +| [pose_scnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py) | 384x288 | 0.751 | 0.906 | 0.818 | 0.802 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288-9cacd0ea_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288_20200709.log.json) | +| [pose_scnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py) | 256x192 
| 0.733 | 0.902 | 0.811 | 0.789 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192-6d348ef9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192_20200709.log.json) | +| [pose_scnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py) | 384x288 | 0.752 | 0.906 | 0.823 | 0.804 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288-0b6e631b_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288_20200709.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..cf68c67f90621472b71916e876d79794df3d583c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.yml @@ -0,0 +1,66 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SCNet + Training Data: COCO + Name: td-hm_scnet50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.728 + AP@0.5: 0.899 + AP@0.75: 0.807 + AR: 0.784 + AR@0.5: 0.938 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192-6920f829_20200709.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_scnet50_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.751 + AP@0.5: 0.906 + AP@0.75: 0.818 + AR: 0.802 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288-9cacd0ea_20200709.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_scnet101_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.733 + AP@0.5: 0.902 + AP@0.75: 0.811 + AR: 0.789 + AR@0.5: 0.94 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192-6d348ef9_20200709.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_scnet101_8xb48-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.752 + AP@0.5: 0.906 + AP@0.75: 0.823 + AR: 0.804 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288-0b6e631b_20200709.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..b704d9d1902f6dbdb6dd80517d4c44f35ae86097 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.md @@ -0,0 +1,47 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper">SEResNet (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{hu2018squeeze, +
title={Squeeze-and-excitation networks}, + author={Hu, Jie and Shen, Li and Sun, Gang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7132--7141}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_seresnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py) | 256x192 | 0.729 | 0.903 | 0.807 | 0.784 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_256x192-25058b66_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_256x192_20200727.log.json) | +| [pose_seresnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py) | 384x288 | 0.748 | 0.904 | 0.819 | 0.799 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_384x288-bc0b7680_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_384x288_20200727.log.json) | +| [pose_seresnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py) | 256x192 | 0.734 | 0.905 | 0.814 | 0.790 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_256x192-83f29c4d_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_256x192_20200727.log.json) | +| [pose_seresnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py) | 384x288 | 0.754 | 0.907 | 0.823 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_384x288-48de1709_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_384x288_20200727.log.json) | +| [pose_seresnet_152\*](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py) | 256x192 | 0.730 | 0.899 | 0.810 | 0.787 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_256x192-1c628d79_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_256x192_20200727.log.json) | +| [pose_seresnet_152\*](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py) | 384x288 | 0.753 | 0.906 | 0.824 | 0.806 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_384x288-58b23ee8_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_384x288_20200727.log.json) | 
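The checkpoints in the tables above can be loaded directly from the listed config/weight pairs. Below is a minimal inference sketch using the mmpose 1.x `MMPoseInferencer`; the config path and checkpoint URL are copied from the SEResNet-50 256x192 row, while `person.jpg` and the result indexing are illustrative assumptions:

```python
from mmpose.apis import MMPoseInferencer

# Build a top-down pose inferencer from a model-zoo config and its released
# checkpoint (any other row of the tables works the same way).
inferencer = MMPoseInferencer(
    pose2d='configs/body_2d_keypoint/topdown_heatmap/coco/'
    'td-hm_seresnet50_8xb64-210e_coco-256x192.py',
    pose2d_weights='https://download.openmmlab.com/mmpose/top_down/seresnet/'
    'seresnet50_coco_256x192-25058b66_20200727.pth')

# The call returns a generator that yields one result dict per input image.
result = next(inferencer('person.jpg'))

# Expected layout: result['predictions'][image_idx][instance_idx] holds the
# 17 COCO keypoints as (x, y) pairs plus per-keypoint confidence scores.
person = result['predictions'][0][0]
print(person['keypoints'], person['keypoint_scores'])
```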
+ +Note that * means without imagenet pre-training. diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..945e84e223fc6ee0fa8820e331dea7df91bd8650 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.yml @@ -0,0 +1,98 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SEResNet + Training Data: COCO + Name: td-hm_seresnet50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.729 + AP@0.5: 0.903 + AP@0.75: 0.807 + AR: 0.784 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_256x192-25058b66_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_seresnet50_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.748 + AP@0.5: 0.904 + AP@0.75: 0.819 + AR: 0.799 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_384x288-bc0b7680_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_seresnet101_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.734 + AP@0.5: 0.905 + AP@0.75: 0.814 + AR: 0.79 + AR@0.5: 0.941 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_256x192-83f29c4d_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_seresnet101_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.754 + AP@0.5: 0.907 + AP@0.75: 0.823 + AR: 0.805 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_384x288-48de1709_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_seresnet152_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.73 + AP@0.5: 0.899 + AP@0.75: 0.81 + AR: 0.787 + AR@0.5: 0.939 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_256x192-1c628d79_20200727.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_seresnet152_8xb48-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.753 + AP@0.5: 0.906 + AP@0.75: 0.824 + AR: 0.806 + AR@0.5: 0.945 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_384x288-58b23ee8_20200727.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.md 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..0c8be860ab7d2a58b3ba813347d754b9f5a98268 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.md @@ -0,0 +1,41 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Zhang_ShuffleNet_An_Extremely_CVPR_2018_paper.html">ShufflenetV1 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{zhang2018shufflenet, + title={Shufflenet: An extremely efficient convolutional neural network for mobile devices}, + author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={6848--6856}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_shufflenetv1](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py) | 256x192 | 0.587 | 0.849 | 0.654 | 0.654 | 0.896 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192-7a7ea4f4_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192_20221013.log) | +| [pose_shufflenetv1](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py) | 384x288 | 0.626 | 0.862 | 0.696 | 0.687 | 0.903 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288-8342f8ba_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288_20221013.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..fbdc89936d59ecdbdfbc410f4b92f00070423145 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.yml @@ -0,0 +1,35 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ShufflenetV1 + Training Data: COCO + Name: td-hm_shufflenetv1_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.587 + AP@0.5: 0.849 + AP@0.75: 
0.654 + AR: 0.654 + AR@0.5: 0.896 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192-7a7ea4f4_20221013.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_shufflenetv1_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.626 + AP@0.5: 0.862 + AP@0.75: 0.696 + AR: 0.687 + AR@0.5: 0.903 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288-8342f8ba_20221013.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..f613f4fef145e8444b207a608b661b11aba31983 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.md @@ -0,0 +1,41 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Ningning_Light-weight_CNN_Architecture_ECCV_2018_paper.html">ShufflenetV2 (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{ma2018shufflenet, + title={Shufflenet v2: Practical guidelines for efficient cnn architecture design}, + author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={116--131}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_shufflenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py) | 256x192 | 0.602 | 0.857 | 0.672 | 0.668 | 0.902 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192-51fb931e_20221014.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192_20221014.log) | +| [pose_shufflenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py) | 384x288 | 0.638 | 0.866 | 0.707 | 0.699 | 0.910 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288-d30ab55c_20221014.pth) | 
[log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288_20221014.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..cdda3a8667ee0e22146a257dfd25c514a50dc6f2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.yml @@ -0,0 +1,35 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ShufflenetV2 + Training Data: COCO + Name: td-hm_shufflenetv2_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.602 + AP@0.5: 0.857 + AP@0.75: 0.672 + AR: 0.668 + AR@0.5: 0.902 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192-51fb931e_20221014.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_shufflenetv2_8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.638 + AP@0.5: 0.866 + AP@0.75: 0.707 + AR: 0.699 + AR@0.5: 0.91 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288-d30ab55c_20221014.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..5bcc5bd187526b01e711fe3049e3007146409cd5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.md @@ -0,0 +1,78 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2103.14030">Swin (ICCV'2021)</a></summary> + +```bibtex +@inproceedings{liu2021swin, + title={Swin transformer: Hierarchical vision transformer using shifted windows}, + author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={10012--10022}, + year={2021} +} +``` + +</details> + +<!-- [OTHERS] --> + +<details> +<summary align="right"><a href="https://openaccess.thecvf.com/content_cvpr_2017/html/Lin_Feature_Pyramid_Networks_CVPR_2017_paper.html">FPN (CVPR'2017)</a></summary> + +```bibtex +@inproceedings{lin2017feature, + title={Feature pyramid networks for object detection}, + author={Lin, Tsung-Yi and Doll{\'a}r, Piotr and Girshick, Ross and He, Kaiming and Hariharan, Bharath and Belongie, Serge}, + booktitle={Proceedings of the IEEE conference on computer 
vision and pattern recognition}, + pages={2117--2125}, + year={2017} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [pose_swin_t](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py) | 256x192 | 0.724 | 0.901 | 0.806 | 0.782 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_t_p4_w7_coco_256x192-eaefe010_20220503.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_t_p4_w7_coco_256x192_20220503.log.json) | +| [pose_swin_b](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py) | 256x192 | 0.737 | 0.904 | 0.820 | 0.794 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_256x192-7432be9e_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_256x192_20220705.log.json) | +| [pose_swin_b](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py) | 384x288 | 0.759 | 0.910 | 0.832 | 0.811 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_384x288-3abf54f9_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_384x288_20220705.log.json) | +| [pose_swin_l](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py) | 256x192 | 0.743 | 0.906 | 0.821 | 0.798 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_256x192-642a89db_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_256x192_20220705.log.json) | +| [pose_swin_l](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py) | 384x288 | 0.763 | 0.912 | 0.830 | 0.814 | 0.949 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_384x288-c36b7845_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_384x288_20220705.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ede5fa5c4ec01b77d997b4b318527fa0e27daf --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.yml @@ -0,0 +1,99 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - Swin + Training 
Data: COCO + Name: td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.724 + AP@0.5: 0.901 + AP@0.75: 0.806 + AR: 0.782 + AR@0.5: 0.94 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_t_p4_w7_coco_256x192-eaefe010_20220503.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.737 + AP@0.5: 0.904 + AP@0.75: 0.82 + AR: 0.794 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_256x192-7432be9e_20220705.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.759 + AP@0.5: 0.91 + AP@0.75: 0.832 + AR: 0.811 + AR@0.5: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_384x288-3abf54f9_20220705.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.743 + AP@0.5: 0.906 + AP@0.75: 0.821 + AR: 0.798 + AR@0.5: 0.943 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_256x192-642a89db_20220705.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.763 + AP@0.5: 0.912 + AP@0.75: 0.83 + AR: 0.814 + AR@0.5: 0.949 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_384x288-c36b7845_20220705.pth +- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/swin_b_p4_w7_fpn_coco_256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO + Name: topdown_heatmap_swin_b_p4_w7_fpn_coco_256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.741 + AP@0.5: 0.907 + AP@0.75: 0.821 + AR: 0.798 + AR@0.5: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_fpn_coco_256x192-a3b91c45_20220705.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..7af125c24d81c4bfa81cdafa3cb95f9729511b66 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py @@ -0,0 +1,152 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # 
warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. +kernel_sizes = [15, 11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MSPN', + unit_channels=256, + num_stages=2, + num_units=4, + num_blocks=[3, 4, 6, 3], + norm_cfg=dict(type='BN'), + init_cfg=dict( + type='Pretrained', + checkpoint='torchvision://resnet50', + )), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=2, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3] + [1, 2, 3, 4], + loss=([ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) + ]) * 2, + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py new file 
mode 100644 index 0000000000000000000000000000000000000000..0680f6995eee3dc9a345eab0353f5dc65c023f0f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. +kernel_sizes = [15, 11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='RSN', + unit_channels=256, + num_stages=2, + num_units=4, + num_blocks=[3, 4, 6, 3], + num_steps=4, + norm_cfg=dict(type='BN'), + ), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=2, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3] + [1, 2, 3, 4], + loss=([ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) 
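+        # per stage, the three intermediate units are supervised with plain
+        # MSE (loss_weight=0.25) and the final unit with online hard keypoint
+        # mining (OHKM) MSE (loss_weight=1.); the "* 2" below repeats this
+        # pattern once per stage, giving one loss term per (stage, unit) pair.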
+ ]) * 2, + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..41162f01e5ac5c63977c11ea70b49372ef2b8476 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py @@ -0,0 +1,152 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. 
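+# The comprehension below expands into five MegviiHeatmap codecs that differ
+# only in gaussian kernel size, ordered coarse (15) to fine (5). GenerateTarget
+# with multilevel=True renders all five target maps, level_indices in the head
+# assigns one level to each unit, and the final output is decoded with the
+# sharpest codec (decoder=codec[-1]).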
+kernel_sizes = [15, 11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MSPN', + unit_channels=256, + num_stages=3, + num_units=4, + num_blocks=[3, 4, 6, 3], + norm_cfg=dict(type='BN'), + init_cfg=dict( + type='Pretrained', + checkpoint='torchvision://resnet50', + )), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=3, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3] * 2 + [1, 2, 3, 4], + loss=([ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) + ]) * 3, + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..99326451c6d05162bc3df0c8d71e8305baf574fd --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + 
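+    # lr=5e-3 is the reference setting for the 8 GPU x 32 images/GPU recipe in
+    # the file name (total batch 256); with auto_scale_lr (base_batch_size=256,
+    # set further down) mmengine can rescale it linearly when training runs at
+    # a different effective batch size.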
type='Adam', + lr=5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. +kernel_sizes = [15, 11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='RSN', + unit_channels=256, + num_stages=3, + num_units=4, + num_blocks=[3, 4, 6, 3], + num_steps=4, + norm_cfg=dict(type='BN'), + ), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=3, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3] * 2 + [1, 2, 3, 4], + loss=([ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) + ]) * 3, + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..999245e74dfc87985e34e3122979fe02486c5b4f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py @@ -0,0 +1,152 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. +kernel_sizes = [15, 11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MSPN', + unit_channels=256, + num_stages=4, + num_units=4, + num_blocks=[3, 4, 6, 3], + norm_cfg=dict(type='BN'), + init_cfg=dict( + type='Pretrained', + checkpoint='torchvision://resnet50', + )), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=4, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3] * 3 + [1, 2, 3, 4], + loss=([ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) 
+ ]) * 4, + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..13eb5f373a22892f453e0237004f79eecd6d866a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py @@ -0,0 +1,153 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=12, + layer_decay_rate=0.75, + custom_keys={ + 'bias': dict(decay_mult=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( +
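+    # UDP ('unbiased data processing') codec: the 48x64 heatmap is 1/4 of the
+    # 192x256 input and sigma=2 sets the gaussian target spread; the pipelines
+    # below pass use_udp=True to TopdownAffine to keep the mapping consistent.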
type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch='base', + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.3, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_base.pth'), + ), + neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=17, + deconv_out_channels=[], + deconv_kernel_sizes=[], + final_layer=dict(kernel_size=3, padding=1), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec, + ), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..8725fa2ca0c7aa4af44d6fcf2dbe50ed5b5c3d7c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + 
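+        # base AdamW LR; per-layer rates are scaled by the layer-decay
+        # constructor below (note: 'decay_multi' under the 'bias' key appears
+        # to be an upstream typo for 'decay_mult')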
type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=12, + layer_decay_rate=0.75, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch='base', + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.3, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_base.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=17, + deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 
'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..7d94f97c1bcd3337bb7397db6f92884df9fc0438 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py @@ -0,0 +1,153 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=32, + layer_decay_rate=0.85, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmpretrain.VisionTransformer', + arch='huge', + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.55, + with_cls_token=False, + out_type='featmap', + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_huge.pth'), + ), + neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=17, + deconv_out_channels=[], + deconv_kernel_sizes=[], + final_layer=dict(kernel_size=3, padding=1), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec, + ), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + 
batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..1953188a19827607647edd68266e466ce545aa0e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=32, + layer_decay_rate=0.85, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch='huge', + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.55, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_huge.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=17, + deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + 
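+        # the codec decodes predicted heatmaps back to keypoint coordinates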
decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..8086b09410716a881a2cf9dd0773414937e05ea6 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py @@ -0,0 +1,153 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=24, + layer_decay_rate=0.8, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 
256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch='large', + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.5, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_large.pth'), + ), + neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=17, + deconv_out_channels=[], + deconv_kernel_sizes=[], + final_layer=dict(kernel_size=3, padding=1), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec, + ), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..43d5df71545129679aa6f06f6a3fe35caa0fb56d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, 
betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=24, + layer_decay_rate=0.8, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch='large', + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.5, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_large.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=17, + deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator 
= val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..b57b0d3735ff02fdc3ccae9e750f146dbecda3f0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=12, + layer_decay_rate=0.8, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch={ + 'embed_dims': 384, + 'num_layers': 12, + 'num_heads': 12, + 'feedforward_channels': 384 * 4 + }, + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.1, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_small.pth'), + ), + neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True), + head=dict( + type='HeatmapHead', + in_channels=384, + out_channels=17, + deconv_out_channels=[], + deconv_kernel_sizes=[], + final_layer=dict(kernel_size=3, padding=1), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec, + ), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders 
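+# '8xb64' in the config name means 8 GPUs x 64 samples per GPU, matching
+# auto_scale_lr's base_batch_size of 512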
+train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..5d08a31a022fed56a34e8e47dd2bd518c410b884 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py @@ -0,0 +1,155 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +custom_imports = dict( + imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'], + allow_failed_imports=False) + +optim_wrapper = dict( + optimizer=dict( + type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1), + paramwise_cfg=dict( + num_layers=12, + layer_decay_rate=0.8, + custom_keys={ + 'bias': dict(decay_multi=0.0), + 'pos_embed': dict(decay_mult=0.0), + 'relative_position_bias_table': dict(decay_mult=0.0), + 'norm': dict(decay_mult=0.0), + }, + ), + constructor='LayerDecayOptimWrapperConstructor', + clip_grad=dict(max_norm=1., norm_type=2), +) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1)) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='mmcls.VisionTransformer', + arch={ + 'embed_dims': 384, + 'num_layers': 12, + 'num_heads': 12, + 'feedforward_channels': 384 * 4 + }, + img_size=(256, 192), + patch_size=16, + qkv_bias=True, + drop_path_rate=0.1, + with_cls_token=False, + output_cls_token=False, + patch_cfg=dict(padding=2), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'v1/pretrained_models/mae_pretrain_vit_small.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=384, + out_channels=17, + 
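+        # two 4x4 stride-2 deconv layers upsample the 16x12 ViT feature map
+        # to the 48x64 heatmap resolution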
deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +data_root = 'data/coco/' +dataset_type = 'CocoDataset' +data_mode = 'topdown' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..4051f4c5ec52fe170d5a6a050e867fe5ebb255a3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(40, 56), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='AlexNet', num_classes=-1), + head=dict( + type='HeatmapHead', + in_channels=256, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + 
decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..38b23cf7182c45a507b87c4a372fd2e174e32eb1 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(36, 48), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='CPM', + in_channels=3, + out_channels=17, + feat_channels=128, + num_stages=6), + head=dict( + type='CPMHead', + in_channels=17, + out_channels=17, + num_stages=6, + deconv_out_channels=None, + final_layer=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + 
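+        # average the heatmaps of the original and horizontally flipped image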
flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..17f7eb9677fbf0d285628e059835a45f443caeef --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(24, 32), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='CPM', + in_channels=3, + out_channels=17, + feat_channels=128, + num_stages=6), + head=dict( + type='CPMHead', + in_channels=17, + out_channels=17, + num_stages=6, + deconv_out_channels=None, + final_layer=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base 
dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..b9d49c8e6a7df8160db26ff6a0cbabe20b6f4a4a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HourglassNet', + num_stacks=1, + ), + head=dict( + type='CPMHead', + in_channels=256, + out_channels=17, + num_stages=1, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# 
pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py new file mode 100644 index 0000000000000000000000000000000000000000..d9932ff9e3773a591650ee94a95da2784bf562eb --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(384, 384), heatmap_size=(96, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HourglassNet', + num_stacks=1, + ), + head=dict( + type='CPMHead', + in_channels=256, + out_channels=17, + num_stages=1, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + 
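+    # convert each person bbox into the center/scale representation consumed
+    # by TopdownAffine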
dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..8b81dbdaac0c4df6eed9f287379b25e81ab6ce7d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py @@ -0,0 +1,174 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict( + optimizer=dict( + type='AdamW', + lr=5e-4, + betas=(0.9, 0.999), + weight_decay=0.01, + ), + paramwise_cfg=dict( + custom_keys={'relative_position_bias_table': dict(decay_mult=0.)})) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRFormer', + in_channels=3, + norm_cfg=norm_cfg, + extra=dict( + drop_path_rate=0.2, + with_rpe=True, + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(2, ), + num_channels=(64, ), + num_heads=[2], + mlp_ratios=[4]), + stage2=dict( + num_modules=1, + num_branches=2, + block='HRFORMERBLOCK', + num_blocks=(2, 2), + num_channels=(78, 156), + num_heads=[2, 4], + mlp_ratios=[4, 4], + 
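+                # 7x7 local-window self-attention in each HRFormer block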
window_sizes=[7, 7]), + stage3=dict( + num_modules=4, + num_branches=3, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2), + num_channels=(78, 156, 312), + num_heads=[2, 4, 8], + mlp_ratios=[4, 4, 4], + window_sizes=[7, 7, 7]), + stage4=dict( + num_modules=2, + num_branches=4, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2, 2), + num_channels=(78, 156, 312, 624), + num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], + window_sizes=[7, 7, 7, 7])), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrformer_base-32815020_20220226.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=78, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..351685464c9560dd748da728372dbcf46a8dfc70 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py @@ -0,0 +1,174 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict( + optimizer=dict( + type='AdamW', + lr=5e-4, + betas=(0.9, 0.999), + weight_decay=0.01, + ), + paramwise_cfg=dict( + custom_keys={'relative_position_bias_table': dict(decay_mult=0.)})) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + 
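+    # step LR decay: multiply by 0.1 at epochs 170 and 200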
dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRFormer', + in_channels=3, + norm_cfg=norm_cfg, + extra=dict( + drop_path_rate=0.2, + with_rpe=True, + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(2, ), + num_channels=(64, ), + num_heads=[2], + mlp_ratios=[4]), + stage2=dict( + num_modules=1, + num_branches=2, + block='HRFORMERBLOCK', + num_blocks=(2, 2), + num_channels=(78, 156), + num_heads=[2, 4], + mlp_ratios=[4, 4], + window_sizes=[7, 7]), + stage3=dict( + num_modules=4, + num_branches=3, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2), + num_channels=(78, 156, 312), + num_heads=[2, 4, 8], + mlp_ratios=[4, 4, 4], + window_sizes=[7, 7, 7]), + stage4=dict( + num_modules=2, + num_branches=4, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2, 2), + num_channels=(78, 156, 312, 624), + num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], + window_sizes=[7, 7, 7, 7])), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrformer_base-32815020_20220226.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=78, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators 
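+# OKS-based COCO AP on val2017, scored on the detector boxes from bbox_file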
+val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6c59395c8ad5365285c3a26d9fbeb3855b050433 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py @@ -0,0 +1,174 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict( + optimizer=dict( + type='AdamW', + lr=5e-4, + betas=(0.9, 0.999), + weight_decay=0.01, + ), + paramwise_cfg=dict( + custom_keys={'relative_position_bias_table': dict(decay_mult=0.)})) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRFormer', + in_channels=3, + norm_cfg=norm_cfg, + extra=dict( + drop_path_rate=0.1, + with_rpe=True, + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(2, ), + num_channels=(64, ), + num_heads=[2], + num_mlp_ratios=[4]), + stage2=dict( + num_modules=1, + num_branches=2, + block='HRFORMERBLOCK', + num_blocks=(2, 2), + num_channels=(32, 64), + num_heads=[1, 2], + mlp_ratios=[4, 4], + window_sizes=[7, 7]), + stage3=dict( + num_modules=4, + num_branches=3, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2), + num_channels=(32, 64, 128), + num_heads=[1, 2, 4], + mlp_ratios=[4, 4, 4], + window_sizes=[7, 7, 7]), + stage4=dict( + num_modules=2, + num_branches=4, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2, 2), + num_channels=(32, 64, 128, 256), + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + window_sizes=[7, 7, 7, 7])), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrformer_small-09516375_20220226.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', 
encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..eee3521a7c617e30efa16224520bda00fe2e64e7 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py @@ -0,0 +1,174 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict( + optimizer=dict( + type='AdamW', + lr=5e-4, + betas=(0.9, 0.999), + weight_decay=0.01, + ), + paramwise_cfg=dict( + custom_keys={'relative_position_bias_table': dict(decay_mult=0.)})) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRFormer', + in_channels=3, + norm_cfg=norm_cfg, + extra=dict( + drop_path_rate=0.1, + with_rpe=True, + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(2, ), + num_channels=(64, ), + num_heads=[2], + num_mlp_ratios=[4]), + stage2=dict( + num_modules=1, + num_branches=2, + block='HRFORMERBLOCK', + num_blocks=(2, 2), + num_channels=(32, 64), + num_heads=[1, 2], + mlp_ratios=[4, 4], + window_sizes=[7, 7]), + stage3=dict( + num_modules=4, + num_branches=3, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2), + num_channels=(32, 64, 128), 
+ num_heads=[1, 2, 4], + mlp_ratios=[4, 4, 4], + window_sizes=[7, 7, 7]), + stage4=dict( + num_modules=2, + num_branches=4, + block='HRFORMERBLOCK', + num_blocks=(2, 2, 2, 2), + num_channels=(32, 64, 128, 256), + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + window_sizes=[7, 7, 7, 7])), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrformer_small-09516375_20220226.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..ea486d830a5d397f0e65958c832933a3de6fee6d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', 
rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..ae15d35ee11973169434b0b6d6b03ec46c9530a4 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# 
optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
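# ---------------------------------------------------------------------------
# A usage sketch for the config completed above: mmpose's high-level
# inferencer accepts the config path directly. The checkpoint path is a
# placeholder for a matching trained weight file, not something added by this
# diff, and `demo.jpg` stands in for any input image.
from mmpose.apis import MMPoseInferencer

inferencer = MMPoseInferencer(
    pose2d='mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/'
    'td-hm_hrnet-w32_8xb64-210e_coco-384x288.py',
    pose2d_weights='./pretrain/hrnet_w32_coco_384x288.pth',  # placeholder
)
result = next(inferencer('demo.jpg'))  # yields predictions for one input
# ---------------------------------------------------------------------------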
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py new file mode 100644 index 0000000000000000000000000000000000000000..f5d2ed0bfd422568e71aca13c7be56217dd5d381 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py @@ -0,0 +1,221 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=3)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# keypoint mappings +keypoint_mapping_coco = [ + (0, 0), + (1, 1), + (2, 2), + (3, 3), + (4, 4), + (5, 5), + (6, 6), + (7, 7), + (8, 8), + (9, 9), + (10, 10), + (11, 11), + (12, 12), + (13, 13), + (14, 14), + (15, 15), + (16, 16), +] + +keypoint_mapping_aic = [ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + (12, 17), + (13, 18), +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=19, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + output_keypoint_indices=[ + target for _, target in keypoint_mapping_coco + ])) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', 
input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_coco) + ], +) + +dataset_aic = dict( + type='AicDataset', + data_root='data/aic/', + data_mode=data_mode, + ann_file='annotations/aic_train.json', + data_prefix=dict(img='ai_challenger_keypoint_train_20170902/' + 'keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=19, + mapping=keypoint_mapping_aic) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py new file mode 100644 index 0000000000000000000000000000000000000000..847a40da2f08516a24e8bb765aac454a5cf0dc5f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py @@ -0,0 +1,187 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + 
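# ---------------------------------------------------------------------------
# A conceptual sketch of the KeypointConverter steps used in the coco-aic
# configs above: each (source, target) pair scatters one dataset's keypoint
# into the shared 19-keypoint layout, so COCO and AIC samples can be mixed by
# CombinedDataset. This function illustrates the idea only; it is not the
# actual mmpose transform.
import numpy as np

def convert_keypoints(src_kpts, src_visible, num_keypoints, mapping):
    kpts = np.zeros((num_keypoints, 2), dtype=np.float32)
    visible = np.zeros(num_keypoints, dtype=np.float32)
    for src, dst in mapping:
        kpts[dst] = src_kpts[src]
        visible[dst] = src_visible[src]
    return kpts, visible  # unmapped targets stay at zero / not visible

# AIC's 14 keypoints scattered into the 19-keypoint combined layout
aic_mapping = [(0, 6), (1, 8), (2, 10), (3, 5), (4, 7), (5, 9), (6, 12),
               (7, 14), (8, 16), (9, 11), (10, 13), (11, 15), (12, 17),
               (13, 18)]
kpts, vis = convert_keypoints(np.ones((14, 2)), np.ones(14), 19, aic_mapping)
# ---------------------------------------------------------------------------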
num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=[], +) + +dataset_aic = dict( + type='AicDataset', + data_root='data/aic/', + data_mode=data_mode, + ann_file='annotations/aic_train.json', + data_prefix=dict(img='ai_challenger_keypoint_train_20170902/' + 'keypoint_train_images_20170902/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=17, + mapping=[ + (0, 6), + (1, 8), + (2, 10), + (3, 5), + (4, 7), + (5, 9), + (6, 12), + (7, 14), + (8, 16), + (9, 11), + (10, 13), + (11, 15), + ]) + ], +) + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict(from_file='configs/_base_/datasets/coco.py'), + datasets=[dataset_coco, dataset_aic], + pipeline=train_pipeline, + test_mode=False, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..a3ac0bd58901ec998641eec822561abb97779fc0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py @@ -0,0 +1,165 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = 
dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/' + 'body_2d_keypoint/topdown_heatmap/coco/' + 'td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='Albumentation', + transforms=[ + dict( + type='CoarseDropout', + max_holes=8, + max_height=40, + max_width=40, + min_holes=1, + min_height=10, + min_width=10, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + 
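# ---------------------------------------------------------------------------
# A sketch of what the Albumentation/CoarseDropout step above does: it blanks
# random rectangles to simulate occlusion. Outside the mmpose wrapper the
# equivalent albumentations call looks roughly like this (parameters copied
# from the config; the input is a dummy 256x192 crop).
import albumentations as A
import numpy as np

aug = A.Compose([
    A.CoarseDropout(max_holes=8, max_height=40, max_width=40,
                    min_holes=1, min_height=10, min_width=10, p=0.5),
])
crop = np.random.randint(0, 255, (256, 192, 3), dtype=np.uint8)
occluded = aug(image=crop)['image']
# ---------------------------------------------------------------------------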
data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..7273a0503bd7e67505820de75a4be106922f43f0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + 
data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..67b13b8babfe0ac672902f42212b66c5254433a2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(288, 384), + heatmap_size=(72, 96), + sigma=3, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + 
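# ---------------------------------------------------------------------------
# A toy illustration of the `unbiased=True` flag in the dark configs above:
# with unbiased (DARK-style) encoding the training Gaussian is rendered at the
# continuous keypoint location rather than the nearest pixel. Grid size and
# sigma below are example values, and this is not mmpose's codec code.
import numpy as np

def render_gaussian(center, h=64, w=48, sigma=2.0, unbiased=True):
    cx, cy = center if unbiased else np.round(center)
    xs = np.arange(w)[None, :]
    ys = np.arange(h)[:, None]
    return np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))

quantized = render_gaussian((10.4, 20.7), unbiased=False)  # peak snaps to (10, 21)
subpixel = render_gaussian((10.4, 20.7), unbiased=True)    # peak stays sub-pixel
# ---------------------------------------------------------------------------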
dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..306d0aeb44b8014c3fa31743ff92b55b3b417927 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py @@ -0,0 +1,7 @@ +_base_ = ['./td-hm_hrnet-w32_8xb64-210e_coco-256x192.py'] + +# fp16 settings +optim_wrapper = dict( + type='AmpOptimWrapper', + loss_scale='dynamic', +) diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..d380ad243db94d0ef80a55cee830fe28954c3b0e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py @@ -0,0 +1,162 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + 
num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/' + 'body_2d_keypoint/topdown_heatmap/coco/' + 'td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict( + type='Albumentation', + transforms=[ + dict( + type='GridDropout', + unit_size_min=10, + unit_size_max=40, + random_offset=True, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..f0bc7486ca27f2e58a41077527de9add9d9600b3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py @@ -0,0 +1,153 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + 
type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/' + 'body_2d_keypoint/topdown_heatmap/coco/' + 'td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PhotometricDistortion'), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
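# ---------------------------------------------------------------------------
# A launch sketch for any of these configs, e.g. the photometric variant just
# added: this is roughly what `tools/train.py` does programmatically. The
# work_dir value is an arbitrary example, and mmpose must be importable so
# its registries are populated.
import mmpose  # noqa: F401  (registers mmpose modules)
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/'
    'td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py')
cfg.work_dir = './work_dirs/hrnet_w32_photometric'  # example output dir
runner = Runner.from_cfg(cfg)
runner.train()
# ---------------------------------------------------------------------------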
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..143a686ef7536a6cfccdbdf431de9188062caa3e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', 
shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..113a91e18ce1fd3a934199f872ee6989c1e7cf95 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], 
use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..d147de838a2fce6b0293ede36ecac81b51942036 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py @@ -0,0 +1,155 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='UDPHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + sigma=2, + heatmap_type='combined') + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=3 * 17, + deconv_out_channels=None, + loss=dict(type='CombinedTargetMSELoss', use_target_weight=True), + decoder=codec), + train_cfg=dict(compute_acc=False), + test_cfg=dict( + flip_test=True, + 
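# ---------------------------------------------------------------------------
# A note on the udp-regress config above: with `heatmap_type='combined'`,
# UDPHeatmap supervises three channels per keypoint (one classification
# heatmap plus two offset maps), which is why the head predicts 3 * 17
# channels and trains with CombinedTargetMSELoss. The snippet only checks the
# channel budget; the exact channel ordering inside mmpose is not asserted.
import numpy as np

num_keypoints, h, w = 17, 64, 48  # heatmap_size=(48, 64) is (width, height)
target = np.zeros((3 * num_keypoints, h, w), dtype=np.float32)
assert target.shape[0] == 51      # 1 heatmap + 2 offsets for each keypoint
# ---------------------------------------------------------------------------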
flip_mode='udp_combined', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..1c5ff70ab47a0cf027c04983e6c1f3640ba56802 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + 
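+ # HRNet-W48: branch widths 48/96/192/384, versus 32/64/128/256 in the W32 configs above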
num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..f83b7d31a43bd0d84d55fbc2825438efa607fff0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', 
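+ # top-down pipeline: the network sees one cropped person instance per sample; test-time boxes come from the detection bbox_file further down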
+ data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..daf3cbaddc15d9ded726a3ce7183f2364ddb74c6 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, 
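+ # warm up linearly from lr * start_factor over the first 500 iterations (by_epoch=False counts iterations, not epochs)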
start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..eec52999c960c693c92b472cbff1d89d752dd2f1 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(288, 384), + heatmap_size=(72, 96), + sigma=3, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + 
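+ # evaluation boxes come from the bundled AP_H_56 person-detection results rather than ground-truth boxes, matching the standard COCO keypoint protocol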
ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..b705cb7fb3b59f158be04b4496e2a49922213f4f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + 
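+ # persistent_workers keeps the dataloader worker processes alive across epochs, avoiding per-epoch respawn overhead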
persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..cfa17ef098e5b471aba21b9d1a53dc154d8125cb --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + 
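+ # GetBBoxCenterScale converts each instance bbox into the (center, scale) pair consumed by TopdownAffine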
dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..caa7c267a09ea1080980dfeba1f26c22b9655169 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py @@ -0,0 +1,140 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(2, 4, 2), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + head=dict( + type='HeatmapHead', + in_channels=40, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', 
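+ # use_target_weight zeroes out the loss on unlabeled or invisible keypoints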
use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6f5a564d115bf7c94b6706ce337acbbccd94fb34 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py @@ -0,0 +1,140 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(2, 4, 2), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + 
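+ # 'LITE' selects LiteHRNet's lightweight conditional-channel-weighting modules (per the LiteHRNet design) in all three stages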
module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + head=dict( + type='HeatmapHead', + in_channels=40, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..663593552563dbe296ac3c780fda650dd8298c41 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py @@ -0,0 +1,140 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + 
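+ # standard ImageNet RGB mean/std in the 0-255 range; bgr_to_rgb flips OpenCV's BGR channel order first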
mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(3, 8, 3), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + head=dict( + type='HeatmapHead', + in_channels=40, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6b5d347cd9537af2a690ee3c6d02323a8c53bbd8 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py @@ -0,0 +1,140 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# 
hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(3, 8, 3), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + head=dict( + type='HeatmapHead', + in_channels=40, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..ff8eaccb7e093a16416ea52983d6cb7feb6d7814 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( 
+ type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + checkpoint='mmcls://mobilenet_v2', + )), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..d01e4c6c3dc9924079d35bde2445fb93b3541cba --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, 
start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + checkpoint='mmcls://mobilenet_v2', + )), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..d0e2e9893c6429c99b847747170690654411e68b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py @@ -0,0 +1,152 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + 
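+ # step decay: lr is multiplied by gamma=0.1 at epochs 170 and 200 of the 210-epoch schedule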
type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. +kernel_sizes = [11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MSPN', + unit_channels=256, + num_stages=1, + num_units=4, + num_blocks=[3, 4, 6, 3], + norm_cfg=dict(type='BN'), + init_cfg=dict( + type='Pretrained', + checkpoint='torchvision://resnet50', + )), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=1, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3], + loss=[ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) + ], + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py new file mode 100644 index 
0000000000000000000000000000000000000000..1b474b3f2fe7a5db3571846f7ab54c5c05c33136 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py @@ -0,0 +1,127 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='PyramidVisionTransformer', + num_layers=[3, 4, 6, 3], + init_cfg=dict( + type='Pretrained', + checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_small.pth'), + ), + neck=dict(type='FeatureMapProcessor', select_index=3), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..e8921e68030e89110afe8c44717b051b02616a13 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py @@ -0,0 +1,128 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='PyramidVisionTransformerV2', + embed_dims=64, + num_layers=[3, 4, 6, 3], + init_cfg=dict( + type='Pretrained', + checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_v2_b2.pth'), + ), + neck=dict(type='FeatureMapProcessor', select_index=3), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') 
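+# CocoMetric computes COCO-style keypoint AP/AR via OKS; the test split simply reuses the val evaluator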
+test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..cd13e4a4222f21baa200c4c8ccb17986aacfc935 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
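All of the config files added above share the same `_base_ = ['../../../_base_/default_runtime.py']` template and vary only the backbone, codec, and batch size. A minimal sketch of inspecting one of them with mmengine's Config (assumes the repo root as working directory so the relative `_base_` path resolves; the printed values just restate fields visible in the diff above):

from mmengine.config import Config

cfg = Config.fromfile(
    'mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/'
    'td-hm_res101_8xb32-210e_coco-384x288.py')

# These fields come straight from the file contents shown in this diff.
print(cfg.model.backbone.depth)          # 101
print(cfg.codec['input_size'])           # (288, 384)
print(cfg.train_dataloader.batch_size)   # 32
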
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..5486548481df742e6bc53bd32d65501971e356f5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
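The `8xb64` in these filenames encodes the intended launch setup (8 GPUs x per-GPU batch 64), which is why `auto_scale_lr.base_batch_size` is 512 here and 256 in the `8xb32` variants. When LR auto-scaling is enabled at launch (e.g. via the training script's auto-scale-lr option), mmengine rescales the optimizer LR linearly by the ratio of the actual total batch size to `base_batch_size`. A sketch of that rule; the 4-GPU setup is an assumption for illustration:

# Linear LR scaling rule applied by mmengine's auto_scale_lr (sketch only).
base_lr = 5e-4              # optimizer lr from the config above
base_batch_size = 512       # auto_scale_lr.base_batch_size (8 GPUs x 64)
actual_batch_size = 4 * 64  # assumption: 4 GPUs, per-GPU batch_size 64

scaled_lr = base_lr * actual_batch_size / base_batch_size
print(scaled_lr)  # 0.00025
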
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..39b7b3220d64d2ac905288c6bf2c0dd1ca2be7f1 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..f7c99503d46a7e0dc4402250e073b6ce9128d121 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(288, 384), + heatmap_size=(72, 96), + sigma=3, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..beccab1bd105b618b601d5d331cc0fc680df1bf7 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..25d5039f05e3d9b2387be6bc0690e5d3904faded --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
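Across every file in this batch the codec geometry follows one pattern: heatmaps are predicted at 1/4 of the input resolution, with `sigma=2` at the 256x192 input size and `sigma=3` at 384x288. A small sanity-check sketch of that relationship, using the values as they appear in these configs:

# (input_size, heatmap_size, sigma) triples from the configs above; note that
# input_size is (w, h) while the file names are written HxW.
codecs = [((192, 256), (48, 64), 2),
          ((288, 384), (72, 96), 3)]
for input_size, heatmap_size, sigma in codecs:
    assert tuple(s // 4 for s in input_size) == heatmap_size
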
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..acd91192447b4ef5f41745db0c4b93357b53b778 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..49bd2b224bea33419d392931391ba90806ee24a7 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py @@ -0,0 +1,126 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(288, 384), + heatmap_size=(72, 96), + sigma=3, + unbiased=True, + blur_kernel_size=17) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
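The `dark` variants differ from the plain ones only in the codec block: they switch `MSRAHeatmap` to unbiased (DARK-style) encoding/decoding, and the 384x288 ResNet-152 file above additionally sets `blur_kernel_size=17`. Expressed as a delta, restating the diff content rather than introducing new settings:

plain = dict(type='MSRAHeatmap', input_size=(288, 384),
             heatmap_size=(72, 96), sigma=3)
dark = dict(plain, unbiased=True, blur_kernel_size=17)
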
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..7dbe1b43f77f35fb6564b9d6322a1b8c08d93a60 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..d74cc1392d27911a1e3d2b3239840717da5a4fb5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..fdec305b10c5aaa202957650a81975158d0d1b9c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py 
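At evaluation time these configs score decoded keypoints with `CocoMetric` against the GT annotation file, while the val/test dataloader reads person boxes from the precomputed detection results (`COCO_val2017_detections_AP_H_56_person.json`) instead of GT boxes, matching the standard top-down protocol. A hedged sketch of building the metric standalone; the import path is an assumption about mmpose 1.x's layout:

from mmpose.evaluation.metrics import CocoMetric  # assumption: mmpose 1.x path

metric = CocoMetric(
    ann_file='data/coco/annotations/person_keypoints_val2017.json')
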
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..b34ad210f37ce883b21377192fbe035a7c1fcd56 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(288, 384), + heatmap_size=(72, 96), + sigma=3, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..66a6a27822fb72e7aef421bf1bf2230598c26125 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py @@ -0,0 +1,7 @@ +_base_ = ['./td-hm_res50_8xb64-210e_coco-256x192.py'] + +# fp16 settings +optim_wrapper = dict( + type='AmpOptimWrapper', + loss_scale='dynamic', +) diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..5bfbace9f6313fe89201ba5c243e51b4aa90ca27 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + 
ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..030ae95d634e40f172dae07eb2bef163084906a3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + 
bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..bdcdb6c75fb74e65ca53797eb33039f6d36357ce --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=128) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=200, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest200'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=16, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=16, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 
'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..1a5e1e8e4a570e09b9fd3a5f096584275bfb8858 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=200, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest200'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + 
test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..b519e9d2ef951298da6f3d4794d5c8660e83159d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=128) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=269, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest269'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=16, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=16, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# 
evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..b3588d1fa31e29ec960a35050ff8659e712712ec --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=269, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest269'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 
'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..43295bb41f1b4b2c87119baec30b7efc9ecb80d9 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
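
Each filename in this batch encodes its training recipe: 'td-hm' is top-down heatmap, then the backbone, then GPUs x per-GPU batch ('8xb64' = 8 GPUs, 64 samples each), the epoch budget ('210e'), and the dataset plus input resolution ('coco-256x192'). A small parser makes the convention explicit (illustrative only, not part of mmpose):

    # Illustrative parser for the mmpose config naming scheme (not part of mmpose).
    def parse_config_name(name: str) -> dict:
        algo, backbone, schedule, data = name.split('_')
        gpu_batch, epochs = schedule.split('-')          # '8xb64', '210e'
        num_gpus, batch_per_gpu = gpu_batch.split('xb')
        dataset, resolution = data.split('-')
        return dict(algorithm=algo, backbone=backbone,
                    num_gpus=int(num_gpus), batch_per_gpu=int(batch_per_gpu),
                    epochs=int(epochs.rstrip('e')),
                    dataset=dataset, input_size=resolution)

    print(parse_config_name('td-hm_resnest50_8xb64-210e_coco-256x192'))
    # {'algorithm': 'td-hm', 'backbone': 'resnest50', 'num_gpus': 8,
    #  'batch_per_gpu': 64, 'epochs': 210, 'dataset': 'coco', 'input_size': '256x192'}
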
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..e45320b036372894e9ddd0bcee6c457e86a8ecee --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeSt', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
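
Every config carries auto_scale_lr with a reference total batch size (512 for the 8xb64 recipes, 384 for 8xb48, 128 for 8xb16). When MMEngine's auto-scale-LR option is enabled at launch, the optimizer LR is multiplied by the ratio of the actual total batch size to base_batch_size. A minimal sketch of that linear scaling rule (the function name is illustrative):

    # Linear LR scaling as applied by MMEngine's auto_scale_lr (sketch).
    def scaled_lr(base_lr: float, num_gpus: int, batch_per_gpu: int,
                  base_batch_size: int) -> float:
        actual = num_gpus * batch_per_gpu
        return base_lr * actual / base_batch_size

    # On the intended 8 GPUs x 64 per GPU, the LR is unchanged:
    assert scaled_lr(5e-4, 8, 64, base_batch_size=512) == 5e-4
    # On 2 GPUs with the same per-GPU batch, it drops to a quarter:
    assert scaled_lr(5e-4, 2, 64, base_batch_size=512) == 1.25e-4
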
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..4fc55228face0a1586627e3ffa823ffe645c812a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet101_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
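
All of these files share one learning policy: a 500-iteration linear warm-up (by_epoch=False, ramping from 0.001 x lr up to lr), then epoch-based step decay multiplying by gamma=0.1 at epochs 170 and 200. A rough sketch of the resulting LR over training (MMEngine evaluates the schedulers per step; the warm-up interpolation below is an approximation):

    # Approximate LR under LinearLR warm-up followed by MultiStepLR decay.
    def lr_at(global_iter: int, epoch: int, base_lr: float = 5e-4) -> float:
        if global_iter < 500:                              # warm-up, by_epoch=False
            return base_lr * (0.001 + (1.0 - 0.001) * global_iter / 500)
        decays = sum(epoch >= m for m in (170, 200))       # by_epoch=True milestones
        return base_lr * 0.1 ** decays

    print(lr_at(0, 0))          # 5e-07  (warm-up start)
    print(lr_at(2000, 5))       # 0.0005 (full LR once warm-up ends)
    print(lr_at(100_000, 180))  # ~5e-05 (after the first decay)
    print(lr_at(100_000, 205))  # ~5e-06 (after both decays)
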
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6c8cc4e808c2fff488bc4b5c977a34d7978a6d03 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet101_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..a85a7f80c43b090426182ab9c3acaa5659b0f4d5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet152_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..7a728ce806415f8da3afd036835171b64976a41a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=384) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet152_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=48, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..c241cdd3ddbee8398c3da8d96d7d3d46bce99f24 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet50_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..4d1cea135b49cf1d70e10194685c394dc2c8bc1a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet50_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
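
Throughout these configs the MSRAHeatmap codec keeps the heatmap at exactly 1/4 of the input resolution, and widens the target Gaussian (sigma 2 -> 3) as the input grows from 256x192 to 384x288. A quick consistency check of that geometry (plain Python):

    # MSRAHeatmap geometry used in these configs: heatmap = input / 4.
    variants = [
        # (input_size (w, h), heatmap_size (w, h), sigma)
        ((192, 256), (48, 64), 2),
        ((288, 384), (72, 96), 3),
    ]
    for input_size, heatmap_size, sigma in variants:
        assert all(i == 4 * h for i, h in zip(input_size, heatmap_size))
        print(f'input {input_size} -> heatmap {heatmap_size}, sigma={sigma}')
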
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..508233371b4a819fe0b14a01798cfe48e6b32303 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeXt', + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='mmcls://resnext101_32x4d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..eafed7f07526dce3b46bcd800272764a1614a051 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeXt', + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='mmcls://resnext101_32x4d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
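
The val/test loaders implement the standard top-down evaluation protocol: person boxes come not from ground truth but from bbox_file, a precomputed detection result (the widely used COCO val2017 person detections from a detector with roughly 56 AP, hence 'AP_H_56' in the name), and CocoMetric then reports OKS-based keypoint AP against the annotation file. The bbox_file is a plain JSON list in the standard COCO detection-results layout, so it can be inspected directly; a quick look, assuming the file has been downloaded into place:

    import json

    # Standard COCO detection-results layout:
    # a list of {image_id, category_id, bbox: [x, y, w, h], score}.
    path = ('data/coco/person_detection_results/'
            'COCO_val2017_detections_AP_H_56_person.json')
    with open(path) as f:
        dets = json.load(f)
    print(len(dets), dets[0])
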
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..27c2c263b05193b10f0c0af2235b81d86cca1bc4 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeXt', + depth=152, + init_cfg=dict( + type='Pretrained', checkpoint='mmcls://resnext152_32x4d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..c02caeb7461f1fb312d02cfe7496c57a8b9b11e2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=384) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeXt', + depth=152, + init_cfg=dict( + type='Pretrained', checkpoint='mmcls://resnext152_32x4d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=48, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..b088a44ca6a043abac5e52596486362124d244c5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeXt', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnext50_32x4d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..9f97235218992e772298fcb74c1494331eeb50a7 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeXt', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnext50_32x4d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git 
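
The shared test_cfg also enables heatmap flip-testing: the model additionally runs on the horizontally flipped image, the flipped heatmap is mapped back (mirroring the width axis and swapping left/right keypoint channels), and the two predictions are averaged; shift_heatmap=True applies an empirical one-pixel shift to the flipped-back heatmap for better alignment. A conceptual numpy sketch of that averaging (mmpose implements this internally; flip_pairs below lists COCO's left/right channel indices):

    import numpy as np

    # Conceptual flip-test averaging (sketch of what test_cfg enables).
    def flip_test_average(heatmap, heatmap_flipped, flip_pairs, shift=True):
        back = heatmap_flipped[:, :, ::-1].copy()   # undo the horizontal flip
        for left, right in flip_pairs:              # swap left/right channels
            back[[left, right]] = back[[right, left]]
        if shift:                                   # shift_heatmap=True
            back[:, :, 1:] = back[:, :, :-1]
        return (heatmap + back) / 2

    hm = np.random.rand(17, 64, 48)                 # (K, H, W)
    hm_flip = np.random.rand(17, 64, 48)
    coco_pairs = [(1, 2), (3, 4), (5, 6), (7, 8),
                  (9, 10), (11, 12), (13, 14), (15, 16)]
    print(flip_test_average(hm, hm_flip, coco_pairs).shape)  # (17, 64, 48)
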
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..18d16bd26784ad9f706e39bd83c25fc913ef4b08 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-2, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 190, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. +kernel_sizes = [11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='RSN', + unit_channels=256, + num_stages=1, + num_units=4, + num_blocks=[2, 2, 2, 2], + num_steps=4, + norm_cfg=dict(type='BN'), + ), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=1, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3], + loss=[ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) 
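+ # note: '[...] * 3 + [...]' above expands to four losses, matching
+ # num_units=4 and level_indices=[0, 1, 2, 3]: three down-weighted
+ # KeypointMSELosses for the coarser units and one KeypointOHKMMSELoss
+ # (online hard keypoint mining) for the final unit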
+ ], + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..069cb413123be20dee06dd8014b583dfa267fa46 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach. 
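+# The comprehension below expands to four MegviiHeatmap codecs, one per
+# supervision level, with the Gaussian kernel shrinking from 11 to 5;
+# GenerateTarget(multilevel=True) encodes all four targets, while the head
+# decodes its output with codec[-1], the kernel_size=5 codec.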
+kernel_sizes = [11, 9, 7, 5] +codec = [ + dict( + type='MegviiHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + kernel_size=kernel_size) for kernel_size in kernel_sizes +] + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='RSN', + unit_channels=256, + num_stages=1, + num_units=4, + num_blocks=[3, 4, 6, 3], + num_steps=4, + norm_cfg=dict(type='BN'), + ), + head=dict( + type='MSPNHead', + out_shape=(64, 48), + unit_channels=256, + out_channels=17, + num_stages=1, + num_units=4, + norm_cfg=dict(type='BN'), + # each sub list is for a stage + # and each element in each list is for a unit + level_indices=[0, 1, 2, 3], + loss=[ + dict( + type='KeypointMSELoss', + use_target_weight=True, + loss_weight=0.25) + ] * 3 + [ + dict( + type='KeypointOHKMMSELoss', + use_target_weight=True, + loss_weight=1.) + ], + decoder=codec[-1]), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='GenerateTarget', multilevel=True, encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec[0]['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=4, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json', + nms_mode='none') +test_evaluator = val_evaluator + +# fp16 settings +fp16 = dict(loss_scale='dynamic') diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..544c87242f5f3e7e4a0b129aa927e21a8c5a4430 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# 
learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet101-94250a77.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=1, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..1af2e44ef013ea525d0d7cfe19312c07a1b5ae93 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy 
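+# (added note, not from upstream: this schedule is a 500-iteration linear
+# warm-up starting from 0.1% of the base lr, followed by 10x step decays at
+# epochs 170 and 200 of the 210-epoch run.)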
+param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet101-94250a77.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=48, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..efa1ad924cf5da56fc0ab69cee89eee48355376d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( 
+ type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=50, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet50-7ef0a199.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=1, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=1, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..9d784d80296e085f201da67e7f45732af6fe8938 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, 
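+        # (added note, not from upstream: with by_epoch=False below, begin
+        # and end count iterations, so this warm-up spans the first 500
+        # optimizer steps rather than 500 epochs.)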
end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=50, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet50-7ef0a199.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..b515b744c4c9b43126b2e85b9c32b5663016be70 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, 
start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..f6d9fab2eda60ecef464a645b572866d1954cbcf --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', 
+ begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..a0ef9bf5711f01625f3faf0d46122dae2eca8c35 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) 
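+    # (added note, not from upstream: if auto LR scaling is enabled at
+    # launch, the lr is rescaled by total_batch / base_batch_size using
+    # auto_scale_lr below; e.g. 8 GPUs x batch 32 = 256 against base 512
+    # would halve 5e-4 to 2.5e-4.)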
+] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=152, + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..13524c121772b7a73b79dd7d9c2fd4fd6e5ad882 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=384) + +# hooks +default_hooks = 
dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=152, + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=48, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..93fb78fac56a697164a383229436c79de5392be5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), 
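+    # (added note, not from upstream: input_size is (width, height); the
+    # heatmap is 1/4 resolution, so 192x256 maps to 48x64, with each keypoint
+    # rendered as a Gaussian of sigma=2 heatmap pixels.)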
heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..fa2002a70a94d7104d07ec2c921a6c1123f859ab --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + 
type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..029f48d3d90bdc113066c67200cbe15772bd0b9b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + 
type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ShuffleNetV1', + groups=3, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v1'), + ), + head=dict( + type='HeatmapHead', + in_channels=960, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..f06c325bd1213995bb51bd9c1e477de0604e4cb7 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + 
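+        # (added note, not from upstream: these are the ImageNet RGB mean/std
+        # on the 0-255 scale; bgr_to_rgb=True converts the BGR images that
+        # the loader produces before normalization.)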
std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ShuffleNetV1', + groups=3, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v1'), + ), + head=dict( + type='HeatmapHead', + in_channels=960, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..333998490e38105e4f73a55af6358e868943117a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + 
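+        # (added note, not from upstream: the 'mmcls://' checkpoint prefix
+        # below is resolved through the OpenMMLab model-zoo registry to the
+        # MMClassification ShuffleNetV2 weights.)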
type='ShuffleNetV2', + widen_factor=1.0, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v2'), + ), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..e7be5484e8d56f6001a3c1e5de91dd1b8c32821f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ShuffleNetV2', + widen_factor=1.0, + 
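+        # (added note, not from upstream: widen_factor=1.0 selects the
+        # ShuffleNetV2 1.0x variant, whose final conv outputs the 1024
+        # channels expected by head.in_channels below.)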
init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v2'), + ), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..81877b893f69b66a2263a4d5dfea8407d56668af --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py @@ -0,0 +1,139 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SwinTransformer', + embed_dims=128, + depths=[2, 
2, 18, 2], + num_heads=[4, 8, 16, 32], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(3, ), + with_cp=False, + convert_weights=True, + init_cfg=dict( + type='Pretrained', + checkpoint='https://github.com/SwinTransformer/storage/releases/' + 'download/v1.0.0/swin_base_patch4_window7_224_22k.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..0c1d5fa12f97259031d65030e5abee8cb61d372d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py @@ -0,0 +1,139 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), 
sigma=3) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SwinTransformer', + embed_dims=128, + depths=[2, 2, 18, 2], + num_heads=[4, 8, 16, 32], + window_size=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + patch_norm=True, + out_indices=(3, ), + with_cp=False, + convert_weights=True, + init_cfg=dict( + type='Pretrained', + checkpoint='https://github.com/SwinTransformer/storage/releases/' + 'download/v1.0.0/swin_base_patch4_window12_384_22k.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..14d08a49f865a901b0832f40dd2819b8ee43d58c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py @@ -0,0 +1,148 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict( + optimizer=dict( + type='AdamW', + lr=5e-4, + betas=(0.9, 0.999), + weight_decay=0.01, + ), + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': 
dict(decay_mult=0.) + })) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SwinTransformer', + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.5, + patch_norm=True, + out_indices=(3, ), + with_cp=False, + convert_weights=True, + init_cfg=dict( + type='Pretrained', + # swin-l (embed_dims=192) must load the *large* 22k weights, not the base ones + checkpoint='https://github.com/SwinTransformer/storage/releases/' + 'download/v1.0.0/swin_large_patch4_window7_224_22k.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=1536, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py new file mode 100644 index 
0000000000000000000000000000000000000000..692c8df1a616dabbcb93a9be67f4626862eae172 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py @@ -0,0 +1,148 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict( + optimizer=dict( + type='AdamW', + lr=5e-4, + betas=(0.9, 0.999), + weight_decay=0.01, + ), + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SwinTransformer', + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.5, + patch_norm=True, + out_indices=(3, ), + with_cp=False, + convert_weights=True, + init_cfg=dict( + type='Pretrained', + # swin-l (embed_dims=192) must load the *large* 22k weights, not the base ones + checkpoint='https://github.com/SwinTransformer/storage/releases/' + 'download/v1.0.0/swin_large_patch4_window12_384_22k.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=1536, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' 
+ 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..068ee0649f4cf97f5887ff5b17f44d6e1e1609b3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py @@ -0,0 +1,139 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SwinTransformer', + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + patch_norm=True, + out_indices=(3, ), + with_cp=False, + convert_weights=True, + init_cfg=dict( + type='Pretrained', + checkpoint='https://github.com/SwinTransformer/storage/releases/' + 'download/v1.0.0/swin_tiny_patch4_window7_224.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + 
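+ # evaluation scores detector boxes loaded from bbox_file below, not ground-truth boxes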
batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..b85adb998bb5f2660ef00d1d395a6ca8bb4763c0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='VGG', + depth=16, + norm_cfg=dict(type='BN'), + init_cfg=dict(type='Pretrained', checkpoint='mmcls://vgg16_bn'), + ), + head=dict( + type='HeatmapHead', + in_channels=512, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + 
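+ # keep worker processes alive between validation runs to avoid dataloader respawn overhead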
drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..04fcc1ad2ef3152e217fa20bc0a325d44b1e6f0d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ViPNAS_MobileNetV3'), + head=dict( + type='ViPNASHead', + in_channels=160, + out_channels=17, + deconv_out_channels=(160, 160, 160), + deconv_num_groups=(160, 160, 160), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + 
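+ # round_up=False in the sampler below avoids padding the val set, so each image is scored exactly once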
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..8190d7ffd2ca650f939935487551f0a62a8bf078 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ViPNAS_ResNet', depth=50), + head=dict( + type='ViPNASHead', + in_channels=608, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + 
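+ # test_mode=True makes the dataset read person boxes from bbox_file instead of the annotations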
type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..a2c19453f3e3e1be0490c6e55becd2ba4ae14f04 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.md @@ -0,0 +1,39 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/1409.1556">VGG (ICLR'2015)</a></summary> + +```bibtex +@article{simonyan2014very, + title={Very deep convolutional networks for large-scale image recognition}, + author={Simonyan, Karen and Zisserman, Andrew}, + journal={arXiv preprint arXiv:1409.1556}, + year={2014} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [vgg](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py) | 256x192 | 0.699 | 0.890 | 0.769 | 0.754 | 0.927 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192-7e7c58d6_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192_20210517.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..166fa05fcddc05ffe60a996ec63bb747d58ea7dd --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.yml @@ -0,0 +1,19 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - VGG + Training Data: COCO + Name: td-hm_vgg16-bn_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.699 + AP@0.5: 0.89 + AP@0.75: 0.769 + AR: 0.754 + AR@0.5: 0.927 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192-7e7c58d6_20210517.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.md 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..b6a178865bd6844c0f73fbe7db43aa4be795dc71 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.md @@ -0,0 +1,40 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2105.10154">ViPNAS (CVPR'2021)</a></summary> + +```bibtex +@inproceedings{xu2021vipnas, + title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search}, + author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + year={2021} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [S-ViPNAS-MobileNetV3](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py) | 256x192 | 0.700 | 0.887 | 0.783 | 0.758 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192-e0987441_20221010.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192_20221010.log) | +| [S-ViPNAS-Res50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.711 | 0.894 | 0.787 | 0.769 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192-35d4bff9_20220917.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192_20220917.log) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..cbdaa5bcabf800b60b14a044e5de0e71f753017a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.yml @@ -0,0 +1,40 @@ +Collections: +- Name: ViPNAS + Paper: + Title: 'ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search' + URL: https://arxiv.org/abs/2105.10154 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/vipnas.md +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py + In Collection: ViPNAS + Metadata: + Architecture: &id001 + - ViPNAS + Training Data: COCO + Name: 
td-hm_vipnas-mbv3_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.7 + AP@0.5: 0.887 + AP@0.75: 0.783 + AR: 0.758 + AR@0.5: 0.929 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192-e0987441_20221010.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py + In Collection: ViPNAS + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-hm_vipnas-res50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.711 + AP@0.5: 0.894 + AP@0.75: 0.787 + AR: 0.769 + AR@0.5: 0.934 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192-35d4bff9_20220917.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..f9266001d5d49b223c5b00b488db5a5545b9dae8 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.md @@ -0,0 +1,62 @@ +To use ViTPose, you'll need [MMClassification](https://github.com/open-mmlab/mmclassification). Install the required version with the following command: + +```shell +mim install 'mmcls>=1.0.0rc5' +``` + +<!-- [BACKBONE] --> + +<details> + +<summary align="right"><a href="https://arxiv.org/abs/2204.12484">ViTPose (NeurIPS'2022)</a></summary> + +```bibtex +@inproceedings{ + xu2022vitpose, + title={Vi{TP}ose: Simple Vision Transformer Baselines for Human Pose Estimation}, + author={Yufei Xu and Jing Zhang and Qiming Zhang and Dacheng Tao}, + booktitle={Advances in Neural Information Processing Systems}, + year={2022}, +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +> With classic decoder + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ViTPose-S](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.739 | 0.903 | 0.816 | 0.792 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) | +| [ViTPose-B](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.757 | 0.905 | 0.829 | 0.810 | 0.946 | 
[ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) | +| [ViTPose-L](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py) | 256x192 | 0.782 | 0.914 | 0.850 | 0.834 | 0.952 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192-53609f55_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192-53609f55_20230314.json) | +| [ViTPose-H](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py) | 256x192 | 0.788 | 0.917 | 0.855 | 0.839 | 0.954 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192-e32adcd4_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192-e32adcd4_20230314.json) | +| [ViTPose-H\*](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py) | 256x192 | 0.790 | 0.916 | 0.857 | 0.840 | 0.953 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_3rdparty_coco-256x192-5b738c8e_20230314) | - | + +*Models with * are converted from the [official repo](https://github.com/ViTAE-Transformer/ViTPose). The config files of these models are only for validation.* + +> With simple decoder + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [ViTPose-S](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.736 | 0.900 | 0.811 | 0.790 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192-4c101a76_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192-4c101a76_20230314.json) | +| [ViTPose-B](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.756 | 0.906 | 0.826 | 0.809 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192-0b8234ea_20230407.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192-0b8234ea_20230407.json) | +| [ViTPose-L](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.781 | 0.914 | 0.853 | 0.833 | 0.952 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192-3a7ee9e1_20230314.pth) | 
[log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192-3a7ee9e1_20230314.json) | +| [ViTPose-H](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.789 | 0.916 | 0.856 | 0.839 | 0.953 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..6d1cc7db15b055d418adf9bc429b5319e81b71df --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.yml @@ -0,0 +1,155 @@ +Collections: +- Name: ViTPose + Paper: + Title: 'ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation' + URL: https://arxiv.org/abs/2204.12484 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/vitpose.md + Metadata: + Training Resources: 8x A100 GPUs +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Metadata: + Architecture: &id001 + - ViTPose + - Classic Head + Model Size: Small + Training Data: COCO + Name: td-hm_ViTPose-small_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.739 + AP@0.5: 0.903 + AP@0.75: 0.816 + AR: 0.792 + AR@0.5: 0.942 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Metadata: + Architecture: *id001 + Model Size: Base + Training Data: COCO + Name: td-hm_ViTPose-base_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.757 + AP@0.5: 0.905 + AP@0.75: 0.829 + AR: 0.81 + AR@0.5: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Metadata: + Architecture: *id001 + Model Size: Large + Training Data: COCO + Name: td-hm_ViTPose-large_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.782 + AP@0.5: 0.914 + AP@0.75: 0.850 + AR: 0.834 + AR@0.5: 0.952 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192-53609f55_20230314.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Metadata: + Architecture: *id001 + Model Size: Huge + Training Data: COCO + Name: td-hm_ViTPose-huge_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.788 + AP@0.5: 0.917 + AP@0.75: 0.855 + AR: 0.839 + AR@0.5: 0.954 + Task: Body 2D Keypoint + Weights: 
https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192-e32adcd4_20230314.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Alias: vitpose-s + Metadata: + Architecture: &id002 + - ViTPose + - Simple Head + Model Size: Small + Training Data: COCO + Name: td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.736 + AP@0.5: 0.900 + AP@0.75: 0.811 + AR: 0.790 + AR@0.5: 0.940 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192-4c101a76_20230314.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Alias: + - vitpose + - vitpose-b + Metadata: + Architecture: *id002 + Model Size: Base + Training Data: COCO + Name: td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.756 + AP@0.5: 0.906 + AP@0.75: 0.826 + AR: 0.809 + AR@0.5: 0.946 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192-0b8234ea_20230407.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Alias: vitpose-l + Metadata: + Architecture: *id002 + Model Size: Large + Training Data: COCO + Name: td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.781 + AP@0.5: 0.914 + AP@0.75: 0.853 + AR: 0.833 + AR@0.5: 0.952 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192-3a7ee9e1_20230314.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py + In Collection: ViTPose + Alias: vitpose-h + Metadata: + Architecture: *id002 + Model Size: Huge + Training Data: COCO + Name: td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.789 + AP@0.5: 0.916 + AP@0.75: 0.856 + AR: 0.839 + AR@0.5: 0.953 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..b1ba19a130b13f8c2c670ae15cff5e21303bc89e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py @@ -0,0 +1,217 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + 
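+ # warm up by iteration over the first 1000 optimizer steps (end=1000 below)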
begin=0, + end=1000), + dict( + # use cosine lr from 105 to 210 epoch (max_epochs // 2 to max_epochs) + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=14, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/', +# f'{data_root}': 's3://openmmlab/datasets/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + 
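+ # CrowdPose trainval split; with data_root='data/', images resolve to data/pose/CrowdPose/images/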
data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='crowdpose/annotations/mmpose_crowdpose_test.json', + bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json', + data_prefix=dict(img='pose/CrowdPose/images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='crowdpose/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'crowdpose/annotations/mmpose_crowdpose_test.json', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.md new file mode 100644 index 0000000000000000000000000000000000000000..24c35348389b3f532fffa418a21b6edce6d21cb0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Li_CrowdPose_Efficient_Crowded_Scenes_Pose_Estimation_and_a_New_Benchmark_CVPR_2019_paper.html">CrowdPose (CVPR'2019)</a></summary> + +```bibtex +@article{li2018crowdpose, + title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark}, + author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu}, + journal={arXiv preprint arXiv:1812.00324}, + year={2018} +} +``` + +</details> + +Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector + +| Arch | Input Size | AP | AP<sup>50</sup> 
| AP<sup>75</sup> | AP (E) | AP (M) | AP (H) | ckpt | log | +| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: | +| [pose_cspnext_m](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py) | 256x192 | 0.662 | 0.821 | 0.723 | 0.759 | 0.675 | 0.539 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-crowdpose_pt-in1k_210e-256x192-f591079f_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-crowdpose_pt-in1k_210e-256x192-f591079f_20230123.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.yml new file mode 100644 index 0000000000000000000000000000000000000000..6e5b4cd691ccb083db97b33b3531b0e69f39af12 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.yml @@ -0,0 +1,20 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py + In Collection: UDP + Metadata: + Architecture: + - UDP + - CSPNeXt + Training Data: CrowdPose + Name: cspnext-m_udp_8xb64-210e_crowpose-256x192 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.662 + AP (E): 0.759 + AP (H): 0.539 + AP (M): 0.675 + AP@0.5: 0.821 + AP@0.75: 0.723 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-crowdpose_pt-in1k_210e-256x192-f591079f_20230123.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.md new file mode 100644 index 0000000000000000000000000000000000000000..c0d24d47175d592b536d905881df852959406eed --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.md @@ -0,0 +1,38 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Li_CrowdPose_Efficient_Crowded_Scenes_Pose_Estimation_and_a_New_Benchmark_CVPR_2019_paper.html">CrowdPose (CVPR'2019)</a></summary> + +```bibtex +@article{li2018crowdpose, + title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark}, + author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu}, + journal={arXiv preprint arXiv:1812.00324}, + year={2018} +} +``` + +</details> + +Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP (E) | AP (M) | AP (H) | ckpt | log | +| :--------------------------------------------- | :--------: | :---: | 
:-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.675 | 0.825 | 0.729 | 0.770 | 0.687 | 0.553 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192-960be101_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192_20201227.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.yml new file mode 100644 index 0000000000000000000000000000000000000000..c37fa9154feab028c1dd3d3511fccabcc2805042 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.yml @@ -0,0 +1,19 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py + In Collection: HRNet + Metadata: + Architecture: + - HRNet + Training Data: CrowdPose + Name: td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.675 + AP (E): 0.77 + AP (H): 0.553 + AP (M): 0.687 + AP@0.5: 0.825 + AP@0.75: 0.729 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192-960be101_20201227.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.md new file mode 100644 index 0000000000000000000000000000000000000000..56a771806d361061652b57e624b13169db1bb410 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Li_CrowdPose_Efficient_Crowded_Scenes_Pose_Estimation_and_a_New_Benchmark_CVPR_2019_paper.html">CrowdPose (CVPR'2019)</a></summary> + +```bibtex +@article{li2018crowdpose, + title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark}, + author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu}, + journal={arXiv preprint arXiv:1812.00324}, + year={2018} +} +``` + +</details> + +Results on CrowdPose test with 
[YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP (E) | AP (M) | AP (H) | ckpt | log | +| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: | +| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.637 | 0.808 | 0.692 | 0.738 | 0.650 | 0.506 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192-c6a526b6_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192_20201227.log.json) | +| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.647 | 0.810 | 0.703 | 0.745 | 0.658 | 0.521 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192-8f5870f4_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192_20201227.log.json) | +| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py) | 320x256 | 0.661 | 0.821 | 0.714 | 0.759 | 0.672 | 0.534 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256-c88c512a_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256_20201227.log.json) | +| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.656 | 0.818 | 0.712 | 0.754 | 0.666 | 0.533 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192-dbd49aba_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192_20201227.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.yml new file mode 100644 index 0000000000000000000000000000000000000000..1477c28deb33691632ccdb33035ed8075e43e241 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.yml @@ -0,0 +1,71 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: CrowdPose + Name: td-hm_res50_8xb64-210e_crowdpose-256x192 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.637 + AP (E): 0.738 + AP (H): 0.506 + AP (M): 0.65 + AP@0.5: 0.808 + AP@0.75: 0.692 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192-c6a526b6_20201227.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: CrowdPose + Name: td-hm_res101_8xb64-210e_crowdpose-256x192 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.647 + AP (E): 0.745 + AP (H): 0.521 + AP (M): 0.658 + AP@0.5: 0.81 + AP@0.75: 0.703 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192-8f5870f4_20201227.pth +- Config: 
configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: CrowdPose + Name: td-hm_res101_8xb64-210e_crowdpose-320x256 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.661 + AP (E): 0.759 + AP (H): 0.534 + AP (M): 0.672 + AP@0.5: 0.821 + AP@0.75: 0.714 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256-c88c512a_20201227.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: CrowdPose + Name: td-hm_res152_8xb64-210e_crowdpose-256x192 + Results: + - Dataset: CrowdPose + Metrics: + AP: 0.656 + AP (E): 0.754 + AP (H): 0.533 + AP (M): 0.666 + AP@0.5: 0.818 + AP@0.75: 0.712 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192-dbd49aba_20201227.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..3117314a43ab214da46a83c5621f1860bcb3f57f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py @@ -0,0 +1,152 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=14, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'topdown' +data_root = 
'data/crowdpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_test.json', + bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/mmpose_crowdpose_test.json', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..79cae1d130a3713944069e37a3258811b068e655 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=14, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'topdown' +data_root = 
'data/crowdpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_test.json', + bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/mmpose_crowdpose_test.json', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py new file mode 100644 index 0000000000000000000000000000000000000000..eac5caf859095d3867fdd45fde58774b8c5ce54e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 320), heatmap_size=(64, 80), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=14, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'topdown' +data_root = 
'data/crowdpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_test.json', + bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/mmpose_crowdpose_test.json', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..5b99439535a54b4bc69ca5ee270aa5a0d7fa26bf --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=14, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'topdown' +data_root = 
'data/crowdpose/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_test.json', + bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/mmpose_crowdpose_test.json', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..d669b2e2670657a25def5234037e371bede0882d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=14, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CrowdPoseDataset' +data_mode = 'topdown' +data_root = 'data/crowdpose/' 
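+# NOTE: at test time this config scores person boxes detected by the YOLOv3 human detector referenced in the model-zoo tables above (bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json' below), not ground-truth boxes.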
+ +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_trainval.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mmpose_crowdpose_test.json', + bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=data_root + 'annotations/mmpose_crowdpose_test.json', + use_area=False, + iou_type='keypoints_crowd', + prefix='crowdpose') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.md new file mode 100644 index 0000000000000000000000000000000000000000..29df027e3f76c0801c1c89303e776f78d5c6047e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html">CPM (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{wei2016convolutional, + title={Convolutional pose machines}, + author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser}, + booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition}, + pages={4724--4732}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://www.cv-foundation.org/openaccess/content_iccv_2013/html/Jhuang_Towards_Understanding_Action_2013_ICCV_paper.html">JHMDB (ICCV'2013)</a></summary> + +```bibtex +@inproceedings{Jhuang:ICCV:2013, + title = {Towards understanding action recognition}, + author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black}, + booktitle = {International Conf. on Computer Vision (ICCV)}, + month = Dec, + pages = {3192-3199}, + year = {2013} +} +``` + +</details> + +Results on Sub-JHMDB dataset + +The models are pre-trained on the MPII dataset only. *NO* test-time augmentation (multi-scale / rotation testing) is used.
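+Before the split-wise numbers, a minimal inference sketch for one of the checkpoints below (a sketch only, assuming an MMPose 1.x install with its high-level `init_model`/`inference_topdown` helpers; the image path is a placeholder):
+
+```python
+from mmpose.apis import inference_topdown, init_model
+
+# Config shipped in this tree; checkpoint URL taken from the Sub1 row below.
+config = ('configs/body_2d_keypoint/topdown_heatmap/jhmdb/'
+          'td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py')  # relative to mmpose/
+checkpoint = ('https://download.openmmlab.com/mmpose/top_down/cpm/'
+              'cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth')
+
+model = init_model(config, checkpoint, device='cpu')
+# With no bounding boxes supplied, the whole image is treated as one person.
+results = inference_topdown(model, 'demo.jpg')  # placeholder image path
+print(results[0].pred_instances.keypoints.shape)  # (1, 15, 2): 15 JHMDB joints
+```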
+ +- Normalized by Person Size + +| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log | +| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: | +| Sub1 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py) | 368x368 | 96.1 | 91.9 | 81.0 | 78.9 | 96.6 | 90.8 | 87.3 | 89.5 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368_20201122.log.json) | +| Sub2 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py) | 368x368 | 98.1 | 93.6 | 77.1 | 70.9 | 94.0 | 89.1 | 84.7 | 87.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368_20201122.log.json) | +| Sub3 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py) | 368x368 | 97.9 | 94.9 | 87.3 | 84.0 | 98.6 | 94.4 | 86.2 | 92.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368_20201122.log.json) | +| Average | cpm | 368x368 | 97.4 | 93.5 | 81.5 | 77.9 | 96.4 | 91.4 | 86.1 | 89.8 | - | - | + +- Normalized by Torso Size + +| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log | +| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: | +| Sub1 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py) | 368x368 | 89.0 | 63.0 | 54.0 | 54.9 | 68.2 | 63.1 | 61.2 | 66.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368_20201122.log.json) | +| Sub2 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py) | 368x368 | 90.3 | 57.9 | 46.8 | 44.3 | 60.8 | 58.2 | 62.4 | 61.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368_20201122.log.json) | +| Sub3 | [cpm](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py) | 368x368 | 91.0 | 72.6 | 59.9 | 54.0 | 73.2 | 68.5 | 65.8 | 70.3 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368_20201122.log.json) | +| Average | cpm | 368x368 | 90.1 | 64.5 | 53.6 | 51.1 | 67.4 | 63.3 | 63.1 | 65.7 | - | - | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.yml new file mode 100644 index 0000000000000000000000000000000000000000..f9f6d7568b50b65bd8f5754539f861b15daca8c7 --- /dev/null +++ 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.yml @@ -0,0 +1,116 @@ +Models: +- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py + In Collection: CPM + Metadata: + Architecture: &id001 + - CPM + Training Data: JHMDB + Name: td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368 + Results: + - Dataset: JHMDB + Metrics: + Ank: 87.3 + Elb: 81 + Head: 96.1 + Hip: 96.6 + Knee: 90.8 + Mean: 89.5 + Sho: 91.9 + Wri: 78.9 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth +- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py + In Collection: CPM + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368 + Results: + - Dataset: JHMDB + Metrics: + Ank: 84.7 + Elb: 77.1 + Head: 98.1 + Hip: 94.0 + Knee: 89.1 + Mean: 87.4 + Sho: 93.6 + Wri: 70.9 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth +- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py + In Collection: CPM + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368 + Results: + - Dataset: JHMDB + Metrics: + Ank: 86.2 + Elb: 87.3 + Head: 97.9 + Hip: 98.6 + Knee: 94.4 + Mean: 92.4 + Sho: 94.9 + Wri: 84.0 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth +- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py + In Collection: CPM + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368 + Results: + - Dataset: JHMDB + Metrics: + Ank: 61.2 + Elb: 54.0 + Head: 89.0 + Hip: 68.2 + Knee: 63.1 + Mean: 66.0 + Sho: 63.0 + Wri: 54.9 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth +- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py + In Collection: CPM + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368 + Results: + - Dataset: JHMDB + Metrics: + Ank: 62.4 + Elb: 46.8 + Head: 90.3 + Hip: 60.8 + Knee: 58.2 + Mean: 61.1 + Sho: 57.9 + Wri: 44.3 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth +- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py + In Collection: CPM + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368 + Results: + - Dataset: JHMDB + Metrics: + Ank: 65.8 + Elb: 59.9 + Head: 91.0 + Hip: 73.2 + Knee: 68.5 + Mean: 70.3 + Sho: 72.6 + Wri: 54.0 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.md new file mode 100644 index 0000000000000000000000000000000000000000..22422e731653b1b01840feb0f79eb82459bb968f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.md @@ -0,0 +1,81 @@ +<!-- [ALGORITHM] --> + +<details> 
+<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://www.cv-foundation.org/openaccess/content_iccv_2013/html/Jhuang_Towards_Understanding_Action_2013_ICCV_paper.html">JHMDB (ICCV'2013)</a></summary> + +```bibtex +@inproceedings{Jhuang:ICCV:2013, + title = {Towards understanding action recognition}, + author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black}, + booktitle = {International Conf. on Computer Vision (ICCV)}, + month = Dec, + pages = {3192-3199}, + year = {2013} +} +``` + +</details> + +Results on Sub-JHMDB dataset + +The models are pre-trained on the MPII dataset only. *NO* test-time augmentation (multi-scale / rotation testing) is used. + +- Normalized by Person Size + +| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log | +| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: | +| Sub1 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py) | 256x256 | 99.1 | 98.0 | 93.8 | 91.3 | 99.4 | 96.5 | 92.8 | 96.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256_20201122.log.json) | +| Sub2 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py) | 256x256 | 99.3 | 97.1 | 90.6 | 87.0 | 98.9 | 96.3 | 94.1 | 95.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256_20201122.log.json) | +| Sub3 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py) | 256x256 | 99.0 | 97.9 | 94.0 | 91.6 | 99.7 | 98.0 | 94.7 | 96.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256_20201122.log.json) | +| Average | pose_resnet_50 | 256x256 | 99.2 | 97.7 | 92.8 | 90.0 | 99.3 | 96.9 | 93.9 | 96.0 | - | - | +| Sub1 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py) | 256x256 | 99.1 | 98.5 |
94.6 | 92.0 | 99.4 | 94.6 | 92.5 | 96.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256_20201122.log.json) | +| Sub2 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py) | 256x256 | 99.3 | 97.8 | 91.0 | 87.0 | 99.1 | 96.5 | 93.8 | 95.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256_20201122.log.json) | +| Sub3 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py) | 256x256 | 98.8 | 98.4 | 94.3 | 92.1 | 99.8 | 97.5 | 93.8 | 96.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256_20201122.log.json) | +| Average | pose_resnet_50 (2 Deconv.) | 256x256 | 99.1 | 98.2 | 93.3 | 90.4 | 99.4 | 96.2 | 93.4 | 96.0 | - | - | + +- Normalized by Torso Size + +| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log | +| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: | +| Sub1 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py) | 256x256 | 93.3 | 83.2 | 74.4 | 72.7 | 85.0 | 81.2 | 78.9 | 81.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256_20201122.log.json) | +| Sub2 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py) | 256x256 | 94.1 | 74.9 | 64.5 | 62.5 | 77.9 | 71.9 | 78.6 | 75.5 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256_20201122.log.json) | +| Sub3 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py) | 256x256 | 97.0 | 82.2 | 74.9 | 70.7 | 84.7 | 83.7 | 84.2 | 82.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256_20201122.log.json) | +| Average | pose_resnet_50 | 256x256 | 94.8 | 80.1 | 71.3 | 68.6 | 82.5 | 78.9 | 80.6 | 80.1 | - | - | +| Sub1 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py) | 256x256 | 92.4 | 80.6 | 73.2 | 70.5 | 82.3 | 75.4 | 75.0 | 79.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256_20201122.log.json) | +| Sub2 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py) | 256x256 | 
93.4 | 73.6 | 63.8 | 60.5 | 75.1 | 68.4 | 75.5 | 73.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256_20201122.log.json) | +| Sub3 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py) | 256x256 | 96.1 | 81.2 | 72.6 | 67.9 | 83.6 | 80.9 | 81.5 | 81.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256_20201122.log.json) | +| Average | pose_resnet_50 (2 Deconv.) | 256x256 | 94.0 | 78.5 | 69.9 | 66.3 | 80.3 | 74.9 | 77.3 | 78.0 | - | - | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.yml new file mode 100644 index 0000000000000000000000000000000000000000..d7480d12a0ce45db7ea28af0b02e2493b34c04b8 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.yml @@ -0,0 +1,231 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: JHMDB + Name: td-hm_res50_8xb64-20e_jhmdb-sub1-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 92.8 + Elb: 93.8 + Head: 99.1 + Hip: 99.4 + Knee: 96.5 + Mean: 96.1 + Sho: 98.0 + Wri: 91.3 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50_8xb64-20e_jhmdb-sub2-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 94.1 + Elb: 90.6 + Head: 99.3 + Hip: 98.9 + Knee: 96.3 + Mean: 95.0 + Sho: 97.1 + Wri: 87.0 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50_8xb64-20e_jhmdb-sub3-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 94.7 + Elb: 94.0 + Head: 99.0 + Hip: 99.7 + Knee: 98.0 + Mean: 96.7 + Sho: 97.9 + Wri: 91.6 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 92.5 + Elb: 94.6 + Head: 99.1 + Hip: 99.4 + Knee: 94.6 + Mean: 96.1 + Sho: 98.5 + Wri: 92.0 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + 
Training Data: JHMDB + Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 93.8 + Elb: 91.0 + Head: 99.3 + Hip: 99.1 + Knee: 96.5 + Mean: 95.2 + Sho: 97.8 + Wri: 87.0 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 93.8 + Elb: 94.3 + Head: 98.8 + Hip: 99.8 + Knee: 97.5 + Mean: 96.7 + Sho: 98.4 + Wri: 92.1 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50_8xb64-20e_jhmdb-sub1-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 78.9 + Elb: 74.4 + Head: 93.3 + Hip: 85.0 + Knee: 81.2 + Mean: 81.9 + Sho: 83.2 + Wri: 72.7 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50_8xb64-20e_jhmdb-sub2-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 78.6 + Elb: 64.5 + Head: 94.1 + Hip: 77.9 + Knee: 71.9 + Mean: 75.5 + Sho: 74.9 + Wri: 62.5 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50_8xb64-20e_jhmdb-sub3-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 84.2 + Elb: 74.9 + Head: 97.0 + Hip: 84.7 + Knee: 83.7 + Mean: 82.9 + Sho: 82.2 + Wri: 70.7 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 75.0 + Elb: 73.2 + Head: 92.4 + Hip: 82.3 + Knee: 75.4 + Mean: 79.2 + Sho: 80.6 + Wri: 70.5 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 75.5 + Elb: 63.8 + Head: 93.4 + Hip: 75.1 + Knee: 68.4 + Mean: 73.7 + Sho: 73.6 + Wri: 60.5 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth 
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: JHMDB + Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256 + Results: + - Dataset: JHMDB + Metrics: + Ank: 81.5 + Elb: 72.6 + Head: 96.1 + Hip: 83.6 + Knee: 80.9 + Mean: 81.2 + Sho: 81.2 + Wri: 67.9 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py new file mode 100644 index 0000000000000000000000000000000000000000..479039f5428f7f5e736beb4cfe9c7b88c986e4ed --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py @@ -0,0 +1,127 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=40, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=40, + milestones=[20, 30], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='CPM', + in_channels=3, + out_channels=15, + feat_channels=128, + num_stages=6), + head=dict( + type='CPMHead', + in_channels=15, + out_channels=15, + num_stages=6, + deconv_out_channels=None, + final_layer=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub1_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + 
dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub1_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py new file mode 100644 index 0000000000000000000000000000000000000000..88b60e9f87dfc783610aa8222a4256d9625efc60 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py @@ -0,0 +1,127 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=40, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=40, + milestones=[20, 30], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='CPM', + in_channels=3, + out_channels=15, + feat_channels=128, + num_stages=6), + head=dict( + type='CPMHead', + in_channels=15, + out_channels=15, + num_stages=6, + deconv_out_channels=None, + final_layer=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub2_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub2_test.json', + data_prefix=dict(img=''), + test_mode=True, + 
pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py new file mode 100644 index 0000000000000000000000000000000000000000..602b2bcfd6aac7df667da5d71ea9d8ea233778ad --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py @@ -0,0 +1,127 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=40, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=40, + milestones=[20, 30], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='CPM', + in_channels=3, + out_channels=15, + feat_channels=128, + num_stages=6), + head=dict( + type='CPMHead', + in_channels=15, + out_channels=15, + num_stages=6, + deconv_out_channels=None, + final_layer=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub3_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub3_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] 
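+# NOTE: one JhmdbPCKAccuracy instance produces both result tables in cpm_jhmdb.md: norm_item 'bbox' gives the person-size-normalized PCK and 'torso' the torso-size-normalized PCK.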
+test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..8d104e1e86e0818947f86612dbbe8b4c9b30e31f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=40, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=40, + milestones=[20, 30], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(32, 32), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ResNet', depth=50), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=15, + deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) +load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501 + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub1_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub1_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..6135ce29ab3b070586d0324f95c37b272002459e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=40, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=40, + milestones=[20, 30], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(32, 32), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ResNet', depth=50), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=15, + deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) +load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501 + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub2_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub2_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..44d95b15b2a0e73eb93deefc32e5e3f093212648 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=40, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=40, + milestones=[20, 30], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(32, 32), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ResNet', depth=50), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=15, + deconv_out_channels=(256, 256), + deconv_kernel_sizes=(4, 4), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) +load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501 + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub3_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub3_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator
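To run one of these files end to end, mmengine's `Config`/`Runner` pair resolves the `_base_` inheritance and builds the model, dataloaders, and hooks from the fields above. A minimal sketch, assuming mmpose 1.x is installed and JHMDB is prepared under `data/jhmdb/`; the `work_dir` value is an assumption:

```python
from mmengine.config import Config
from mmengine.runner import Runner

# resolve the _base_ inheritance and load the config as a dict-like object
cfg = Config.fromfile(
    'mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/'
    'td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py')
cfg.work_dir = './work_dirs/jhmdb_sub3'  # assumed output directory

runner = Runner.from_cfg(cfg)  # builds model, dataloaders, hooks, evaluators
runner.train()                 # 40 epochs, validating PCK every epoch
```

diff --git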
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..9578a66c18b3b58a9cd85ecb4941913eac6175ea --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[8, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ResNet', depth=50), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=15, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) +load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501 + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub1_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub1_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator
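Note the variant difference: this 20-epoch config keeps `HeatmapHead`'s default deconvolution stack and regresses 64x64 heatmaps, whereas the `-2deconv` configs above use two deconvolution layers and 32x32 targets. In both cases the `MSRAHeatmap` codec simply places a fixed-width Gaussian at each keypoint; an illustrative sketch of that encoding (not mmpose's implementation):

```python
import numpy as np

def msra_target(kpt_xy, input_size=(256, 256), heatmap_size=(64, 64), sigma=2):
    """Illustrative MSRA-style target: one Gaussian blob per keypoint.

    kpt_xy: (x, y) in input-image pixel coordinates.
    """
    stride = input_size[0] / heatmap_size[0]  # 4.0 for 256 -> 64
    cx, cy = kpt_xy[0] / stride, kpt_xy[1] / stride
    xs = np.arange(heatmap_size[0])[None, :]  # column coordinates
    ys = np.arange(heatmap_size[1])[:, None]  # row coordinates
    return np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))

target = msra_target((128.0, 96.0))  # peak at heatmap cell (x=32, y=24)
print(target.shape, target.max())    # (64, 64) 1.0
```

diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py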
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..856c89e660b7e2e866c4bd48eff32bf9faff731d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[8, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ResNet', depth=50), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=15, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) +load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501 + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub2_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub2_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py new file mode 100644 index 
0000000000000000000000000000000000000000..73065968848a063b462504c55e4a2ac85ffd49d9 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[8, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict(type='ResNet', depth=50), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=15, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) +load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501 + +# base dataset settings +dataset_type = 'JhmdbDataset' +data_mode = 'topdown' +data_root = 'data/jhmdb/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub3_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/Sub3_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..0c2888bb88431497c30d7a046595488c7edaa87b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary 
align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html">CPM (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{wei2016convolutional, + title={Convolutional pose machines}, + author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser}, + booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition}, + pages={4724--4732}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [cpm](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py) | 368x368 | 0.876 | 0.285 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368_20200822.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..3e2e439253e2eccb96d42ec887d6889abe520d65 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py + In Collection: CPM + Metadata: + Architecture: + - CPM + Training Data: MPII + Name: td-hm_cpm_8xb64-210e_mpii-368x368 + Results: + - Dataset: MPII + Metrics: + Mean: 0.876 + Mean@0.1: 0.285 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..fc8d6fdcea8d717c9ecbc70fb966364dea14257e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py @@ -0,0 +1,210 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 210 to 420 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + 
end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=1024) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/', +# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + 
pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='PCK', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..80aec4c28e443d64a5db78d81508b182695f487d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.md @@ -0,0 +1,57 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_cspnext_m](/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py) | 256x256 | 0.902 | 0.303 |
[ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-mpii_pt-in1k_210e-256x256-68d0402f_20230208.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-mpii_pt-in1k_210e-256x256-68d0402f_20230208.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..7256f3b15443a4f51a68a32c6f271ef6d8c58089 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py + In Collection: UDP + Metadata: + Architecture: + - UDP + - CSPNeXt + Training Data: MPII + Name: cspnext-m_udp_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.902 + Mean@0.1: 0.303 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-mpii_pt-in1k_210e-256x256-68d0402f_20230208.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..98e795de4ff558bf49270e9c1ed6890e9de1f311 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.md @@ -0,0 +1,41 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29">Hourglass (ECCV'2016)</a></summary> + +```bibtex +@inproceedings{newell2016stacked, + title={Stacked hourglass networks for human pose estimation}, + author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia}, + booktitle={European conference on computer vision}, + pages={483--499}, + year={2016}, + organization={Springer} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py) | 256x256 | 0.889 | 0.317 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256-ae358435_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256_20200812.log.json) | +| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py) | 384x384 | 0.894 | 0.367 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384-04090bc3_20200812.pth) | 
[log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384_20200812.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..eb22cd98ce6536a4280cb188694d47d51c47e050 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.yml @@ -0,0 +1,28 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py + In Collection: Hourglass + Metadata: + Architecture: &id001 + - Hourglass + Training Data: MPII + Name: td-hm_hourglass52_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.889 + Mean@0.1: 0.317 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256-ae358435_20200812.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py + In Collection: Hourglass + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_hourglass52_8xb32-210e_mpii-384x384 + Results: + - Dataset: MPII + Metrics: + Mean: 0.894 + Mean@0.1: 0.367 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384-04090bc3_20200812.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..a03a96ba2e5aa9a39bd1849ec933b4ac58e4f438 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.md @@ -0,0 +1,57 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + 
+</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_hrnet_w32_dark](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py) | 256x256 | 0.904 | 0.354 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark-f1601c5b_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark_20200927.log.json) | +| [pose_hrnet_w48_dark](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py) | 256x256 | 0.905 | 0.360 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark-0decd39f_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark_20200927.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..0283b5c827de5a2f170460ddb7e159328faf9e76 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.yml @@ -0,0 +1,29 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py + In Collection: DarkPose + Metadata: + Architecture: &id001 + - HRNet + - DarkPose + Training Data: MPII + Name: td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.904 + Mean@0.1: 0.354 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark-f1601c5b_20200927.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.905 + Mean@0.1: 0.36 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark-0decd39f_20200927.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..7e8a69f64f5165db52c2d672464c080a73427215 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.md @@ -0,0 +1,40 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex 
+@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py) | 256x256 | 0.900 | 0.334 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256-6c4f923f_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_20200812.log.json) | +| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py) | 256x256 | 0.901 | 0.337 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256-92cab7bd_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_20200812.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..f32129742da5306e1d9f1ea7b8048480a7efa12c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.yml @@ -0,0 +1,28 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py + In Collection: HRNet + Metadata: + Architecture: &id001 + - HRNet + Training Data: MPII + Name: td-hm_hrnet-w32_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.9 + Mean@0.1: 0.334 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256-6c4f923f_20200812.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_hrnet-w48_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.901 + Mean@0.1: 0.337 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256-92cab7bd_20200812.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..e6647569508429fe88683ddcb16c0eaebd3309a6 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2104.06403">LiteHRNet (CVPR'2021)</a></summary> + +```bibtex +@inproceedings{Yulitehrnet21, + title={Lite-HRNet: A Lightweight High-Resolution Network}, + author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong}, + booktitle={CVPR}, + year={2021} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a 
href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [LiteHRNet-18](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py) | 256x256 | 0.859 | 0.260 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256-cabd7984_20210623.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256_20210623.log.json) | +| [LiteHRNet-30](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py) | 256x256 | 0.869 | 0.271 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256-faae8bd8_20210622.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256_20210622.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..c4314b7a74d57bbe45cd635aba06313043cc912e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.yml @@ -0,0 +1,28 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py + In Collection: LiteHRNet + Metadata: + Architecture: &id001 + - LiteHRNet + Training Data: MPII + Name: td-hm_litehrnet-18_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.859 + Mean@0.1: 0.26 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256-cabd7984_20210623.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py + In Collection: LiteHRNet + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_litehrnet-30_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.869 + Mean@0.1: 0.271 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256-faae8bd8_20210622.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..8bb280a8210042fb5d5f7a3f085e7a7b98ad651b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.md @@ -0,0 +1,39 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + 
author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_mobilenetv2](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py) | 256x256 | 0.854 | 0.234 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256-e068afa7_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256_20200812.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..afc54f79340332a73b079284f6920d9476972e7f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - MobilenetV2 + Training Data: MPII + Name: td-hm_mobilenetv2_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.854 + Mean@0.1: 0.234 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256-e068afa7_20200812.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..b8d98c4d6e9b6e716ce543b7ddb3101eef58eb20 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + 
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.882 | 0.286 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256_20200812.log.json) | +| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.888 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256-416f5d71_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256_20200812.log.json) | +| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py) | 256x256 | 0.889 | 0.303 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256-3ecba29d_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256_20200812.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..ff92c4f7ce78a07c141c049902d6f2cd320e0dcb --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.yml @@ -0,0 +1,42 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: MPII + Name: td-hm_res50_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.882 + Mean@0.1: 0.286 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_res101_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.888 + Mean@0.1: 0.29 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256-416f5d71_20200812.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: 
*id001 + Training Data: MPII + Name: td-hm_res152_8xb32-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.889 + Mean@0.1: 0.303 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256-3ecba29d_20200812.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..23362650980ca29784bb769d3bd55538272b79d5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.md @@ -0,0 +1,41 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/He_Bag_of_Tricks_for_Image_Classification_with_Convolutional_Neural_Networks_CVPR_2019_paper.html">ResNetV1D (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{he2019bag, + title={Bag of tricks for image classification with convolutional neural networks}, + author={He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={558--567}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_resnetv1d_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.881 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256-2337a92e_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256_20200812.log.json) | +| [pose_resnetv1d_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.883 | 0.295 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256-2851d710_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256_20200812.log.json) | +| [pose_resnetv1d_152](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py) | 256x256 | 0.888 | 0.300 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256-8b10a87c_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256_20200812.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..e98e722db1428a622c82c53255fb962c890d3f58 --- /dev/null +++ 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.yml @@ -0,0 +1,42 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNetV1D + Training Data: MPII + Name: td-hm_resnetv1d50_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.881 + Mean@0.1: 0.29 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256-2337a92e_20200812.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_resnetv1d101_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.883 + Mean@0.1: 0.295 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256-2851d710_20200812.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_resnetv1d152_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.888 + Mean@0.1: 0.3 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256-8b10a87c_20200812.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..bf9d5acf8c1e48aca593f1162c35e32e73b6774d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.md @@ -0,0 +1,39 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2017/html/Xie_Aggregated_Residual_Transformations_CVPR_2017_paper.html">ResNext (CVPR'2017)</a></summary> + +```bibtex +@inproceedings{xie2017aggregated, + title={Aggregated residual transformations for deep neural networks}, + author={Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1492--1500}, + year={2017} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_resnext_152](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py) | 256x256 | 0.887 | 0.294 | 
[ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256-df302719_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256_20200927.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..580dda77b013b7eae4ed288925741cb8d3f6f246 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ResNext + Training Data: MPII + Name: td-hm_resnext152_8xb32-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.887 + Mean@0.1: 0.294 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256-df302719_20200927.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..cf0e4befffc4b023dfe84a23d7880cb959154f1e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.md @@ -0,0 +1,40 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Liu_Improving_Convolutional_Networks_With_Self-Calibrated_Convolutions_CVPR_2020_paper.html">SCNet (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{liu2020improving, + title={Improving Convolutional Networks with Self-Calibrated Convolutions}, + author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={10096--10105}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_scnet_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.888 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256-a54b6af5_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256_20200812.log.json) | +| [pose_scnet_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.887 | 0.293 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256-b4c2d184_20200812.pth) | 
[log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256_20200812.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..b1ec80fd8005dd50847171cd7c916cece02dd94d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.yml @@ -0,0 +1,29 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - SCNet + Training Data: MPII + Name: td-hm_scnet50_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.888 + Mean@0.1: 0.29 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256-a54b6af5_20200812.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_scnet101_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.887 + Mean@0.1: 0.293 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256-b4c2d184_20200812.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..1c92ecf9ea19a05665faf7ff2feb62a1ec5753a2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.md @@ -0,0 +1,43 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper">SEResNet (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{hu2018squeeze, + title={Squeeze-and-excitation networks}, + author={Hu, Jie and Shen, Li and Sun, Gang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7132--7141}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_seresnet_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.884 | 0.292 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256-1bb21f79_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256_20200927.log.json) | +| 
[pose_seresnet_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.884 | 0.295 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256-0ba14ff5_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256_20200927.log.json) | +| [pose_seresnet_152\*](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py) | 256x256 | 0.884 | 0.287 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256-6ea1e774_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256_20200927.log.json) | + +Note that * means the model was trained without ImageNet pre-training. diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..e71050811a4a028626e7cbbb4563533c9fdd6e57 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.yml @@ -0,0 +1,42 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - SEResNet + Training Data: MPII + Name: td-hm_seresnet50_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.884 + Mean@0.1: 0.292 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256-1bb21f79_20200927.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_seresnet101_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.884 + Mean@0.1: 0.295 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256-0ba14ff5_20200927.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-hm_seresnet152_8xb32-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.884 + Mean@0.1: 0.287 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256-6ea1e774_20200927.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..3cdaaaf5eaa1b2d9f311b9c63dc6ec37cdac2cdc --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.md @@ -0,0 +1,39 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Zhang_ShuffleNet_An_Extremely_CVPR_2018_paper.html">ShufflenetV1 (CVPR'2018)</a></summary> + +```bibtex
@inproceedings{zhang2018shufflenet, + title={Shufflenet: An extremely efficient convolutional neural network for mobile devices}, + author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={6848--6856}, + year={2018} +} +```

</details> + +<!-- [DATASET] --> + 
+<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_shufflenetv1](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py) | 256x256 | 0.824 | 0.195 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256-dcc1c896_20200925.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256_20200925.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..b9edecc42838b43aec08941d4d87c37c72d74de0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ShufflenetV1 + Training Data: MPII + Name: td-hm_shufflenetv1_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.824 + Mean@0.1: 0.195 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256-dcc1c896_20200925.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..8ab7b026ba02ec179e07ec772f12b46966748fd9 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.md @@ -0,0 +1,39 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Ningning_Light-weight_CNN_Architecture_ECCV_2018_paper.html">ShufflenetV2 (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{ma2018shufflenet, + title={Shufflenet v2: Practical guidelines for efficient cnn architecture design}, + author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={116--131}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year 
= {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [pose_shufflenetv2](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py) | 256x256 | 0.828 | 0.205 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256-4fb9df2d_20200925.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256_20200925.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..efa6e14f51b13c14c8a25b3929f0150e69a13589 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ShufflenetV2 + Training Data: MPII + Name: td-hm_shufflenetv2_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.828 + Mean@0.1: 0.205 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256-4fb9df2d_20200925.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py new file mode 100644 index 0000000000000000000000000000000000000000..794c49420ab69ae202685bb70c6d8ec8e1b2a02b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='CPM', + in_channels=3, + out_channels=16, + feat_channels=128, + num_stages=6), + head=dict( + type='CPMHead', + in_channels=16, + out_channels=16, + num_stages=6, + deconv_out_channels=None, + final_layer=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + 
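+    # Top-down pipeline: load the image, turn the person bbox into a
+    # center/scale crop, randomly flip/rotate/rescale it, warp the crop to
+    # the 368x368 model input, then render the Gaussian target heatmaps.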
dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py new file mode 100644 index 0000000000000000000000000000000000000000..e9546504e0d3ead0b6977c33a4172a2581532a7f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(384, 384), heatmap_size=(96, 96), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HourglassNet', + num_stacks=1, + ), + head=dict( + type='CPMHead', + in_channels=256, + out_channels=16, + num_stages=1, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', 
input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..cd854a40a3f5d6def990488b5967058997d2348f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HourglassNet', + num_stacks=1, + ), + head=dict( + type='CPMHead', + in_channels=256, + out_channels=16, + num_stages=1, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + 
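+    # Eval-time pipeline: deterministic (no flip/rotate/scale augmentation)
+    # and no GenerateTarget step, since predictions are decoded from the
+    # heatmaps and scored against the packed ground-truth keypoints.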
dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..459f24f3bdbbdc4a93e43e16382b023d6ff76e50 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py @@ -0,0 +1,146 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=16, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', 
direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=16, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=16, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..5d47ed6fdc161e019c94b1ab64751a096a0d4537 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=16, + deconv_out_channels=None, + 
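+        # HRNet keeps a stride-4 branch, so its 64x64 output already matches
+        # the heatmap size and no deconv upsampling layers are needed.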
loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..4e3fce96000a2ff5e1165a88a322e1cfd1226c0a --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py @@ -0,0 +1,146 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + 
num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=16, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..18b31539a33d542ae8a0f83b42835a7cf97ec5c2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + 
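+            # Same four-stage HRNet-W48 layout as the plain config; the
+            # 'dark' variant differs only in the codec's unbiased=True
+            # (DarkPose's distribution-aware encoding/decoding).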
stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=16, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..bdab446f5038c6d86231d27284aee3b3723bea14 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py @@ -0,0 +1,137 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + 
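+    # Encode each keypoint as a 2D Gaussian (sigma=2) on a 64x64 heatmap,
+    # i.e. 1/4 of the 256x256 network input.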
type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(2, 4, 2), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + head=dict( + type='HeatmapHead', + in_channels=40, + out_channels=16, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..84089add2a0d2c6c7d7d8b75275cf097a9b68e7f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py @@ -0,0 +1,137 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size 
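+# e.g. the nominal setup here is 8 GPUs x 64 samples = 512 = base_batch_size,
+# so lr=5e-4 is used as-is; with a different world size the runner rescales
+# it linearly, but only when LR auto-scaling is enabled at launch.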
+auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(3, 8, 3), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + head=dict( + type='HeatmapHead', + in_channels=40, + out_channels=16, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..41b9d3ba9ba964f34f1204d185e36dcbcb3821e0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', 
+ begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict(type='Pretrained', checkpoint='mmcls://mobilenet_v2'), + ), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..def5d2fd1681262689afd40b20a0299e64118136 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = 
dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..bf515d0d21e6796af7fc79fb39ec27cd0fb0c7b0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( 
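+        # Normalize with the ImageNet mean/std and convert OpenCV's BGR
+        # channel order to RGB before images reach the backbone.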
+ type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..dee56ae77b0c7b7fa40690e712e7c7ad4648f279 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + 
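+        # SimpleBaseline-style head: the default deconv stack (three x2
+        # layers in mmpose) upsamples the 8x8 ResNet-50 feature map back
+        # to the 64x64 heatmaps.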
type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..0cbf684e38c1358cd939621294765249e1e5d68e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet101_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base 
dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..24653a9e56b982b150ced4157c486428a34f9d04 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet152_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', 
direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..48bcfec5eb5017036168fae73396d809fcb3f567 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNetV1d', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet50_v1d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + 
dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..30afb101037cc31d9dd51ac02487e5ef749921c7 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNeXt', + depth=152, + init_cfg=dict( + type='Pretrained', checkpoint='mmcls://resnext152_32x4d'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + 
persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..fb5c6b702c28300525db4137973889967af9d09c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet101-94250a77.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + 
ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..c2f7723724b80d730f70d00f7649adb5935a10fc --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=50, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet50-7ef0a199.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + 
drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..56b7fccb2e121fdd9734f9a43963f7fe1cc7511c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + 
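+ # PCKh normalizes keypoint error by head size; the `headbox_file` below
+ # supplies the ground-truth head boxes used by `MpiiPCKAccuracy`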
headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..79bb29e4b34fba243bca0635df2d8548e19ed76b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py @@ -0,0 +1,116 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=152, + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git 
a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..257dc360ad1ea41cec56d57bd4de19a59146a7a5 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SEResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py new file mode 100644 index 
0000000000000000000000000000000000000000..83eaca208f237d6eff8b7930e36bc91213af4fdf --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ShuffleNetV1', + groups=3, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v1'), + ), + head=dict( + type='HeatmapHead', + in_channels=960, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..cd05c23596c21c7aa2f491c7e95399f2ec1126c7 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py @@ -0,0 +1,117 @@ +_base_ = 
['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ShuffleNetV2', + widen_factor=1.0, + init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v2'), + ), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=16, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file='data/mpii/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.md new file mode 100644 index 0000000000000000000000000000000000000000..5d26a103db205eca0a9466a2f362ed29b1c64d0f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.md @@ -0,0 +1,55 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + 
title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Andriluka_PoseTrack_A_Benchmark_CVPR_2018_paper.html">PoseTrack18 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{andriluka2018posetrack, + title={Posetrack: A benchmark for human pose estimation and tracking}, + author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={5167--5176}, + year={2018} +} +``` + +</details> + +Results on PoseTrack2018 val with ground-truth bounding boxes + +| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log | +| :--------------------------------------------------- | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :---: | :---------------------------------------------------: | :--------------------------------------------------: | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py) | 256x192 | 86.2 | 89.0 | 84.5 | 79.2 | 82.3 | 82.5 | 78.7 | 83.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192_20201028.log.json) | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py) | 384x288 | 87.1 | 89.0 | 85.1 | 80.2 | 80.6 | 82.8 | 79.6 | 83.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288_20211130.log.json) | +| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py) | 256x192 | 88.3 | 90.2 | 86.0 | 81.0 | 80.7 | 83.3 | 80.6 | 84.6 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192_20211130.log.json) | +| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py) | 384x288 | 87.8 | 90.0 | 86.2 | 81.3 | 81.0 | 83.4 | 80.9 | 84.6 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288_20211130.log.json) | + +The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.
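+
+The detector-box results below differ from the ground-truth-box results above only in where the person boxes come from at test time. As a minimal sketch (field values copied from the PoseTrack18 config files added later in this diff), these are the `val_dataloader.dataset` fields that select the box source:
+
+```python
+# validation dataset excerpt from the configs below
+val_dataset = dict(
+    type='PoseTrack18Dataset',
+    data_root='data/posetrack18/',
+    data_mode='topdown',
+    ann_file='annotations/posetrack18_val.json',
+    # detector boxes: keep `bbox_file` and `filter_cfg`
+    bbox_file='data/posetrack18/annotations/'
+    'posetrack18_val_human_detections.json',
+    filter_cfg=dict(bbox_score_thr=0.4),  # drop detections scoring below 0.4
+    data_prefix=dict(img=''),
+    test_mode=True,
+)
+# ground-truth boxes: omit `bbox_file` and `filter_cfg`, so boxes are read
+# from the annotation file itself
+```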
+ +Results on PoseTrack2018 val with an [MMDetection](https://github.com/open-mmlab/mmdetection)-pre-trained [Cascade R-CNN](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) (X-101-64x4d-FPN) human detector + +| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log | +| :--------------------------------------------------- | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :---: | :---------------------------------------------------: | :--------------------------------------------------: | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py) | 256x192 | 78.0 | 82.9 | 79.5 | 73.8 | 76.9 | 76.6 | 70.2 | 76.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192_20201028.log.json) | +| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py) | 384x288 | 79.9 | 83.6 | 80.4 | 74.5 | 74.8 | 76.1 | 70.5 | 77.3 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288_20211130.log.json) | +| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py) | 256x192 | 80.1 | 83.4 | 80.6 | 74.8 | 74.3 | 76.8 | 70.5 | 77.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192_20211130.log.json) | +| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py) | 384x288 | 80.2 | 83.8 | 80.9 | 75.2 | 74.7 | 76.7 | 71.7 | 77.8 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288_20211130.log.json) | + +The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.
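+
+Either table's checkpoints can be exercised directly with mmpose's high-level inference API (`init_model`/`inference_topdown` from mmpose 1.x, which this PR vendors). A minimal sketch; the config path and checkpoint URL are copied from the first table, and `demo.jpg` is a placeholder image path:
+
+```python
+from mmpose.apis import inference_topdown, init_model
+
+config = ('configs/body_2d_keypoint/topdown_heatmap/posetrack18/'
+          'td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py')
+checkpoint = ('https://download.openmmlab.com/mmpose/top_down/hrnet/'
+              'hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth')
+
+model = init_model(config, checkpoint, device='cpu')
+# with no bboxes supplied, the whole image is treated as one person box
+samples = inference_topdown(model, 'demo.jpg')
+print(samples[0].pred_instances.keypoints.shape)  # (1, 17, 2): one instance, 17 keypoints, xy
+```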
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.yml new file mode 100644 index 0000000000000000000000000000000000000000..a0dcc78f7c65b0e712caa6e4f4204bfc8a3d8626 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.yml @@ -0,0 +1,154 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py + In Collection: HRNet + Metadata: + Architecture: &id001 + - HRNet + Training Data: PoseTrack18 + Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 78.7 + Elb: 84.5 + Head: 86.2 + Hip: 82.3 + Knee: 82.5 + Shou: 89 + Total: 83.4 + Wri: 79.2 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: PoseTrack18 + Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 79.6 + Elb: 85.1 + Head: 87.1 + Hip: 80.6 + Knee: 82.8 + Shou: 89 + Total: 83.7 + Wri: 80.2 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: PoseTrack18 + Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 80.6 + Elb: 86 + Head: 88.3 + Hip: 80.7 + Knee: 83.3 + Shou: 90.2 + Total: 84.6 + Wri: 81 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: PoseTrack18 + Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 80.9 + Elb: 86.2 + Head: 87.8 + Hip: 81 + Knee: 83.4 + Shou: 90 + Total: 84.6 + Wri: 81.3 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: PoseTrack18 + Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 70.2 + Elb: 79.5 + Head: 78.0 + Hip: 76.9 + Knee: 76.6 + Shou: 82.9 + Total: 76.9 + Wri: 73.8 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: PoseTrack18 + Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 70.5 + Elb: 80.4 + Head: 79.9 + Hip: 74.8 + Knee: 76.1 + Shou: 83.6 + Total: 77.3 + Wri: 74.5 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: PoseTrack18 + Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 70.5 + Elb: 80.6 + Head: 80.1 + Hip: 74.3 + Knee: 76.8 + Shou: 83.4 + Total: 77.4 + Wri: 74.8 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: PoseTrack18 + Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 71.7 + Elb: 80.9 + Head: 80.2 + Hip: 74.7 + Knee: 76.7 + Shou: 83.8 + Total: 77.8 + Wri: 75.2 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.md new file mode 100644 index 0000000000000000000000000000000000000000..86f476e5b7d0cbfef712e822d660ca6a91f78849 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Andriluka_PoseTrack_A_Benchmark_CVPR_2018_paper.html">PoseTrack18 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{andriluka2018posetrack, + title={Posetrack: A benchmark for human pose estimation and tracking}, + author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={5167--5176}, + year={2018} +} +``` + +</details> + +Results on PoseTrack2018 val with ground-truth bounding boxes + +| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log | +| :--------------------------------------------------- | :--------: | :--: | :--: | :--: | 
:--: | :--: | :--: | :--: | :---: | :---------------------------------------------------: | :--------------------------------------------------: | +| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py) | 256x192 | 86.5 | 87.7 | 82.5 | 75.8 | 80.1 | 78.8 | 74.2 | 81.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192_20201028.log.json) | + +The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18. diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.yml new file mode 100644 index 0000000000000000000000000000000000000000..478ffa247e660611a0f4eca6dbf594188ff9b7c2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.yml @@ -0,0 +1,22 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: PoseTrack18 + Name: td-hm_res50_8xb64-20e_posetrack18-256x192 + Results: + - Dataset: PoseTrack18 + Metrics: + Ankl: 74.2 + Elb: 82.5 + Head: 86.5 + Hip: 80.1 + Knee: 78.8 + Shou: 87.7 + Total: 81.2 + Wri: 75.8 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..fe8e385f1daac0ac4df7a805203a88e87f487730 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py @@ -0,0 +1,155 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[10, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='posetrack18/Total AP', rule='greater', interval=1)) + +# load from the pretrained model +load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth' # noqa: E501 + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), 
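+ # stages 2-4 each add one lower-resolution branch; branch widths
+ # double per branch (the W32 variant uses 32, 64, 128, 256 channels)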
+ stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'PoseTrack18Dataset' +data_mode = 'topdown' +data_root = 'data/posetrack18/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_val.json', + # comment out `bbox_file` and `filter_cfg` to use ground-truth bboxes for evaluation + bbox_file='data/posetrack18/annotations/' + 'posetrack18_val_human_detections.json', + filter_cfg=dict(bbox_score_thr=0.4), + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='PoseTrack18Metric', + ann_file=data_root + 'annotations/posetrack18_val.json', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..513207441068ff0dcf37a98e995d3be47baf4817 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py @@ -0,0 +1,155 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[10, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='posetrack18/Total AP', rule='greater', interval=1)) 
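+
+# NOTE: `save_best` must name a metric key emitted by the evaluator;
+# `PoseTrack18Metric` (`val_evaluator` below) reports per-joint APs plus the
+# 'posetrack18/Total AP' summary tracked here.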
+ +# load from the pretrained model +load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288-ca5956af_20220909.pth' # noqa: E501 + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'PoseTrack18Dataset' +data_mode = 'topdown' +data_root = 'data/posetrack18/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_val.json', + # comment `bbox_file` and '`filter_cfg` if use gt bbox for evaluation + bbox_file='data/posetrack18/annotations/' + 'posetrack18_val_human_detections.json', + filter_cfg=dict(bbox_score_thr=0.4), + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='PoseTrack18Metric', + ann_file=data_root + 'annotations/posetrack18_val.json', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..cac23f14e47b4ba1f6ed5cb6c43ea6c11c5e89ad --- /dev/null +++ 
b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py @@ -0,0 +1,155 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[10, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='posetrack18/Total AP', rule='greater', interval=1)) + +# load from the pretrained model +load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth' # noqa: E501 + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'PoseTrack18Dataset' +data_mode = 'topdown' +data_root = 'data/posetrack18/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_val.json', + # comment out `bbox_file` and `filter_cfg` to use gt bboxes
for evaluation + bbox_file='data/posetrack18/annotations/' + 'posetrack18_val_human_detections.json', + filter_cfg=dict(bbox_score_thr=0.4), + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='PoseTrack18Metric', + ann_file=data_root + 'annotations/posetrack18_val.json', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..7ee99469fed8ae914e7aa91b3a32281f9f18ca1b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py @@ -0,0 +1,155 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[10, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='posetrack18/Total AP', rule='greater', interval=1)) + +# load from the pretrained model +load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288-c161b7de_20220915.pth' # noqa: E501 + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=17, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'PoseTrack18Dataset' +data_mode = 'topdown' +data_root = 'data/posetrack18/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine',
input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_val.json', + # comment out `bbox_file` and `filter_cfg` to use gt bboxes for evaluation + bbox_file='data/posetrack18/annotations/' + 'posetrack18_val_human_detections.json', + filter_cfg=dict(bbox_score_thr=0.4), + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='PoseTrack18Metric', + ann_file=data_root + 'annotations/posetrack18_val.json', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..7ee99469fed8ae914e7aa91b3a32281f9f18ca1b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..f8e529d120733235c82e8088cb983127cf35f95d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py @@ -0,0 +1,126 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=20, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=20, + milestones=[10, 15], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='posetrack18/Total AP', rule='greater', interval=1)) + +# load from the pretrained model +load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth' # noqa: E501 + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=17, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'PoseTrack18Dataset' +data_mode = 'topdown' +data_root = 'data/posetrack18/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), +
dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/posetrack18_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='PoseTrack18Metric', + ann_file=data_root + 'annotations/posetrack18_val.json', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/README.md b/mmpose/configs/body_2d_keypoint/topdown_regression/README.md new file mode 100644 index 0000000000000000000000000000000000000000..adc278ce0e5363f1c7afdaac6ee6d3b05ef3a9d3 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/README.md @@ -0,0 +1,32 @@ +# Top-down regression-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. At the 2nd stage, regression based methods directly regress the keypoint coordinates given the features extracted from the bounding box area, following the paradigm introduced in [Deeppose: Human pose estimation via deep neural networks](http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html). 
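To make the paradigm concrete before the result tables, here is a minimal PyTorch sketch of such a direct-regression head. This is an editor's illustration, not mmpose's actual `RegressionHead` implementation: the class name `NaiveRegressionHead` and the tensor shapes are assumptions chosen to mirror the ResNet-50 configs in this directory (a global-average-pooling neck over backbone features, one linear layer producing 17 normalized (x, y) pairs, and a SmoothL1 objective).

```python
import torch
from torch import nn


class NaiveRegressionHead(nn.Module):
    """Illustrative DeepPose-style head (hypothetical, not mmpose's API):
    pool backbone features globally, then regress (x, y) per keypoint."""

    def __init__(self, in_channels: int = 2048, num_joints: int = 17):
        super().__init__()
        self.num_joints = num_joints
        self.pool = nn.AdaptiveAvgPool2d(1)            # plays the role of the GAP neck
        self.fc = nn.Linear(in_channels, num_joints * 2)

    def forward(self, feats: torch.Tensor) -> torch.Tensor:
        # feats: (B, C, H, W) feature map from a backbone such as ResNet-50
        x = self.pool(feats).flatten(1)                # (B, C)
        return self.fc(x).view(-1, self.num_joints, 2)  # (B, K, 2) normalized coords


# Toy usage: fake features for a 256x192 crop downsampled 32x by ResNet-50
feats = torch.randn(4, 2048, 8, 6)
head = NaiveRegressionHead()
pred = head(feats)                                     # (4, 17, 2)
target = torch.rand(4, 17, 2)                          # ground-truth keypoints in [0, 1]
loss = nn.SmoothL1Loss()(pred, target)                 # mirrors SmoothL1Loss in the configs
loss.backward()
```

The RLE variants below keep this same head structure but replace the SmoothL1 objective with a residual log-likelihood loss over a learned error distribution, which is what accounts for the AP gap visible in the results tables that follow.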
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146515040-a82a8a29-d6bc-42f1-a2ab-7dfa610ce363.png"> +</div> + +## Results and Models + +### COCO Dataset + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Model | Input Size | AP | AR | Details and Download | +| :--------------: | :--------: | :---: | :---: | :-------------------------------------------------------: | +| ResNet-152+RLE | 256x192 | 0.731 | 0.805 | [resnet_rle_coco.md](./coco/resnet_rle_coco.md) | +| ResNet-101+RLE | 256x192 | 0.722 | 0.768 | [resnet_rle_coco.md](./coco/resnet_rle_coco.md) | +| ResNet-50+RLE | 256x192 | 0.706 | 0.768 | [resnet_rle_coco.md](./coco/resnet_rle_coco.md) | +| MobileNet-v2+RLE | 256x192 | 0.593 | 0.644 | [mobilenetv2_rle_coco.md](./coco/mobilenetv2_rle_coco.md) | +| ResNet-152 | 256x192 | 0.584 | 0.688 | [resnet_coco.md](./coco/resnet_coco.md) | +| ResNet-101 | 256x192 | 0.562 | 0.670 | [resnet_coco.md](./coco/resnet_coco.md) | +| ResNet-50 | 256x192 | 0.528 | 0.639 | [resnet_coco.md](./coco/resnet_coco.md) | + +### MPII Dataset + +| Model | Input Size | PCKh@0.5 | PCKh@0.1 | Details and Download | +| :-----------: | :--------: | :------: | :------: | :---------------------------------------------: | +| ResNet-50+RLE | 256x256 | 0.861 | 0.277 | [resnet_rle_mpii.md](./mpii/resnet_rle_mpii.md) | +| ResNet-152 | 256x256 | 0.850 | 0.208 | [resnet_mpii.md](./mpii/resnet_mpii.md) | +| ResNet-101 | 256x256 | 0.841 | 0.200 | [resnet_mpii.md](./mpii/resnet_mpii.md) | +| ResNet-50 | 256x256 | 0.826 | 0.180 | [resnet_mpii.md](./mpii/resnet_mpii.md) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..eddf5a79d31d974bf9a0e1d0fe128b32f5fa6065 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md @@ -0,0 +1,74 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2107.11291">RLE (ICCV'2021)</a></summary> + +```bibtex +@inproceedings{li2021human, + title={Human pose regression with residual log-likelihood estimation}, + author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={11025--11034}, + year={2021} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the 
IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [deeppose_mobilenetv2_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.593 | 0.836 | 0.660 | 0.644 | 0.877 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..c0f470432b444bdc5dced66291bc91d7a8bd18a2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.yml @@ -0,0 +1,20 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py + In Collection: RLE + Metadata: + Architecture: &id001 + - DeepPose + - RLE + - MobileNet + Training Data: COCO + Name: td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.593 + AP@0.5: 0.836 + AP@0.75: 0.66 + AR: 0.644 + AR@0.5: 0.877 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..77ed459aeda6a43d01b0219812c90509b8414282 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md @@ -0,0 +1,59 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + 
year={2014} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.541 | 0.824 | 0.601 | 0.649 | 0.893 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.log.json) | +| [deeppose_resnet_101](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py) | 256x192 | 0.562 | 0.831 | 0.629 | 0.670 | 0.900 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_20210205.log.json) | +| [deeppose_resnet_152](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py) | 256x192 | 0.584 | 0.842 | 0.659 | 0.688 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_20210205.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..e66b3043c6dfe8f9171e21b31cb6d3ae6d283932 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.yml @@ -0,0 +1,57 @@ +Collections: +- Name: DeepPose + Paper: + Title: "Deeppose: Human pose estimation via deep neural networks" + URL: http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/deeppose.md +Models: +- Config: 
configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py + In Collection: DeepPose + Metadata: + Architecture: &id001 + - DeepPose + - ResNet + Training Data: COCO + Name: td-reg_res50_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.541 + AP@0.5: 0.824 + AP@0.75: 0.601 + AR: 0.649 + AR@0.5: 0.893 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.pth +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py + In Collection: DeepPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-reg_res101_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.562 + AP@0.5: 0.831 + AP@0.75: 0.629 + AR: 0.67 + AR@0.5: 0.9 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py + In Collection: DeepPose + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-reg_res152_8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.584 + AP@0.5: 0.842 + AP@0.75: 0.659 + AR: 0.688 + AR@0.5: 0.907 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md new file mode 100644 index 0000000000000000000000000000000000000000..d3f4f5a2883ef69ddbfd11921a630758151a0be2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md @@ -0,0 +1,78 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2107.11291">RLE (ICCV'2021)</a></summary> + +```bibtex +@inproceedings{li2021human, + title={Human pose regression with residual log-likelihood estimation}, + author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={11025--11034}, + year={2021} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a 
href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log | +| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: | +| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.706 | 0.888 | 0.776 | 0.753 | 0.924 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.log.json) | +| [deeppose_resnet_50_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.719 | 0.891 | 0.788 | 0.764 | 0.925 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.log.json) | +| [deeppose_resnet_101_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.722 | 0.894 | 0.794 | 0.768 | 0.930 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle-16c3d461_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle_20220615.log.json) | +| [deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.731 | 0.897 | 0.805 | 0.777 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle-c05bdccf_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle_20220615.log.json) | +| [deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py) | 384x288 | 0.749 | 0.901 | 0.815 | 0.793 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle-b77c4c37_20220624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle_20220624.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.yml new file mode 100644 index 0000000000000000000000000000000000000000..97ae41b8f2af552b5f0e77264baf86f308912ecc --- /dev/null +++ 
b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.yml @@ -0,0 +1,90 @@ +Collections: +- Name: RLE + Paper: + Title: Human pose regression with residual log-likelihood estimation + URL: https://arxiv.org/abs/2107.11291 + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/rle.md +Models: +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py + In Collection: RLE + Metadata: + Architecture: &id001 + - DeepPose + - RLE + - ResNet + Training Data: COCO + Name: td-reg_res50_rle-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.706 + AP@0.5: 0.888 + AP@0.75: 0.776 + AR: 0.753 + AR@0.5: 0.924 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.pth +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py + In Collection: RLE + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.719 + AP@0.5: 0.891 + AP@0.75: 0.788 + AR: 0.764 + AR@0.5: 0.925 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.pth +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py + In Collection: RLE + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-reg_res101_rle-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.722 + AP@0.5: 0.894 + AP@0.75: 0.794 + AR: 0.768 + AR@0.5: 0.93 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle-16c3d461_20220615.pth +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py + In Collection: RLE + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-reg_res152_rle-8xb64-210e_coco-256x192 + Results: + - Dataset: COCO + Metrics: + AP: 0.731 + AP@0.5: 0.897 + AP@0.75: 0.805 + AR: 0.777 + AR@0.5: 0.933 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle-c05bdccf_20220615.pth +- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py + In Collection: RLE + Metadata: + Architecture: *id001 + Training Data: COCO + Name: td-reg_res152_rle-8xb64-210e_coco-384x288 + Results: + - Dataset: COCO + Metrics: + AP: 0.749 + AP@0.5: 0.901 + AP@0.75: 0.815 + AR: 0.793 + AR@0.5: 0.935 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle-b77c4c37_20220624.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..97f5d926c66be84ef1bc8fb8f1f187730cebd46d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py @@ -0,0 +1,126 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, 
val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/top_down/' + 'mobilenetv2/mobilenetv2_coco_256x192-d1e58e7b_20200727.pth')), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=1280, + num_joints=17, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + ), +) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json', + score_mode='bbox_rle') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..94f35d0fc36c749638ff397f5af5eb50a006894f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py @@ -0,0 +1,120 @@ +_base_ = 
['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=17, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..21b4a3cdcbab80fa080ca90581a6ab3ee44fdbe4 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime 
+train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=2048, + num_joints=17, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json', + score_mode='bbox_rle') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..fa56fba4987e9f4c6c4f0e284e5949c0c6f46d6c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# 
optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=17, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..e2a832b652b33aaa629fdb4a07863f223051461f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + 
type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=2048, + num_joints=17, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json', + score_mode='bbox_rle') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..6d319e927eb21ddcb71e40ecae1050c3421871d2 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy 
+param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(288, 384)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=2048, + num_joints=17, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json', + score_mode='bbox_rle') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..fa7e487acf470dfbd988979ffb7570f72d409df0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, 
end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=17, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..db530f6ec4f065fa16228ee66fee33db5afddc4f --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( 
+ type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=2048, + num_joints=17, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json', + score_mode='bbox_rle') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6b74aba7f3c138901d35652c9b7f19bebf23cceb --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=1e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + 
type='MultiStepLR', + begin=0, + end=train_cfg['max_epochs'], + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(192, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=2048, + num_joints=17, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'CocoDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +test_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_train2017.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/person_keypoints_val2017.json', + bbox_file=f'{data_root}person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=test_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater')) + +# evaluators +val_evaluator = dict( + type='CocoMetric', + ann_file=f'{data_root}annotations/person_keypoints_val2017.json', + score_mode='bbox_rle') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..150fd48020f0e47e63e5e2356bc91ae29499c546 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + 
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: | +| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.826 | 0.180 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256-c63cd0b6_20210203.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_20210203.log.json) | +| [deeppose_resnet_101](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.841 | 0.200 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256-87516a90_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256_20210205.log.json) | +| [deeppose_resnet_152](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py) | 256x256 | 0.850 | 0.208 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256-15f5e6f9_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256_20210205.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..a744083e97a054b07c89b0d283189ef51f236bf0 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.yml @@ -0,0 +1,42 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py + In Collection: DeepPose + Metadata: + Architecture: &id001 + - DeepPose + - ResNet + Training Data: MPII + Name: td-reg_res50_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.826 + Mean@0.1: 0.18 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256-c63cd0b6_20210203.pth +- Config: 
configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py + In Collection: DeepPose + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-reg_res101_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.841 + Mean@0.1: 0.2 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256-87516a90_20210205.pth +- Config: configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py + In Collection: DeepPose + Metadata: + Architecture: *id001 + Training Data: MPII + Name: td-reg_res152_8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.85 + Mean@0.1: 0.208 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256-15f5e6f9_20210205.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.md new file mode 100644 index 0000000000000000000000000000000000000000..bf3a67a49a41be72076fa2831902aa194d17d346 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.md @@ -0,0 +1,73 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2107.11291">RLE (ICCV'2021)</a></summary> + +```bibtex +@inproceedings{li2021human, + title={Human pose regression with residual log-likelihood estimation}, + author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={11025--11034}, + year={2021} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Andriluka_2D_Human_Pose_2014_CVPR_paper.html">MPII (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{andriluka14cvpr, + author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt}, + title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis}, + booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2014}, + month = {June} +} +``` + +</details> + +Results on MPII val set + +| Arch | Input Size | Mean | Mean@0.1 | ckpt | log | +| :---------------------------------------------------------- | :--------: | :---: | :------: 
| :---------------------------------------------------------: | :---------------------------------------------------------: | +| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py) | 256x256 | 0.861 | 0.277 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_rle-5f92a619_20220504.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_rle_20220504.log.json) | diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.yml new file mode 100644 index 0000000000000000000000000000000000000000..a03586d42cd7690154d336444e948ea317dbfc9c --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py + In Collection: RLE + Metadata: + Architecture: + - DeepPose + - RLE + - ResNet + Training Data: MPII + Name: td-reg_res50_rle-8xb64-210e_mpii-256x256 + Results: + - Dataset: MPII + Metrics: + Mean: 0.861 + Mean@0.1: 0.277 + Task: Body 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_rle-5f92a619_20220504.pth diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..6c7821f91b1161491ad2166b36bd582e194f384b --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py @@ -0,0 +1,116 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=16, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + 
dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..c1a19b0d6e720c9f60e19d62a8712e532390cc84 --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py @@ -0,0 +1,118 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=16, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +file_client_args = dict(backend='disk') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', file_client_args=file_client_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders 
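+# The loaders below follow mmpose's standard topdown layout: a DefaultSampler
+# over MpiiDataset with the pipelines defined above; the test loader reuses
+# the val loader.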
+train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..901fd4b8d61c2aa7a5cc920d1590acf8a4ece88d --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py @@ -0,0 +1,116 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=16, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + 
ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..9d46484755dec533fe5519a782de86404bf9986e --- /dev/null +++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py @@ -0,0 +1,116 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RLEHead', + in_channels=2048, + num_joints=16, + loss=dict(type='RLELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'MpiiDataset' +data_mode = 'topdown' +data_root = 'data/mpii/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform', shift_prob=0), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, 
round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/mpii_val.json', + headbox_file=f'{data_root}/annotations/mpii_gt_val.mat', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater')) + +# evaluators +val_evaluator = dict(type='MpiiPCKAccuracy') +test_evaluator = val_evaluator diff --git a/mmpose/configs/body_3d_keypoint/README.md b/mmpose/configs/body_3d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b67f7ce7ac7570c8903cb437080358ab34984f88 --- /dev/null +++ b/mmpose/configs/body_3d_keypoint/README.md @@ -0,0 +1,13 @@ +# Human Body 3D Pose Estimation + +3D human body pose estimation aims at predicting the X, Y, Z coordinates of human body joints. Based on the number of cameras used to capture the images or videos, existing works can be divided into multi-view methods and single-view (monocular) methods. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/3d_body_keypoint.md) to prepare data. + +## Demo + +Please follow [Demo](/demo/docs/en/3d_human_pose_demo.md) to run demos. + +<img src="https://user-images.githubusercontent.com/15977946/118820606-02df2000-b8e9-11eb-9984-b9228101e780.gif" width="600px" alt><br> diff --git a/mmpose/configs/face_2d_keypoint/README.md b/mmpose/configs/face_2d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9f9370a754902883013e479779a5db7acb2c9699 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/README.md @@ -0,0 +1,16 @@ +# 2D Face Landmark Detection + +2D face landmark detection (also referred to as face alignment) is defined as the task of detecting face keypoints in an input image. + +Normally, the input images are cropped face images, where the face is located at the center; +or the rough location (or the bounding box) of the face is provided. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_face_keypoint.md) to prepare data. + +## Demo + +Please follow [Demo](/demo/docs/en/2d_face_demo.md) to run demos. + +<img src="https://user-images.githubusercontent.com/11788150/109144943-ccd44900-779c-11eb-9e9d-8682e7629654.gif" width="600px" alt><br> diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/README.md b/mmpose/configs/face_2d_keypoint/rtmpose/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d0c7f55fb42d9501dbfac7511e1097d4a5aa8c1d --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/README.md @@ -0,0 +1,32 @@ +# RTMPose + +Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet their application in the industrial community still suffers from heavy model parameters and high latency. +In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose. +Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries.
+To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deployment on a mobile device. + +## Results and Models + +### COCO-WholeBody-Face Dataset + +Results on COCO-WholeBody-Face val set + +| Model | Input Size | NME | Details and Download | +| :-------: | :--------: | :----: | :------------------------------------------------------------------------------------: | +| RTMPose-m | 256x256 | 0.0466 | [rtmpose_coco_wholebody_face.md](./coco_wholebody_face/rtmpose_coco_wholebody_face.md) | + +### WFLW Dataset + +Results on WFLW dataset + +| Model | Input Size | NME | Details and Download | +| :-------: | :--------: | :--: | :---------------------------------------: | +| RTMPose-m | 256x256 | 4.01 | [rtmpose_wflw.md](./wflw/rtmpose_wflw.md) | + +### LaPa Dataset + +Results on LaPa dataset + +| Model | Input Size | NME | Details and Download | +| :-------: | :--------: | :--: | :---------------------------------------: | +| RTMPose-m | 256x256 | 1.29 | [rtmpose_lapa.md](./lapa/rtmpose_lapa.md) | diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..41c9309707229734a53d3c93a439624068fb08b3 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py @@ -0,0 +1,232 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 60 +stage2_num_epochs = 10 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=1) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 30 to 60 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=68, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, +
dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoWholeBodyFaceDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='NME', rule='less', max_keep_ckpts=1, interval=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) 
+test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.md new file mode 100644 index 0000000000000000000000000000000000000000..77d99bc63f7452b80e2983341794326a20c80fa1 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Face (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Face val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [pose_rtmpose_m](/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0466 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-face_pt-aic-coco_60e-256x256-62026ef2_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-face_pt-aic-coco_60e-256x256-62026ef2_20230228.json) | diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.yml new file mode 100644 index 0000000000000000000000000000000000000000..fdc2599e713aa710c102a71c67906090600ef6d6 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.yml @@ -0,0 +1,14 @@ +Models: +- Config: configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py + In Collection: RTMPose + Metadata: + Architecture: + - RTMPose + Training Data: COCO-WholeBody-Face + Name: rtmpose-m_8xb32-60e_coco-wholebody-face-256x256 + Results: + - Dataset: COCO-WholeBody-Face + Metrics: + NME: 0.0466 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-face_pt-aic-coco_60e-256x256-62026ef2_20230228.pth diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..b4124ff6d8db6956df8ec0aabacc5a5d0a93db6e --- /dev/null +++ 
b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py @@ -0,0 +1,247 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 120 +stage2_num_epochs = 10 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=1) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 60 to 120 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=106, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'LapaDataset' +data_mode = 'topdown' +data_root = 'data/LaPa/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/', +# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='PhotometricDistortion'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.2), + dict(type='MedianBlur', p=0.2), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), +
dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/lapa_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/lapa_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/lapa_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='NME', rule='less', max_keep_ckpts=1, interval=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md new file mode 100644 index 0000000000000000000000000000000000000000..9638de7551c0e0cabaa2ca1ba606bf8abc42b311 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md @@ -0,0 +1,40 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://aaai.org/ojs/index.php/AAAI/article/view/6832/6686">LaPa (AAAI'2020)</a></summary> + +```bibtex +@inproceedings{liu2020new, + title={A New Dataset and Boundary-Attention Semantic Segmentation for Face 
Parsing.}, + author={Liu, Yinglu and Shi, Hailin and Shen, Hao and Si, Yue and Wang, Xiaobo and Mei, Tao}, + booktitle={AAAI}, + pages={11637--11644}, + year={2020} +} +``` + +</details> + +Results on LaPa val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: | +| [pose_rtmpose_m](/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.29 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.json) | diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml new file mode 100644 index 0000000000000000000000000000000000000000..96acff8de6c25f064622a9711565ed0ffc594912 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py + In Collection: RTMPose + Alias: face + Metadata: + Architecture: + - RTMPose + Training Data: LaPa + Name: rtmpose-m_8xb64-120e_lapa-256x256 + Results: + - Dataset: LaPa + Metrics: + NME: 1.29 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.pth diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..f3f8c06e43b87c330612835c3db7af89583493d2 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py @@ -0,0 +1,232 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 60 +stage2_num_epochs = 10 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=1) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 30 to 60 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), +
act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=98, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/pose/WFLW/', +# f'{data_root}': 's3://openmmlab/datasets/pose/WFLW/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = 
val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='NME', rule='less', max_keep_ckpts=1, interval=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.md b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.md new file mode 100644 index 0000000000000000000000000000000000000000..b0070258da1b81c6ee5bd7ebe198eae968067f80 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.md @@ -0,0 +1,42 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Wu_Look_at_Boundary_CVPR_2018_paper.html">WFLW (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +</details> + +Results on WFLW dataset + +The model is trained on WFLW train. 
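+
+As an illustrative usage sketch (not part of the upstream files): the checkpoint below can be tried with mmpose's `MMPoseInferencer`. The config path and weights URL are the ones from the results table; `face.jpg` is a placeholder for a pre-cropped face image, so no separate face detector is configured.
+
+```python
+from mmpose.apis import MMPoseInferencer
+
+# Config and weights taken from the results table below; the image path is
+# illustrative and assumed to be a pre-cropped face.
+inferencer = MMPoseInferencer(
+    pose2d='configs/face_2d_keypoint/rtmpose/wflw/'
+    'rtmpose-m_8xb64-60e_wflw-256x256.py',
+    pose2d_weights='https://download.openmmlab.com/mmpose/v1/projects/'
+    'rtmposev1/rtmpose-m_simcc-wflw_pt-aic-coco_60e-256x256-dc1dcdcf_20230228.pth',
+    scope='mmpose')
+result = next(inferencer('face.jpg'))  # the inferencer yields one dict per input
+print(result['predictions'])  # per-instance predictions (98 WFLW landmarks)
+```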
+ +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: | +| [pose_rtmpose_m](/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py) | 256x256 | 4.01 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-wflw_pt-aic-coco_60e-256x256-dc1dcdcf_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-wflw_pt-aic-coco_60e-256x256-dc1dcdcf_20230228.json) | diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.yml b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..deee03a7ddc8fb9205bbf505856a90633885cc38 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py + In Collection: RTMPose + Alias: face + Metadata: + Architecture: + - RTMPose + Training Data: WFLW + Name: rtmpose-m_8xb64-60e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME: 4.01 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-wflw_pt-aic-coco_60e-256x256-dc1dcdcf_20230228.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.md new file mode 100644 index 0000000000000000000000000000000000000000..ace8776c4e28e66559e4dcecec0785e6ef5a0771 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.md @@ -0,0 +1,44 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://www.sciencedirect.com/science/article/pii/S0262885616000147">300W (IMAVIS'2016)</a></summary> + +```bibtex +@article{sagonas2016300, + title={300 faces in-the-wild challenge: Database and results}, + author={Sagonas, Christos and Antonakos, Epameinondas and Tzimiropoulos, Georgios and Zafeiriou, Stefanos and Pantic, Maja}, + journal={Image and vision computing}, + volume={47}, + pages={3--18}, + year={2016}, + publisher={Elsevier} +} +``` + +</details> + +Results on 300W dataset + +The model is trained on 300W train. 
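The NME columns below are normalized mean errors: the mean per-landmark Euclidean distance between prediction and ground truth, divided by a normalizing length (the inter-ocular distance for 300W-style evaluation, conventionally reported as this ratio ×100). A minimal sketch of the computation on hypothetical arrays; the outer-eye-corner indices follow the common 68-point convention and are an assumption for illustration, not code from the mmpose `NME` evaluator:

```python
import numpy as np

def nme(pred, gt, l_eye=36, r_eye=45):
    # 300W-style NME sketch: mean L2 landmark error divided by the
    # inter-ocular distance. Indices 36/45 are the outer eye corners
    # in the 68-point convention (assumed here for illustration).
    inter_ocular = np.linalg.norm(gt[l_eye] - gt[r_eye])
    return np.linalg.norm(pred - gt, axis=1).mean() / inter_ocular

pred = np.random.rand(68, 2) * 256  # hypothetical predicted landmarks
gt = np.random.rand(68, 2) * 256    # hypothetical ground-truth landmarks
print(f'NME: {100 * nme(pred, gt):.2f}')  # x100 to match the table units
```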
+ +| Arch | Input Size | NME<sub>*common*</sub> | NME<sub>*challenge*</sub> | NME<sub>*full*</sub> | NME<sub>*test*</sub> | ckpt | log | +| :--------------------------------- | :--------: | :--------------------: | :-----------------------: | :------------------: | :------------------: | :---------------------------------: | :--------------------------------: | +| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py) | 256x256 | 2.92 | 5.64 | 3.45 | 4.10 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256-eea53406_20211019.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256_20211019.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.yml new file mode 100644 index 0000000000000000000000000000000000000000..58dcb4832ac5824e375f5d8dc66f6648626528f8 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.yml @@ -0,0 +1,23 @@ +Collections: +- Name: HRNetv2 + Paper: + Title: Deep High-Resolution Representation Learning for Visual Recognition + URL: https://ieeexplore.ieee.org/abstract/document/9052469/ + README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hrnetv2.md +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: 300W + Name: td-hm_hrnetv2-w18_8xb64-60e_300w-256x256 + Results: + - Dataset: 300W + Metrics: + NME challenge: 5.64 + NME common: 2.92 + NME full: 3.45 + NME test: 4.1 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256-eea53406_20211019.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..52473a4664cca8266f603729d1a631aa6dc5b4ca --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py @@ -0,0 +1,161 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=60, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=1.5) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + 
num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=68, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'Face300WDataset' +data_mode = 'topdown' +data_root = 'data/300w/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_300w_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_300w_valid.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a8b7cf98fa119c4a1065484b24bd768196a3622d --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/README.md @@ -0,0 +1,57 @@ +# Top-down heatmap-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator will produce heatmaps which represent the likelihood of being a keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html). 
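To make the decoding step concrete, here is a minimal sketch of turning heatmaps back into coordinates. It is an illustration only: the actual mmpose codecs (e.g. `MSRAHeatmap` in the configs below) add flip-test averaging, sub-pixel refinement, and a mapping back to the original bounding box.

```python
import numpy as np

def decode_heatmaps(heatmaps, input_size=(256, 256)):
    """Argmax-decode K heatmaps of shape (K, H, W) into K (x, y) keypoints.

    Each channel's peak is taken as the keypoint location and rescaled from
    heatmap resolution to the network input resolution.
    """
    num_kpts, h, w = heatmaps.shape
    flat = heatmaps.reshape(num_kpts, -1)
    ys, xs = np.unravel_index(flat.argmax(axis=1), (h, w))
    scores = flat.max(axis=1)
    scale = np.array(input_size, dtype=float) / np.array([w, h])
    return np.stack([xs, ys], axis=1) * scale, scores

heatmaps = np.random.rand(68, 64, 64)  # hypothetical network output
kpts, scores = decode_heatmaps(heatmaps)
```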
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146522977-5f355832-e9c1-442f-a34f-9d24fb0aefa8.png" height=400> +</div> + +## Results and Models + +### 300W Dataset + +Results on 300W dataset + +| Model | Input Size | NME<sub>*common*</sub> | NME<sub>*challenge*</sub> | NME<sub>*full*</sub> | NME<sub>*test*</sub> | Details and Download | +| :---------: | :--------: | :--------------------: | :-----------------------: | :------------------: | :------------------: | :---------------------------------------: | +| HRNetv2-w18 | 256x256 | 2.92 | 5.64 | 3.45 | 4.10 | [hrnetv2_300w.md](./300w/hrnetv2_300w.md) | + +### AFLW Dataset + +Results on AFLW dataset + +| Model | Input Size | NME<sub>*full*</sub> | NME<sub>*frontal*</sub> | Details and Download | +| :--------------: | :--------: | :------------------: | :---------------------: | :-------------------------------------------------: | +| HRNetv2-w18+Dark | 256x256 | 1.35 | 1.19 | [hrnetv2_dark_aflw.md](./aflw/hrnetv2_dark_aflw.md) | +| HRNetv2-w18 | 256x256 | 1.41 | 1.27 | [hrnetv2_aflw.md](./aflw/hrnetv2_aflw.md) | + +### COCO-WholeBody-Face Dataset + +Results on COCO-WholeBody-Face val set + +| Model | Input Size | NME | Details and Download | +| :--------------: | :--------: | :----: | :----------------------------------------------------------------------------------------------: | +| HRNetv2-w18+Dark | 256x256 | 0.0513 | [hrnetv2_dark_coco_wholebody_face.md](./coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md) | +| SCNet-50 | 256x256 | 0.0567 | [scnet_coco_wholebody_face.md](./coco_wholebody_face/scnet_coco_wholebody_face.md) | +| HRNetv2-w18 | 256x256 | 0.0569 | [hrnetv2_coco_wholebody_face.md](./coco_wholebody_face/hrnetv2_coco_wholebody_face.md) | +| ResNet-50 | 256x256 | 0.0582 | [resnet_coco_wholebody_face.md](./coco_wholebody_face/resnet_coco_wholebody_face.md) | +| HourglassNet | 256x256 | 0.0587 | [hourglass_coco_wholebody_face.md](./coco_wholebody_face/hourglass_coco_wholebody_face.md) | +| MobileNet-v2 | 256x256 | 0.0611 | [mobilenetv2_coco_wholebody_face.md](./coco_wholebody_face/mobilenetv2_coco_wholebody_face.md) | + +### COFW Dataset + +Results on COFW dataset + +| Model | Input Size | NME | Details and Download | +| :---------: | :--------: | :--: | :---------------------------------------: | +| HRNetv2-w18 | 256x256 | 3.48 | [hrnetv2_cofw.md](./cofw/hrnetv2_cofw.md) | + +### WFLW Dataset + +Results on WFLW dataset + +| Model | Input Size | NME<sub>*test*</sub> | NME<sub>*pose*</sub> | NME<sub>*illumination*</sub> | NME<sub>*occlusion*</sub> | NME<sub>*blur*</sub> | NME<sub>*makeup*</sub> | NME<sub>*expression*</sub> | Details and Download | +| :-----: | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------------------: | +| HRNetv2-w18+Dark | 256x256 | 3.98 | 6.98 | 3.96 | 4.78 | 4.56 | 3.89 | 4.29 | [hrnetv2_dark_wflw.md](./wflw/hrnetv2_dark_wflw.md) | +| HRNetv2-w18+AWing | 256x256 | 4.02 | 6.94 | 3.97 | 4.78 | 4.59 | 3.87 | 4.28 | [hrnetv2_awing_wflw.md](./wflw/hrnetv2_awing_wflw.md) | +| HRNetv2-w18 | 256x256 | 4.06 | 6.97 | 3.99 | 4.83 | 4.58 | 3.94 | 4.33 | [hrnetv2_wflw.md](./wflw/hrnetv2_wflw.md) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.md new file mode 100644 index 
0000000000000000000000000000000000000000..70c59ac2e4ca7a58db9057f01d2af3c17ce5785d --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/6130513/">AFLW (ICCVW'2011)</a></summary> + +```bibtex +@inproceedings{koestinger2011annotated, + title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization}, + author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst}, + booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)}, + pages={2144--2151}, + year={2011}, + organization={IEEE} +} +``` + +</details> + +Results on AFLW dataset + +The model is trained on AFLW train and evaluated on AFLW full and frontal. + +| Arch | Input Size | NME<sub>*full*</sub> | NME<sub>*frontal*</sub> | ckpt | log | +| :------------------------------------------------ | :--------: | :------------------: | :---------------------: | :-----------------------------------------------: | :-----------------------------------------------: | +| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py) | 256x256 | 1.41 | 1.27 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256_20210125.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..06d2d43b9c1983c2c4d43d715b08721a822ffed3 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: AFLW + Name: td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256 + Results: + - Dataset: AFLW + Metrics: + NME frontal: 1.27 + NME full: 1.41 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.md new file mode 100644 index 0000000000000000000000000000000000000000..a51c473d3b243f7f773a851bb69c425b14443767 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.md @@ -0,0 +1,60 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual 
Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/6130513/">AFLW (ICCVW'2011)</a></summary> + +```bibtex +@inproceedings{koestinger2011annotated, + title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization}, + author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst}, + booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)}, + pages={2144--2151}, + year={2011}, + organization={IEEE} +} +``` + +</details> + +Results on AFLW dataset + +The model is trained on AFLW train and evaluated on AFLW full and frontal. + +| Arch | Input Size | NME<sub>*full*</sub> | NME<sub>*frontal*</sub> | ckpt | log | +| :------------------------------------------------ | :--------: | :------------------: | :---------------------: | :-----------------------------------------------: | :-----------------------------------------------: | +| [pose_hrnetv2_w18_dark](/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py) | 256x256 | 1.35 | 1.19 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark-219606c0_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark_20210125.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..54c09538974835c5a701de61f41c812d2813940a --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py + In Collection: DarkPose + Metadata: + Architecture: + - HRNetv2 + - DarkPose + Training Data: AFLW + Name: td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256 + Results: + - Dataset: AFLW + Metrics: + NME frontal: 1.19 + NME full: 1.35 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark-219606c0_20210125.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..a157a01442f155d34f7fd330014028bc77c4f888 --- /dev/null +++
b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py @@ -0,0 +1,156 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=60, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=19, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AFLWDataset' +data_mode = 'topdown' +data_root = 'data/aflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_aflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_aflw_test.json', + 
data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', norm_mode='use_norm_item', norm_item='bbox_size') +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..44100cebe60bbe023837dba7586f2c913b731918 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py @@ -0,0 +1,160 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=60, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=19, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'AFLWDataset' +data_mode = 'topdown' +data_root = 'data/aflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + 
persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_aflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_aflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', norm_mode='use_norm_item', norm_item='bbox_size') +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md new file mode 100644 index 0000000000000000000000000000000000000000..6099dcf06dcf8a9e988b77623bd6f3a7ee7883a7 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29">Hourglass (ECCV'2016)</a></summary> + +```bibtex +@inproceedings{newell2016stacked, + title={Stacked hourglass networks for human pose estimation}, + author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia}, + booktitle={European conference on computer vision}, + pages={483--499}, + year={2016}, + organization={Springer} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Face (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Face val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [pose_hourglass_52](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0587 | [ckpt](https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256-6994cf2e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256_20210909.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml new file mode 100644 index 0000000000000000000000000000000000000000..704c01983e4ab53f87a0a1ec798b49bf4b8b5e6f --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml @@ -0,0 +1,14 @@ +Models: +- Config: 
configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py + In Collection: Hourglass + Metadata: + Architecture: + - Hourglass + Training Data: COCO-WholeBody-Face + Name: td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256 + Results: + - Dataset: COCO-WholeBody-Face + Metrics: + NME: 0.0587 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256-6994cf2e_20210909.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md new file mode 100644 index 0000000000000000000000000000000000000000..d16ea2bc7fa50b3b8df57219bc5e3fada52c3558 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Face (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Face val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0569 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256-c1ca469b_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256_20210909.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml new file mode 100644 index 0000000000000000000000000000000000000000..0a4a38d5b78e5390d60c71ba43d663fafb51d279 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml @@ -0,0 +1,14 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: COCO-WholeBody-Face + Name: td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256 + Results: + - Dataset: COCO-WholeBody-Face + Metrics: + 
NME: 0.0569 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256-c1ca469b_20210909.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md new file mode 100644 index 0000000000000000000000000000000000000000..fd059ee23cc17a82ed71cfc0ca089785ea6e150e --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Face (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Face val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [pose_hrnetv2_w18_dark](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0513 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark-3d9a334e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark_20210909.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml new file mode 100644 index 0000000000000000000000000000000000000000..cedc4950f9d2fba11e9a18ce2ca5942dcc2492eb --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml @@ -0,0 +1,15 @@ +Models: +- Config: 
configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py + In Collection: DarkPose + Metadata: + Architecture: + - HRNetv2 + - DarkPose + Training Data: COCO-WholeBody-Face + Name: td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256 + Results: + - Dataset: COCO-WholeBody-Face + Metrics: + NME: 0.0513 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark-3d9a334e_20210909.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md new file mode 100644 index 0000000000000000000000000000000000000000..d551a6c9abc3a4da60aaa90dac1ddbb9802ddd83 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md @@ -0,0 +1,38 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Face (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Face val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [pose_mobilenetv2](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0611 | [ckpt](https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256-4a3f096e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256_20210909.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml new file mode 100644 index 0000000000000000000000000000000000000000..2bd4352119546e2670f4b6bd16c12d37213b099b --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - MobilenetV2 + 
Training Data: COCO-WholeBody-Face + Name: td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256 + Results: + - Dataset: COCO-WholeBody-Face + Metrics: + NME: 0.0611 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256-4a3f096e_20210909.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md new file mode 100644 index 0000000000000000000000000000000000000000..e4609385bdc230558581a05f8ee0fe73b6b248b2 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md @@ -0,0 +1,55 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Face (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Face val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [pose_res50](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0582 | [ckpt](https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256-5128edf5_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256_20210909.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef91a3da21c316335caf8d88c0ebde9a6e1bd4d7 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml @@ -0,0 +1,15 @@ +Models: +- Config: 
configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ResNet + Training Data: COCO-WholeBody-Face + Name: td-hm_res50_8xb32-60e_coco-wholebody-face-256x256 + Results: + - Dataset: COCO-WholeBody-Face + Metrics: + NME: 0.0582 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256-5128edf5_20210909.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md new file mode 100644 index 0000000000000000000000000000000000000000..2710c2ff39bb02fa46949f89592c8c116234a63b --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md @@ -0,0 +1,38 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Liu_Improving_Convolutional_Networks_With_Self-Calibrated_Convolutions_CVPR_2020_paper.html">SCNet (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{liu2020improving, + title={Improving Convolutional Networks with Self-Calibrated Convolutions}, + author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={10096--10105}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Face (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Face val set + +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [pose_scnet_50](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0567 | [ckpt](https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256-a0183f5f_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256_20210909.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml new file mode 100644 index 0000000000000000000000000000000000000000..d3b052ffc51e133706d36246caae80563ac7edcb --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - SCNet + Training Data: COCO-WholeBody-Face + Name: 
td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256 + Results: + - Dataset: COCO-WholeBody-Face + Metrics: + NME: 0.0567 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256-a0183f5f_20210909.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..0e6f5c5c9084bf03ec95e203c57bad4a91ce7179 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HourglassNet', + num_stacks=1, + ), + head=dict( + type='CPMHead', + in_channels=256, + out_channels=68, + num_stages=1, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyFaceDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) 
+test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..dfeac90ced1307eeaa8fe9c83c59a3ae67b1cb23 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py @@ -0,0 +1,156 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=68, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyFaceDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + 
sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..3c34f9aa5dc733f6dd1363212791b7f2c5b7f447 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py @@ -0,0 +1,160 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=68, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyFaceDataset' +data_mode = 'topdown' +data_root 
= 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..6f1a8629fc7448a4edc5e3a98b554b615efb7102 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict(type='Pretrained', checkpoint='mmcls://mobilenet_v2')), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=68, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyFaceDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + 
dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..0070e55d69d26b5e50edfef7868dc4faa5b0b5f4 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=68, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyFaceDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + 
type='RandomBBoxTransform', + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..8f79f4b1d362b527cd684ae927e61cf17ec821cd --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=50, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet50-7ef0a199.pth')), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=68, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyFaceDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=60, + 
scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.md new file mode 100644 index 0000000000000000000000000000000000000000..b99f91f3d180287081543a27c6e61818092b3b1c --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.md @@ -0,0 +1,42 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_iccv_2013/html/Burgos-Artizzu_Robust_Face_Landmark_2013_ICCV_paper.html">COFW (ICCV'2013)</a></summary> + +```bibtex +@inproceedings{burgos2013robust, + title={Robust face landmark estimation under occlusion}, + author={Burgos-Artizzu, Xavier P and Perona, Pietro and Doll{\'a}r, Piotr}, + booktitle={Proceedings of the IEEE international conference on computer vision}, + pages={1513--1520}, + year={2013} +} +``` + +</details> + +Results on COFW dataset + +The model is trained on COFW train. 
+ +| Arch | Input Size | NME | ckpt | log | +| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: | +| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py) | 256x256 | 3.48 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256-49243ab8_20211019.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256_20211019.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.yml new file mode 100644 index 0000000000000000000000000000000000000000..733e275685de62a483d48e2ec7eedf347d6d0e51 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.yml @@ -0,0 +1,14 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: COFW + Name: td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256 + Results: + - Dataset: COFW + Metrics: + NME: 3.48 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256-49243ab8_20211019.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..7c52342e950246755a9e5c0ed60302da936bb6fe --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py @@ -0,0 +1,161 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=60, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=1.5) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'), + ), +
neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=29, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'COFWDataset' +data_mode = 'topdown' +data_root = 'data/cofw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/cofw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/cofw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.md new file mode 100644 index 0000000000000000000000000000000000000000..53d5c3b36d7c61e9f6db3542b047adda70e70a86 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.md @@ -0,0 +1,59 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/pdf/1904.07399.pdf">AdaptiveWingloss (ICCV'2019)</a></summary> + +```bibtex +@inproceedings{wang2019adaptive, + title={Adaptive wing loss for robust face alignment via heatmap regression}, + author={Wang, Xinyao and Bo, Liefeng and Fuxin, Li}, + booktitle={Proceedings of the IEEE/CVF international conference on computer vision}, + pages={6971--6981}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Wu_Look_at_Boundary_CVPR_2018_paper.html">WFLW (CVPR'2018)</a></summary> + +```bibtex 
+@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +</details> + +Results on WFLW dataset + +The model is trained on WFLW train. + +| Arch | Input Size | NME<sub>*test*</sub> | NME<sub>*pose*</sub> | NME<sub>*illumination*</sub> | NME<sub>*occlusion*</sub> | NME<sub>*blur*</sub> | NME<sub>*makeup*</sub> | NME<sub>*expression*</sub> | ckpt | log | +| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: | +| [pose_hrnetv2_w18_awing](/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py) | 256x256 | 4.02 | 6.94 | 3.97 | 4.78 | 4.59 | 3.87 | 4.28 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing-5af5055c_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing_20211212.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ba45c82b7499f09e0664ef42589e46ec298aca9 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml @@ -0,0 +1,21 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + - AdaptiveWingloss + Training Data: WFLW + Name: td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME blur: 4.59 + NME expression: 4.28 + NME illumination: 3.97 + NME makeup: 3.87 + NME occlusion: 4.78 + NME pose: 6.94 + NME test: 4.02 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing-5af5055c_20211212.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.md new file mode 100644 index 0000000000000000000000000000000000000000..476afb6c01c1a2f57c2030f673a913654b5a4698 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.md @@ -0,0 +1,59 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human 
pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Wu_Look_at_Boundary_CVPR_2018_paper.html">WFLW (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +</details> + +Results on WFLW dataset + +The model is trained on WFLW train. + +| Arch | Input Size | NME<sub>*test*</sub> | NME<sub>*pose*</sub> | NME<sub>*illumination*</sub> | NME<sub>*occlusion*</sub> | NME<sub>*blur*</sub> | NME<sub>*makeup*</sub> | NME<sub>*expression*</sub> | ckpt | log | +| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: | +| [pose_hrnetv2_w18_dark](/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py) | 256x256 | 3.98 | 6.98 | 3.96 | 4.78 | 4.56 | 3.89 | 4.29 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark-3f8e0c2c_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark_20210125.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..bbb82185cf0f4be034cb31f4a8166128d522938e --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml @@ -0,0 +1,21 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py + In Collection: DarkPose + Metadata: + Architecture: + - HRNetv2 + - DarkPose + Training Data: WFLW + Name: td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME blur: 4.56 + NME expression: 4.29 + NME illumination: 3.96 + NME makeup: 3.89 + NME occlusion: 4.78 + NME pose: 6.98 + NME test: 3.98 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark-3f8e0c2c_20210125.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.md new file mode 100644 index 0000000000000000000000000000000000000000..c9b8eec0669a73d60b2023eefa84204e71c7d1d0 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.md @@ -0,0 +1,42 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin 
Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Wu_Look_at_Boundary_CVPR_2018_paper.html">WFLW (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +</details> + +Results on WFLW dataset + +The model is trained on WFLW train. + +| Arch | Input Size | NME<sub>*test*</sub> | NME<sub>*pose*</sub> | NME<sub>*illumination*</sub> | NME<sub>*occlusion*</sub> | NME<sub>*blur*</sub> | NME<sub>*makeup*</sub> | NME<sub>*expression*</sub> | ckpt | log | +| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: | +| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py) | 256x256 | 4.06 | 6.97 | 3.99 | 4.83 | 4.58 | 3.94 | 4.33 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256-2bf032a6_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_20210125.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..9124324f8b8fe8ca7d1835471b130013cee13efa --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml @@ -0,0 +1,20 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: WFLW + Name: td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME blur: 4.58 + NME expression: 4.33 + NME illumination: 3.99 + NME makeup: 3.94 + NME occlusion: 4.83 + NME pose: 6.97 + NME test: 4.06 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256-2bf032a6_20210125.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..ae373c816aec3e09f7780f304ce11687e48b8e32 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=60, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + 
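+# NOTE (editorial sketch, not part of the upstream config): configs like this
+# one are plain MMEngine configs; with a stock mmpose checkout they are
+# launched through the regular entry point, e.g.
+#   python tools/train.py configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py
+# The checkpoint hook above (save_best='NME', rule='less') then keeps the
+# snapshot with the lowest validation NME.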
+# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=98, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..ada24a97bb7954d42b2d300ea9e8a14b494da938 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = 
dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=60, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=98, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='AdaptiveWingLoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) 
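+# NOTE (editorial aside, not in the original file): with
+# norm_mode='keypoint_distance', the NME metric divides the mean per-keypoint
+# L2 error by a dataset-defined reference distance (for face datasets this is
+# conventionally the inter-ocular distance), i.e. roughly
+#   NME = mean_i(||pred_i - gt_i||_2) / d_ref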
+test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..973a850f3fdf2ab6300e8e56c4e1b92b15d3f63a --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py @@ -0,0 +1,162 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=60, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=2e-3, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=60, + milestones=[40, 55], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'), + ), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=98, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_prob=0, + rotate_factor=60, + scale_factor=(0.75, 1.25)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', 
+ data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/README.md b/mmpose/configs/face_2d_keypoint/topdown_regression/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5d20cb9a311c033eef1b8668b2d9d0e5c56e6514 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/README.md @@ -0,0 +1,19 @@ +# Top-down regression-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. At the second stage, regression-based methods directly regress the keypoint coordinates from the features extracted from the bounding box area, following the paradigm introduced in [Deeppose: Human pose estimation via deep neural networks](http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html). + +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146515040-a82a8a29-d6bc-42f1-a2ab-7dfa610ce363.png"> +</div> + +## Results and Models + +### WFLW Dataset + +Results on WFLW test set + +| Model | Input Size | NME | ckpt | log | +| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [ResNet-50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py) | 256x256 | 4.88 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) | +| [ResNet-50+WingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.67 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss_20210303.log.json) | +| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.44 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md new file mode 100644 index 0000000000000000000000000000000000000000..f1d9629d0ad0caced74cb3b0f4781080c302588f --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md @@ -0,0 +1,75 @@ +<!-- [ALGORITHM] --> + +<details> +<summary
align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/document/9442331/">SoftWingloss (TIP'2021)</a></summary> + +```bibtex +@article{lin2021structure, + title={Structure-Coherent Deep Feature Learning for Robust Face Alignment}, + author={Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie}, + journal={IEEE Transactions on Image Processing}, + year={2021}, + publisher={IEEE} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Wu_Look_at_Boundary_CVPR_2018_paper.html">WFLW (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +</details> + +Results on WFLW dataset + +The model is trained on WFLW train set. 
+ +| Model | Input Size | NME | ckpt | log | +| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.44 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..7c65215ccc2c748b5f1a65efe4fc555faea73ed4 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py + In Collection: ResNet + Metadata: + Architecture: + - DeepPose + - ResNet + - SoftWingloss + Training Data: WFLW + Name: td-reg_res50_softwingloss_8xb64-210e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME: 4.44 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md new file mode 100644 index 0000000000000000000000000000000000000000..1ec3e76dbad52d30e8ce1c458592ec13e6c8ee31 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Wu_Look_at_Boundary_CVPR_2018_paper.html">WFLW (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +</details> + 
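+As an illustrative aside (a sketch, not part of the original page: the image
+path below is a placeholder, and `MMPoseInferencer` is mmpose's generic
+inference wrapper rather than anything specific to this config), the released
+checkpoint listed in the table below can be exercised directly:
+
+```python
+from mmpose.apis import MMPoseInferencer
+
+# Config and weights are the ones cited in the results table on this page.
+inferencer = MMPoseInferencer(
+    pose2d='configs/face_2d_keypoint/topdown_regression/wflw/'
+    'td-reg_res50_8xb64-210e_wflw-256x256.py',
+    pose2d_weights='https://download.openmmlab.com/mmpose/face/deeppose/'
+    'deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth',
+    scope='mmpose')
+
+# __call__ returns a generator; pull one result for a single image.
+result = next(inferencer('path/to/face_image.jpg'))
+```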
+Results on WFLW dataset + +The model is trained on WFLW train set. + +| Model | Input Size | NME | ckpt | log | +| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [ResNet-50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py) | 256x256 | 4.88 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..81c7b79a7e47fa1647b1bf1f5d78d34b1dc73faf --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml @@ -0,0 +1,15 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py + In Collection: ResNet + Metadata: + Architecture: + - DeepPose + - ResNet + Training Data: WFLW + Name: td-reg_res50_8xb64-210e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME: 4.88 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.md new file mode 100644 index 0000000000000000000000000000000000000000..51477143d11c1755f9280026641ba68b954ec99e --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.md @@ -0,0 +1,76 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Feng_Wing_Loss_for_CVPR_2018_paper.html">Wingloss (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{feng2018wing, + title={Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks}, + author={Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun}, + booktitle={Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on}, + year={2018}, + pages={2235-2245}, + organization={IEEE} +} +``` + +</details> +
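+For orientation, the loss cited above can be restated compactly (an editorial
+sketch of the formula from the Wing Loss paper, not mmpose's `WingLoss` class;
+the omega/epsilon values are the ones the paper recommends):
+
+```python
+import math
+
+import torch
+
+
+def wing_loss(pred, target, omega=10.0, epsilon=2.0):
+    """Log-shaped penalty for small residuals, L1 for large ones; the offset
+    c keeps the two branches continuous at ``|x| = omega``."""
+    diff = (pred - target).abs()
+    c = omega - omega * math.log(1 + omega / epsilon)
+    return torch.where(diff < omega,
+                       omega * torch.log(1 + diff / epsilon),
+                       diff - c).mean()
+```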
+<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Wu_Look_at_Boundary_CVPR_2018_paper.html">WFLW (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{wu2018look, + title={Look at boundary: A boundary-aware face alignment algorithm}, + author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={2129--2138}, + year={2018} +} +``` + +</details> + +Results on WFLW dataset + +The model is trained on WFLW train set. + +| Model | Input Size | NME | ckpt | log | +| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: | +| [ResNet-50+WingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.67 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss_20210303.log.json) | diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.yml new file mode 100644 index 0000000000000000000000000000000000000000..49b409121a60336ae19f4d29d535017013f22c8f --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py + In Collection: ResNet + Metadata: + Architecture: + - DeepPose + - ResNet + - WingLoss + Training Data: WFLW + Name: td-reg_res50_wingloss_8xb64-210e_wflw-256x256 + Results: + - Dataset: WFLW + Metrics: + NME: 4.67 + Task: Face 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..2742f497b8fbdd7889281c660b9ccd804ccf754d --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', 
checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=98, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# dataloaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less')) + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..eb4199073d712024f0495746ad902f4ea4dd9052 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=98, + 
loss=dict(type='SoftWingLoss', use_target_weight=True), + decoder=codec), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings +dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# dataloaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less')) + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..ab519cd401bbd07212e4834c9a5d655418b49fb1 --- /dev/null +++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=98, + loss=dict(type='WingLoss', use_target_weight=True), + decoder=codec), + train_cfg=dict(), + test_cfg=dict( + flip_test=True, + shift_coords=True, + )) + +# base dataset settings 
+dataset_type = 'WFLWDataset' +data_mode = 'topdown' +data_root = 'data/wflw/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# dataloaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_train.json', + data_prefix=dict(img='images/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/face_landmarks_wflw_test.json', + data_prefix=dict(img='images/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict(checkpoint=dict(save_best='NME', rule='less')) + +# evaluators +val_evaluator = dict( + type='NME', + norm_mode='keypoint_distance', +) +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/README.md b/mmpose/configs/fashion_2d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e7d761067afc34b6a7249faa187752b39ca24ffd --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/README.md @@ -0,0 +1,7 @@ +# 2D Fashion Landmark Detection + +2D fashion landmark detection (also referred to as fashion alignment) aims to detect key-points located at the functional regions of clothes, for example the neckline and the cuff. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_fashion_landmark.md) to prepare data.
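The DeepFashion2 config files added after this README all share one top-down ResNet-50 heatmap recipe (one config and one released checkpoint per clothing category; see the results table in res50_deepfashion2.md below). As a minimal sketch of how one of those checkpoints could be exercised, assuming MMPose 1.x's high-level inference API (`init_model` / `inference_topdown`); the config path and checkpoint URL are the ones added in this diff, while the input image path is a placeholder:

```python
# Minimal sketch, assuming the MMPose 1.x inference API. The config and
# checkpoint below are the short-sleeved-shirt entries from this diff;
# 'path/to/garment.jpg' is a placeholder input image.
from mmpose.apis import inference_topdown, init_model

config = ('mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/'
          'td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py')
checkpoint = ('https://download.openmmlab.com/mmpose/fashion/resnet/'
              'res50_deepfashion2_short_sleeved_shirt_256x192-21e1c5da_20221208.pth')

model = init_model(config, checkpoint, device='cpu')

# Without explicit bboxes, inference_topdown treats the whole image as a
# single instance; each result is a PoseDataSample whose predicted
# keypoints cover the full 294-landmark DeepFashion2 set, matching the
# head's out_channels=294 in these configs.
results = inference_topdown(model, 'path/to/garment.jpg')
print(results[0].pred_instances.keypoints.shape)  # e.g. (1, 294, 2)
```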
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfashion2.md b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfashion2.md new file mode 100644 index 0000000000000000000000000000000000000000..1dcfd593133c95f744869ff23bd2ec12a54e187c --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfashion2.md @@ -0,0 +1,67 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://arxiv.org/pdf/1901.07973.pdf">DeepFashion2 (CVPR'2019)</a></summary> + +```bibtex +@article{DeepFashion2, + author = {Yuying Ge and Ruimao Zhang and Lingyun Wu and Xiaogang Wang and Xiaoou Tang and Ping Luo}, + title={A Versatile Benchmark for Detection, Pose Estimation, Segmentation and Re-Identification of Clothing Images}, + journal={CVPR}, + year={2019} +} +``` + +</details> + +Results on DeepFashion2 val set + +| Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :-------------------- | :-------------------------------------------------: | :--------: | :-----: | :---: | :--: | :-------------------------------------------------: | :-------------------------------------------------: | +| short_sleeved_shirt | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py) | 256x192 | 0.988 | 0.703 | 10.2 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_shirt_256x192-21e1c5da_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_shirt_256x192_20221208.log.json) | +| long_sleeved_shirt | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py) | 256x192 | 0.973 | 0.587 | 16.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_shirt_256x192-8679e7e3_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_shirt_256x192_20221208.log.json) | +| short_sleeved_outwear | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py) | 256x192 | 0.966 | 0.408 | 24.0 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_outwear_256x192-a04c1298_20221208.pth) | 
[log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_outwear_256x192_20221208.log.json) | +| long_sleeved_outwear | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py) | 256x192 | 0.987 | 0.517 | 18.1 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_outwear_256x192-31fbaecf_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_outwear_256x192_20221208.log.json) | +| vest | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py) | 256x192 | 0.981 | 0.643 | 12.7 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_256x192-4c48d05c_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_256x192_20221208.log.json) | +| sling | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py) | 256x192 | 0.940 | 0.557 | 21.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_256x192-ebb2b736_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_256x192_20221208.log.json) | +| shorts | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py) | 256x192 | 0.975 | 0.682 | 12.4 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_shorts_256x192-9ab23592_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_shorts_256x192_20221208.log.json) | +| trousers | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py) | 256x192 | 0.973 | 0.625 | 14.8 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_trousers_256x192-3e632257_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_trousers_256x192_20221208.log.json) | +| skirt | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py) | 256x192 | 0.952 | 0.653 | 16.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_skirt_256x192-09573469_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_skirt_256x192_20221208.log.json) | +| short_sleeved_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py) | 256x192 | 0.980 | 0.603 | 15.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_dress_256x192-1345b07a_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_dress_256x192_20221208.log.json) | +| long_sleeved_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py) | 256x192 | 0.976 | 0.518 | 20.1 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_dress_256x192-87bac74e_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_dress_256x192_20221208.log.json) | +| 
vest_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py) | 256x192 | 0.980 | 0.600 | 16.0 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_dress_256x192-fb3fbd6f_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_dress_256x192_20221208.log.json) | +| sling_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py) | 256x192 | 0.967 | 0.544 | 19.5 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_dress_256x192-8ebae0eb_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_dress_256x192_20221208.log.json) | diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfasion2.yml b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfasion2.yml new file mode 100644 index 0000000000000000000000000000000000000000..28825fa01100a9375f4640eb89575e829608ac37 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfasion2.yml @@ -0,0 +1,185 @@ +Models: +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + - ResNet + Training Data: DeepFashion2 + Name: td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.703 + EPE: 10.2 + PCK@0.2: 0.988 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_shirt_256x192-21e1c5da_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.587 + EPE: 16.5 + PCK@0.2: 0.973 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_shirt_256x192-8679e7e3_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.408 + EPE: 24.0 + PCK@0.2: 0.966 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_outwear_256x192-a04c1298_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.517 + EPE: 18.1 + PCK@0.2: 0.987 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_outwear_256x192-31fbaecf_20221208.pth +- Config: 
configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_4xb64-210e_deepfasion2-vest-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.643 + EPE: 12.7 + PCK@0.2: 0.981 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_256x192-4c48d05c_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_4xb64-210e_deepfasion2-sling-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.557 + EPE: 21.6 + PCK@0.2: 0.94 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_256x192-ebb2b736_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.682 + EPE: 12.4 + PCK@0.2: 0.975 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_shorts_256x192-9ab23592_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.625 + EPE: 14.8 + PCK@0.2: 0.973 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_trousers_256x192-3e632257_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.653 + EPE: 16.6 + PCK@0.2: 0.952 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_skirt_256x192-09573469_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.603 + EPE: 15.6 + PCK@0.2: 0.98 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_dress_256x192-1345b07a_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.518 + EPE: 20.1 + PCK@0.2: 0.976 + Task: Fashion 2D Keypoint + Weights: 
https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_dress_256x192-87bac74e_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.6 + EPE: 16.0 + PCK@0.2: 0.98 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_dress_256x192-fb3fbd6f_20221208.pth +- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: DeepFashion2 + Name: td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192 + Results: + - Dataset: DeepFashion2 + Metrics: + AUC: 0.544 + EPE: 19.5 + PCK@0.2: 0.967 + Task: Fashion 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_dress_256x192-8ebae0eb_20221208.pth diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..09dfaaa390bb2020e4a511d6ba111d35d5fa4378 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=64) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] 
+val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_long_sleeved_dress_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_long_sleeved_dress_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..f0e6f0c63218874f4e40bdd06eb0cbc57b9365a7 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=64) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + 
dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_skirt_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_skirt_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..9bed7421991041145f028e2b91689b8c5125d205 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=64) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', 
input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_vest_dress_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_vest_dress_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..617e59ae74be40511256c2b9e358300ea2348f27 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=128) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data 
loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_trousers_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_trousers_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..aa3b2774fcaedf9c7ace5a335775011e6c0a7d29 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=192) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + 
persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_shorts_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_shorts_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..0bfcabaa5478596cc026309e5f57e6ea5db83abc --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + 
sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_short_sleeved_dress_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_short_sleeved_dress_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..f627eb182c90b57ae53a4a9141f00ed333d3e229 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + 
type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_sling_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_sling_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..8b59607060c41a8ddbb4d38c5acc41e243cd2e96 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + 
ann_file='train/deepfashion2_sling_dress_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_sling_dress_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..4249d5a8971e80a4e068e51543b9191f36488542 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_vest_train.json', + data_prefix=dict(img='train/image/'), + 
pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_vest_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..4161952dcf31904e8df8c70ff25ca207c1cea2ae --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=384) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_short_sleeved_shirt_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) 
+val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_short_sleeved_shirt_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..36e0318bf7a954fdbd35a8b59219a6cde2396df2 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_long_sleeved_outwear_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + 
batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/' + 'deepfashion2_long_sleeved_outwear_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..f82e3cb5fb04011130521a35080b00f01a70ac68 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_long_sleeved_shirt_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + 
num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/deepfashion2_long_sleeved_shirt_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..30db99de9e96eaede42332daae3d55f578b941f2 --- /dev/null +++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + logger=dict(type='LoggerHook', interval=10), + checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=294, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'DeepFashion2Dataset' +data_mode = 'topdown' +data_root = 'data/deepfasion2/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='train/deepfashion2_short_sleeved_outwear_train.json', + data_prefix=dict(img='train/image/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + 
persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='validation/' + 'deepfashion2_short_sleeved_outwear_validation.json', + data_prefix=dict(img='validation/image/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/README.md b/mmpose/configs/hand_2d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6f7758290eb914b88662e685135748c1fb5f665d --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/README.md @@ -0,0 +1,18 @@ +# 2D Hand Pose Estimation + +2D hand pose estimation is the task of detecting the poses (or keypoints) of the hand from an input image. + +Typically, the input is either a cropped hand image with the hand located at the center, +or a full image together with the rough location (i.e. the bounding box) of the hand. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_hand_keypoint.md) to prepare data. + +## Demo + +Please follow [Demo](/demo/docs/en/2d_hand_demo.md) to run demos. + +<img src="https://user-images.githubusercontent.com/11788150/109098558-8c54db00-775c-11eb-8966-85df96b23dc5.gif" width="600px" alt><br> + +<img src="https://user-images.githubusercontent.com/26127467/187664103-cfbe0c4e-5876-42f9-9023-5fb58ce00d7b.jpg" height="500px" alt><br> diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/README.md b/mmpose/configs/hand_2d_keypoint/rtmpose/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9687b7e72c98376a00570389e7f1d003b4ace8f0 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/rtmpose/README.md @@ -0,0 +1,16 @@ +# RTMPose + +Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet their application in industry still suffers from heavy model parameters and high latency. +To bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose. +Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries. +To further evaluate RTMPose's capability in critical real-time applications, we also report performance after deployment on mobile devices.
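To make these models easy to try out, here is a minimal, hypothetical inference sketch (not part of the original diff): it assumes mmpose 1.x and its dependencies are installed and that a local `hand.jpg` exists; `'hand'` is the alias registered for the RTMPose wholebody-hand model in its metafile (`Alias: hand`) further below.

```python
# Hypothetical usage sketch: run the RTMPose hand model through MMPose's
# high-level inferencer. The 'hand' alias resolves to the rtmpose-m
# wholebody-hand checkpoint via the Alias field in the metafile.
from mmpose.apis import MMPoseInferencer

inferencer = MMPoseInferencer(pose2d='hand')

# Calling the inferencer returns a generator; each item is a dict with
# 'predictions' (keypoints and scores) and an optional 'visualization'.
result = next(inferencer('hand.jpg', show=False))
print(result['predictions'])
```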
+ +## Results and Models + +### COCO-WholeBody-Hand Dataset + +Results on COCO-WholeBody-Hand val set + +| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download | +| :-------: | :--------: | :-----: | :---: | :--: | :------------------------------------------------------------------------------------: | +| RTMPose-m | 256x256 | 0.815 | 0.837 | 4.51 | [rtmpose_coco_wholebody_hand.md](./coco_wholebody_hand/rtmpose_coco_wholebody_hand.md) | diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..7d5438586eca47ad9ae006a03f146ef245a3b502 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,233 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from epoch 105 to 210 (max_epochs // 2 to max_epochs) + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=21, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines
+train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], + rotate_factor=180), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=180), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.md new file mode 100644 index 0000000000000000000000000000000000000000..b2a5957e6ec3850423188c4fde9fd4aeae9853ee --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + 
+```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [rtmpose_m](/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.815 | 0.837 | 4.51 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-hand_pt-aic-coco_210e-256x256-99477206_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-hand_pt-aic-coco_210e-256x256-99477206_20230228.json) | diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..2f87733605b771ff0aa094c5702cdeec8c115e21 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: RTMPose + Alias: hand + Metadata: + Architecture: + - RTMPose + Training Data: COCO-WholeBody-Hand + Name: rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.837 + EPE: 4.51 + PCK@0.2: 0.815 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-hand_pt-aic-coco_210e-256x256-99477206_20230228.pth diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..689dc68096075ed5a53b32f243eefc13bf7deaca --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py @@ -0,0 +1,381 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# coco-hand onehand10k freihand2d rhd2d halpehand + +# runtime +max_epochs = 210 +stage2_num_epochs = 10 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0,
bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from epoch 105 to 210 (max_epochs // 2 to max_epochs) + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(256, 256), + sigma=(5.66, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=21, + input_size=codec['input_size'], + in_featuremap_size=(8, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/' + +backend_args = dict(backend='local') + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.5, 1.5], + rotate_factor=180), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + # dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=180), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.2), + dict(type='MedianBlur', p=0.2), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2,
min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# train datasets +dataset_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='detection/coco/train2017/'), + pipeline=[], +) + +dataset_onehand10k = dict( + type='OneHand10KDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='onehand10k/annotations/onehand10k_train.json', + data_prefix=dict(img='pose/OneHand10K/'), + pipeline=[], +) + +dataset_freihand = dict( + type='FreiHandDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='freihand/annotations/freihand_train.json', + data_prefix=dict(img='pose/FreiHand/'), + pipeline=[], +) + +dataset_rhd = dict( + type='Rhd2DDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='rhd/annotations/rhd_train.json', + data_prefix=dict(img='pose/RHD/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=21, + mapping=[ + (0, 0), + (1, 4), + (2, 3), + (3, 2), + (4, 1), + (5, 8), + (6, 7), + (7, 6), + (8, 5), + (9, 12), + (10, 11), + (11, 10), + (12, 9), + (13, 16), + (14, 15), + (15, 14), + (16, 13), + (17, 20), + (18, 19), + (19, 18), + (20, 17), + ]) + ], +) + +dataset_halpehand = dict( + type='HalpeHandDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_train_v1.json', + data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015/'), + pipeline=[], +) + +# data loaders +train_dataloader = dict( + batch_size=256, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type='CombinedDataset', + metainfo=dict( + from_file='configs/_base_/datasets/coco_wholebody_hand.py'), + datasets=[ + dataset_coco, dataset_onehand10k, dataset_freihand, dataset_rhd, + dataset_halpehand + ], + pipeline=train_pipeline, + test_mode=False, + )) + +# test datasets +val_coco = dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='coco/annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +val_onehand10k = dict( + type='OneHand10KDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='onehand10k/annotations/onehand10k_test.json', + data_prefix=dict(img='pose/OneHand10K/'), + pipeline=[], +) + +val_freihand = dict( + type='FreiHandDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='freihand/annotations/freihand_test.json', + data_prefix=dict(img='pose/FreiHand/'), + pipeline=[], +) + +val_rhd = dict( + type='Rhd2DDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='rhd/annotations/rhd_test.json', + data_prefix=dict(img='pose/RHD/'), + pipeline=[ + dict( + type='KeypointConverter', + num_keypoints=21, + mapping=[ + (0, 0), + (1, 4), + (2, 3), + (3, 2), + (4, 1), + (5, 8), + (6, 7), + (7, 6), + (8, 5), + (9, 12), + (10, 11), + (11, 10), + (12, 9), + (13, 16), + (14, 15), + (15, 14), + (16, 13), + (17, 20), + (18, 19), + (19, 18), + (20, 17), + ]) + ], +) + +val_halpehand = dict( + type='HalpeHandDataset', + data_root=data_root, + data_mode=data_mode, + ann_file='halpe/annotations/halpe_val_v1.json', + data_prefix=dict(img='detection/coco/val2017/'), + pipeline=[], +) + +test_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + 
type='CombinedDataset', + metainfo=dict( + from_file='configs/_base_/datasets/coco_wholebody_hand.py'), + datasets=[ + val_coco, val_onehand10k, val_freihand, val_rhd, val_halpehand + ], + pipeline=val_pipeline, + test_mode=True, + )) + +val_dataloader = test_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md new file mode 100644 index 0000000000000000000000000000000000000000..361770dad2ec789daf9bfde0d08b3947a8a2cf38 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md @@ -0,0 +1,67 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2303.07399, + doi = {10.48550/ARXIV.2303.07399}, + url = {https://arxiv.org/abs/2303.07399}, + author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose}, + publisher = {arXiv}, + year = {2023}, + copyright = {Creative Commons Attribution 4.0 International} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary> + +```bibtex +@inproceedings{lin2014microsoft, + title={Microsoft coco: Common objects in context}, + author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, + booktitle={European conference on computer vision}, + pages={740--755}, + year={2014}, + organization={Springer} +} +``` + +</details> + +- `Hand5` and `*` denote a model trained on 5 public datasets: + - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/) + - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html) + - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/) + - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html) + - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
| Config | Input Size | PCK@0.2<sup><br>(COCO-Wholebody-Hand)</sup> | PCK@0.2<sup><br>(Hand5)</sup> | AUC<sup><br>(Hand5)</sup> | EPE<sup><br>(Hand5)</sup> | FLOPS(G) | Download | +| :---------------------------------------: | :--------: | :-----------------------------------: | :---------------------: | :-----------------: | :-----------------: | :------: | :-----------------------------------------: | +| [RTMPose-m\*<sup><br>(alpha version)</sup>](./rtmpose-m_8xb256-210e_hand5-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) | diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8dfd42e39166be29c25a38354ad472c0612d313 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml @@ -0,0 +1,27 @@ +Collections: +- Name: RTMPose + Paper: + Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose" + URL: https://arxiv.org/abs/2303.07399 + README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md +Models: +- Config: configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py + In Collection: RTMPose + Metadata: + Architecture: &id001 + - RTMPose + Training Data: &id002 + - COCO-Wholebody-Hand + - OneHand10K + - FreiHand2d + - RHD2d + - Halpe + Name: rtmpose-m_8xb256-210e_hand5-256x256 + Results: + - Dataset: Hand5 + Metrics: + PCK@0.2: 0.964 + AUC: 0.839 + EPE: 5.06 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f63f1f8259dd1cf52bebd3612db8ceec3d23220 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/README.md @@ -0,0 +1,55 @@ +# Top-down heatmap-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator produces heatmaps that represent the likelihood of each keypoint at every image location, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
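As a concrete illustration of the decoding step, the sketch below reduces a heatmap stack to keypoint coordinates with a plain argmax. This is an assumption made for clarity, not MMPose's actual codec: the codecs behind the models below additionally apply sub-pixel refinements such as the DarkPose and UDP variants listed in the tables.

```python
# Illustrative heatmap decoding (NumPy only, no MMPose dependency):
# each of the K channels scores how likely every location is to be that
# keypoint; the peak, rescaled to the input resolution, is the estimate.
import numpy as np

def decode_heatmaps(heatmaps: np.ndarray, input_size=(192, 256)):
    """heatmaps: (K, H, W) float array -> ((K, 2) xy coords, (K,) scores)."""
    k, h, w = heatmaps.shape
    flat = heatmaps.reshape(k, -1)
    ys, xs = np.unravel_index(flat.argmax(axis=1), (h, w))
    scores = flat.max(axis=1)
    # heatmaps are typically 1/4 of the input resolution (e.g. 48x64 for 192x256)
    coords = np.stack([xs * input_size[0] / w, ys * input_size[1] / h], axis=1)
    return coords, scores

# e.g. decode a dummy 21-keypoint hand prediction at 64x64 heatmap resolution
coords, scores = decode_heatmaps(np.random.rand(21, 64, 64), input_size=(256, 256))
```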
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146522977-5f355832-e9c1-442f-a34f-9d24fb0aefa8.png" height=400> +</div> + +## Results and Models + +### COCO-WholeBody-Hand Dataset + +Results on COCO-WholeBody-Hand val set + +| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download | +| :--------------: | :--------: | :-----: | :---: | :--: | :----------------------------------------------------------------------------------------------: | +| HRNetv2-w18+Dark | 256x256 | 0.814 | 0.840 | 4.37 | [hrnetv2_dark_coco_wholebody_hand.md](./coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md) | +| HRNetv2-w18 | 256x256 | 0.813 | 0.840 | 4.39 | [hrnetv2_coco_wholebody_hand.md](./coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md) | +| HourglassNet | 256x256 | 0.804 | 0.835 | 4.54 | [hourglass_coco_wholebody_hand.md](./coco_wholebody_hand/hourglass_coco_wholebody_hand.md) | +| SCNet-50 | 256x256 | 0.803 | 0.834 | 4.55 | [scnet_coco_wholebody_hand.md](./coco_wholebody_hand/scnet_coco_wholebody_hand.md) | +| ResNet-50 | 256x256 | 0.800 | 0.833 | 4.64 | [resnet_coco_wholebody_hand.md](./coco_wholebody_hand/resnet_coco_wholebody_hand.md) | +| LiteHRNet-18 | 256x256 | 0.795 | 0.830 | 4.77 | [litehrnet_coco_wholebody_hand.md](./coco_wholebody_hand/litehrnet_coco_wholebody_hand.md) | +| MobileNet-v2 | 256x256 | 0.795 | 0.829 | 4.77 | [mobilenetv2_coco_wholebody_hand.md](./coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md) | + +### FreiHand Dataset + +Results on FreiHand val & test set + +| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download | +| :-------: | :--------: | :-----: | :---: | :--: | :-------------------------------------------------------: | +| ResNet-50 | 224x224 | 0.999 | 0.868 | 3.27 | [resnet_freihand2d.md](./freihand2d/resnet_freihand2d.md) | + +### OneHand10K Dataset + +Results on OneHand10K val set + +| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download | +| :--------------: | :--------: | :-----: | :---: | :---: | :-------------------------------------------------------------------: | +| HRNetv2-w18+Dark | 256x256 | 0.990 | 0.572 | 23.96 | [hrnetv2_dark_onehand10k.md](./onehand10k/hrnetv2_dark_onehand10k.md) | +| HRNetv2-w18+UDP | 256x256 | 0.990 | 0.571 | 23.88 | [hrnetv2_udp_onehand10k.md](./onehand10k/hrnetv2_udp_onehand10k.md) | +| HRNetv2-w18 | 256x256 | 0.990 | 0.567 | 24.26 | [hrnetv2_onehand10k.md](./onehand10k/hrnetv2_onehand10k.md) | +| ResNet-50 | 256x256 | 0.989 | 0.555 | 25.16 | [resnet_onehand10k.md](./onehand10k/resnet_onehand10k.md) | +| MobileNet-v2 | 256x256 | 0.986 | 0.537 | 28.56 | [mobilenetv2_onehand10k.md](./onehand10k/mobilenetv2_onehand10k.md) | + +### RHD Dataset + +Results on RHD test set + +| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download | +| :--------------: | :--------: | :-----: | :---: | :--: | :----------------------------------------------------: | +| HRNetv2-w18+Dark | 256x256 | 0.992 | 0.903 | 2.18 | [hrnetv2_dark_rhd2d.md](./rhd2d/hrnetv2_dark_rhd2d.md) | +| HRNetv2-w18+UDP | 256x256 | 0.992 | 0.902 | 2.19 | [hrnetv2_udp_rhd2d.md](./rhd2d/hrnetv2_udp_rhd2d.md) | +| HRNetv2-w18 | 256x256 | 0.992 | 0.902 | 2.21 | [hrnetv2_rhd2d.md](./rhd2d/hrnetv2_rhd2d.md) | +| ResNet-50 | 256x256 | 0.991 | 0.898 | 2.32 | [resnet_rhd2d.md](./rhd2d/resnet_rhd2d.md) | +| MobileNet-v2 | 256x256 | 0.985 | 0.883 | 2.79 | [mobilenetv2_rhd2d.md](./rhd2d/mobilenetv2_rhd2d.md) | diff --git 
a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md new file mode 100644 index 0000000000000000000000000000000000000000..4728baaba2f19ad8e6760958be21ab54dc964266 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29">Hourglass (ECCV'2016)</a></summary> + +```bibtex +@inproceedings{newell2016stacked, + title={Stacked hourglass networks for human pose estimation}, + author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia}, + booktitle={European conference on computer vision}, + pages={483--499}, + year={2016}, + organization={Springer} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_hourglass_52](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.804 | 0.835 | 4.54 | [ckpt](https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256-7b05c6db_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256_20210909.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..f6247504e2b6323010a800ea7b5a1c6d01f51a99 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: Hourglass + Metadata: + Architecture: + - Hourglass + Training Data: COCO-WholeBody-Hand + Name: td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.835 + EPE: 4.54 + PCK@0.2: 0.804 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256-7b05c6db_20210909.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md new file mode 100644 index 0000000000000000000000000000000000000000..d944ff43a268aad9f781dbea9063ae4eb000a597 
--- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md @@ -0,0 +1,39 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_hrnetv2_w18](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.813 | 0.840 | 4.39 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256_20210908.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..f6c0046f663badee9d9b9ed0d42baaaaafe280ad --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: COCO-WholeBody-Hand + Name: td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.84 + EPE: 4.39 + PCK@0.2: 0.813 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md new file mode 100644 index 0000000000000000000000000000000000000000..73896361860d72f901b173206a29d1985a85bd65 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 
(TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_hrnetv2_w18_dark](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.814 | 0.840 | 4.37 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark_20210908.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..af1d607d10f4fc62e678a62676fddcf0f1752295 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: DarkPose + Metadata: + Architecture: + - HRNetv2 + - DarkPose + Training Data: COCO-WholeBody-Hand + Name: td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.84 + EPE: 4.37 + PCK@0.2: 0.814 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md 
b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md new file mode 100644 index 0000000000000000000000000000000000000000..7c084b79e1a463794988134b4403393fba540e6a --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md @@ -0,0 +1,37 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2104.06403">LiteHRNet (CVPR'2021)</a></summary> + +```bibtex +@inproceedings{Yulitehrnet21, + title={Lite-HRNet: A Lightweight High-Resolution Network}, + author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong}, + booktitle={CVPR}, + year={2021} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [LiteHRNet-18](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.795 | 0.830 | 4.77 | [ckpt](https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256-d6945e6a_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256_20210908.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..eeecbfe7e244b8c629fd4d621aea73a6b5694704 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: LiteHRNet + Metadata: + Architecture: + - LiteHRNet + Training Data: COCO-WholeBody-Hand + Name: td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.83 + EPE: 4.77 + PCK@0.2: 0.795 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256-d6945e6a_20210908.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md new file mode 100644 index 0000000000000000000000000000000000000000..cc76358a8fe124d1f2bc04523e841e551590fccb --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md @@ -0,0 +1,38 @@ +<!-- 
[BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_mobilenetv2](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.795 | 0.829 | 4.77 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256-06b8c877_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256_20210909.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..a9d0101ce77db1d1062cfb20c9d2d80f848f0ecf --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - MobilenetV2 + Training Data: COCO-WholeBody-Hand + Name: td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.829 + EPE: 4.77 + PCK@0.2: 0.795 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256-06b8c877_20210909.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md new file mode 100644 index 0000000000000000000000000000000000000000..ae7f287e3d250de0f9a39a0190562eb9f6e69e7c --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md @@ -0,0 +1,55 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a 
href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_resnet_50](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.800 | 0.833 | 4.64 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256-8dbc750c_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256_20210908.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..78d16a6e459e6cbafeffb2fb14d863910a7b2144 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ResNet + Training Data: COCO-WholeBody-Hand + Name: td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.833 + EPE: 4.64 + PCK@0.2: 0.8 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256-8dbc750c_20210908.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md new file mode 100644 index 
0000000000000000000000000000000000000000..06c6fda74c6c36df1abac5a06c5d642f9eac580d --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md @@ -0,0 +1,38 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Liu_Improving_Convolutional_Networks_With_Self-Calibrated_Convolutions_CVPR_2020_paper.html">SCNet (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{liu2020improving, + title={Improving Convolutional Networks with Self-Calibrated Convolutions}, + author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={10096--10105}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody-Hand (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody-Hand val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_scnet_50](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.803 | 0.834 | 4.55 | [ckpt](https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256-e73414c7_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256_20210909.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml new file mode 100644 index 0000000000000000000000000000000000000000..a8887b3c8eeb2be74423d845ece640af8333e1d3 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SCNet + Training Data: COCO-WholeBody-Hand + Name: td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256 + Results: + - Dataset: COCO-WholeBody-Hand + Metrics: + AUC: 0.834 + EPE: 4.55 + PCK@0.2: 0.803 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256-e73414c7_20210909.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..e0bc1c8739c9d8ea1fc585882abe5b8189087e2a --- /dev/null +++ 
b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,123 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HourglassNet', + num_stacks=1, + ), + head=dict( + type='CPMHead', + in_channels=256, + out_channels=21, + num_stages=1, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + rotate_factor=180.0, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b9f0f281b9bc72598b9e1ffacd99f58248175d --- /dev/null +++ 
b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + 
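# the val split runs the dataset in test_mode with the deterministic val_pipeline defined above +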
data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..5d67f393f6612aab494a47c15bd9ce7b68fc8b4d --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# 
data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..f3a6150e49e687bb3d510bd7139d66bd8ac8b37f --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,136 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='LiteHRNet', + in_channels=3, + extra=dict( + stem=dict(stem_channels=32, out_channels=32, expand_ratio=1), + num_stages=3, + stages_spec=dict( + num_modules=(2, 4, 2), + num_branches=(2, 3, 4), + num_blocks=(2, 2, 2), + module_type=('LITE', 'LITE', 'LITE'), + with_fuse=(True, True, True), + reduce_ratios=(8, 8, 8), + num_channels=( + (40, 80), + (40, 80, 160), + (40, 80, 160, 320), + )), + with_head=True, + )), + head=dict( + type='HeatmapHead', + in_channels=40, + out_channels=21, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + 
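# GenerateTarget encodes the affine-aligned keypoints into heatmap targets using the MSRAHeatmap codec defined above +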
dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..dba8538a5fe7b4313b888cae5a21f0c55b58c340 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,120 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict(type='Pretrained', checkpoint='mmcls://mobilenet_v2')), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + 
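# PackPoseInputs gathers the transformed image, targets and meta info into the data sample consumed by the model +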
dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..c04950bfaabcfebc806ce541e8d5285d0bca75be --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,119 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + 
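# no augmentation at eval time: only the affine crop to the codec input size before packing +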
dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..f596227c5c109fe51b0fd822c1f2b26b4abaae83 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='SCNet', + depth=50, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/scnet50-7ef0a199.pth')), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyHandDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='RandomFlip', direction='horizontal'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', 
input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE') +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.md new file mode 100644 index 0000000000000000000000000000000000000000..f1a6c80132255b1cd6d029acd05add2a71f72e98 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ICCV_2019/html/Zimmermann_FreiHAND_A_Dataset_for_Markerless_Capture_of_Hand_Pose_and_ICCV_2019_paper.html">FreiHand (ICCV'2019)</a></summary> + +```bibtex +@inproceedings{zimmermann2019freihand, + title={Freihand: A dataset for markerless capture of hand pose and shape from single rgb images}, + author={Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={813--822}, + year={2019} +} +``` + +</details> + +Results on FreiHand val & test set + +| Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--- | :-------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :-------------------------------------------------------: | :------------------------------------------------------: | +| test | [pose_resnet_50](/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py) | 
224x224 | 0.999 | 0.868 | 3.27 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224_20200914.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.yml new file mode 100644 index 0000000000000000000000000000000000000000..9937b50be6756f1b8cd3de45347cabb159919d2f --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ResNet + Training Data: FreiHand + Name: td-hm_res50_8xb64-100e_freihand2d-224x224 + Results: + - Dataset: FreiHand + Metrics: + AUC: 0.868 + EPE: 3.27 + PCK@0.2: 0.999 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py new file mode 100644 index 0000000000000000000000000000000000000000..cd1750cdebc9d977ae917432b3e714ee1275f3d8 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py @@ -0,0 +1,138 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=100, val_interval=1) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=100, + milestones=[50, 70], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='AUC', rule='greater', interval=1)) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(224, 224), heatmap_size=(56, 56), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'FreiHandDataset' +data_mode = 'topdown' +data_root = 'data/freihand/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', padding=0.8), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', + shift_factor=0.25, + rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale', 
padding=0.8), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/freihand_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/freihand_val.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/freihand_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md new file mode 100644 index 0000000000000000000000000000000000000000..59d70fc597094e1597440809c5b1de2d9e4a760f --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md @@ -0,0 +1,60 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/8529221/">OneHand10K (TCSVT'2019)</a></summary> + +```bibtex +@article{wang2018mask, + title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image}, + author={Wang, Yangang and Peng, Cong and Liu, Yebin}, + journal={IEEE Transactions on Circuits and Systems for Video Technology}, + volume={29}, + number={11}, + pages={3258--3268}, + year={2018}, + publisher={IEEE} +} +``` + +</details> + +Results on OneHand10K val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | 
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: | +| [pose_hrnetv2_w18_dark](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.572 | 23.96 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark-a2f80c64_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..7fc64b75c7a445e7cae9c0350e5847b90205f87f --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py + In Collection: DarkPose + Metadata: + Architecture: + - HRNetv2 + - DarkPose + Training Data: OneHand10K + Name: td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256 + Results: + - Dataset: OneHand10K + Metrics: + AUC: 0.572 + EPE: 23.96 + PCK@0.2: 0.99 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark-a2f80c64_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md new file mode 100644 index 0000000000000000000000000000000000000000..262bf3225390b69c8d965e02d9f78691a39b4760 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/8529221/">OneHand10K (TCSVT'2019)</a></summary> + +```bibtex +@article{wang2018mask, + title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image}, + author={Wang, Yangang and Peng, Cong and Liu, Yebin}, + journal={IEEE Transactions on Circuits and Systems for Video Technology}, + volume={29}, + number={11}, + pages={3258--3268}, + year={2018}, + publisher={IEEE} +} +``` + +</details> + +Results on OneHand10K val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: | +| [pose_hrnetv2_w18](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.567 | 24.26 | 
[ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256-30bc9c6b_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd0c75587621aa94af22654a8a3ea80957eddc0a --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: OneHand10K + Name: td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256 + Results: + - Dataset: OneHand10K + Metrics: + AUC: 0.567 + EPE: 24.26 + PCK@0.2: 0.99 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256-30bc9c6b_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md new file mode 100644 index 0000000000000000000000000000000000000000..ca1599c116e3df24438842da11a05c713fbf99b1 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md @@ -0,0 +1,60 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/8529221/">OneHand10K (TCSVT'2019)</a></summary> + +```bibtex +@article{wang2018mask, + title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image}, + author={Wang, Yangang and Peng, Cong and Liu, Yebin}, + journal={IEEE Transactions on Circuits and Systems for Video Technology}, + volume={29}, + number={11}, + pages={3258--3268}, + year={2018}, + publisher={IEEE} +} +``` + +</details> + +Results on OneHand10K val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: | +| 
[pose_hrnetv2_w18_udp](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.571 | 23.88 | [ckpt](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp-0d1b515d_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..32d5dd6db5e3f6873456dd5016bdb74a75b783cf --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py + In Collection: UDP + Metadata: + Architecture: + - HRNetv2 + - UDP + Training Data: OneHand10K + Name: td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256 + Results: + - Dataset: OneHand10K + Metrics: + AUC: 0.571 + EPE: 23.88 + PCK@0.2: 0.99 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp-0d1b515d_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md new file mode 100644 index 0000000000000000000000000000000000000000..3f0bf9d1b76f27294712462584791657c910212e --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md @@ -0,0 +1,42 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/8529221/">OneHand10K (TCSVT'2019)</a></summary> + +```bibtex +@article{wang2018mask, + title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image}, + author={Wang, Yangang and Peng, Cong and Liu, Yebin}, + journal={IEEE Transactions on Circuits and Systems for Video Technology}, + volume={29}, + number={11}, + pages={3258--3268}, + year={2018}, + publisher={IEEE} +} +``` + +</details> + +Results on OneHand10K val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: | +| [pose_mobilenet_v2](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.986 | 0.537 | 28.56 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256-f3a3d90e_20210330.pth) | 
[log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..ade1f054f1b16754be9c2dddf174a85db838140d --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - MobilenetV2 + Training Data: OneHand10K + Name: td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256 + Results: + - Dataset: OneHand10K + Metrics: + AUC: 0.537 + EPE: 28.56 + PCK@0.2: 0.986 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256-f3a3d90e_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.md new file mode 100644 index 0000000000000000000000000000000000000000..c07817d68eae7caba106695c69d6ab1746e6c5b8 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.md @@ -0,0 +1,59 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/8529221/">OneHand10K (TCSVT'2019)</a></summary> + +```bibtex +@article{wang2018mask, + title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image}, + author={Wang, Yangang and Peng, Cong and Liu, Yebin}, + journal={IEEE Transactions on Circuits and Systems for Video Technology}, + volume={29}, + number={11}, + pages={3258--3268}, + year={2018}, + publisher={IEEE} +} +``` + +</details> + +Results on OneHand10K val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: | +| [pose_resnet_50](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py) | 256x256 | 0.989 | 0.555 | 25.16 | 
[ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256-739c8639_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..59dc7f523f3f9d75e2451587525c36cf82ef851f --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ResNet + Training Data: OneHand10K + Name: td-hm_res50_8xb32-210e_onehand10k-256x256 + Results: + - Dataset: OneHand10K + Metrics: + AUC: 0.555 + EPE: 25.16 + PCK@0.2: 0.989 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256-739c8639_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..99419065aa879884fddb6afa568257fb1b9fe340 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://msra/hrnetv2_w18', + )), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', 
use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'OneHand10KDataset' +data_mode = 'topdown' +data_root = 'data/onehand10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..610e9d149b658166a37d1fa1a028efab32d0637d --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py @@ -0,0 +1,162 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + 
num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://msra/hrnetv2_w18', + )), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'OneHand10KDataset' +data_mode = 'topdown' +data_root = 'data/onehand10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..54e2220d636601ac4a19a116aa6d0aabe138dbef --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 
64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://msra/hrnetv2_w18', + )), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'OneHand10KDataset' +data_mode = 'topdown' +data_root = 'data/onehand10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..1f4e61c37c5692b62d407f642e23b38197e23d47 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = 
dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + checkpoint='mmcls://mobilenet_v2', + )), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'OneHand10KDataset' +data_mode = 'topdown' +data_root = 'data/onehand10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..36589d899ddd930143749845b5fd5650917d23ec --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, 
start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict( + type='Pretrained', + checkpoint='torchvision://resnet50', + )), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'OneHand10KDataset' +data_mode = 'topdown' +data_root = 'data/onehand10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md new file mode 100644 index 0000000000000000000000000000000000000000..334d97978c1dbb9b7ddca0df13f75372d753a067 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- 
[ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://lmb.informatik.uni-freiburg.de/projects/hand3d/">RHD (ICCV'2017)</a></summary> + +```bibtex +@TechReport{zb2017hand, + author={Christian Zimmermann and Thomas Brox}, + title={Learning to Estimate 3D Hand Pose from Single RGB Images}, + institution={arXiv:1705.01389}, + year={2017}, + note="https://arxiv.org/abs/1705.01389", + url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/" +} +``` + +</details> + +Results on RHD test set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_hrnetv2_w18_dark](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.992 | 0.903 | 2.18 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark-4df3a347_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml new file mode 100644 index 0000000000000000000000000000000000000000..7400dc19e019f605efe6688de5be8170c1aa2c4b --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py + In Collection: DarkPose + Metadata: + Architecture: + - HRNetv2 + - DarkPose + Training Data: RHD + Name: td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256 + Results: + - Dataset: RHD + Metrics: + AUC: 0.903 + EPE: 2.18 + PCK@0.2: 0.992 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark-4df3a347_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md new file mode 100644 index 0000000000000000000000000000000000000000..6fe91fe17b3ef52fdd65751dab103641202c5595 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md @@ -0,0 +1,41 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + 
year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://lmb.informatik.uni-freiburg.de/projects/hand3d/">RHD (ICCV'2017)</a></summary> + +```bibtex +@TechReport{zb2017hand, + author={Christian Zimmermann and Thomas Brox}, + title={Learning to Estimate 3D Hand Pose from Single RGB Images}, + institution={arXiv:1705.01389}, + year={2017}, + note="https://arxiv.org/abs/1705.01389", + url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/" +} +``` + +</details> + +Results on RHD test set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_hrnetv2_w18](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.992 | 0.902 | 2.21 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256-95b20dd8_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml new file mode 100644 index 0000000000000000000000000000000000000000..f5292da7706f8a51a7a52a9dc98371ec9705aff5 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml @@ -0,0 +1,16 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py + In Collection: HRNetv2 + Metadata: + Architecture: + - HRNetv2 + Training Data: RHD + Name: td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256 + Results: + - Dataset: RHD + Metrics: + AUC: 0.902 + EPE: 2.21 + PCK@0.2: 0.992 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256-95b20dd8_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md new file mode 100644 index 0000000000000000000000000000000000000000..c494eb8fc6b79e310f3b9099cf0cfc6059ce4f8c --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/9052469/">HRNetv2 (TPAMI'2019)</a></summary> + +```bibtex +@article{WangSCJDZLMTWLX19, + title={Deep High-Resolution Representation Learning for Visual Recognition}, + author={Jingdong Wang and Ke Sun and Tianheng Cheng and + Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and + Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao}, + journal={TPAMI}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + 
+</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://lmb.informatik.uni-freiburg.de/projects/hand3d/">RHD (ICCV'2017)</a></summary> + +```bibtex +@TechReport{zb2017hand, + author={Christian Zimmermann and Thomas Brox}, + title={Learning to Estimate 3D Hand Pose from Single RGB Images}, + institution={arXiv:1705.01389}, + year={2017}, + note="https://arxiv.org/abs/1705.01389", + url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/" +} +``` + +</details> + +Results on RHD test set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :-------------------------------------------------------: | +| [pose_hrnetv2_w18_udp](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.992 | 0.902 | 2.19 | [ckpt](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp-63ba6007_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml new file mode 100644 index 0000000000000000000000000000000000000000..db63b682e2fc50570d118a3755d9e64285fb6fe5 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py + In Collection: UDP + Metadata: + Architecture: + - HRNetv2 + - UDP + Training Data: RHD + Name: td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256 + Results: + - Dataset: RHD + Metrics: + AUC: 0.902 + EPE: 2.19 + PCK@0.2: 0.992 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp-63ba6007_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md new file mode 100644 index 0000000000000000000000000000000000000000..877247fe86ff2ec59a296f5cb45d88b392533135 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md @@ -0,0 +1,40 @@ +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2018/html/Sandler_MobileNetV2_Inverted_Residuals_CVPR_2018_paper.html">MobilenetV2 (CVPR'2018)</a></summary> + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://lmb.informatik.uni-freiburg.de/projects/hand3d/">RHD (ICCV'2017)</a></summary> + +```bibtex +@TechReport{zb2017hand, + author={Christian Zimmermann and Thomas Brox}, + title={Learning to Estimate 3D Hand Pose from Single RGB Images}, + institution={arXiv:1705.01389}, + year={2017}, + note="https://arxiv.org/abs/1705.01389", + url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/" +} +``` + +</details> + +Results on RHD test set + 
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_mobilenet_v2](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.985 | 0.883 | 2.79 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256-85fa02db_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml new file mode 100644 index 0000000000000000000000000000000000000000..202a636fbe70e59ff71b6f225273242162c400cc --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - MobilenetV2 + Training Data: RHD + Name: td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256 + Results: + - Dataset: RHD + Metrics: + AUC: 0.883 + EPE: 2.79 + PCK@0.2: 0.985 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256-85fa02db_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.md new file mode 100644 index 0000000000000000000000000000000000000000..f103a0df40e4ec57223a752e2c833f42b6909ae8 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.md @@ -0,0 +1,57 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://lmb.informatik.uni-freiburg.de/projects/hand3d/">RHD (ICCV'2017)</a></summary> + +```bibtex +@TechReport{zb2017hand, + author={Christian Zimmermann and Thomas Brox}, + title={Learning to Estimate 3D Hand Pose from Single RGB Images}, + institution={arXiv:1705.01389}, + year={2017}, + note="https://arxiv.org/abs/1705.01389", + url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/" +} +``` + +</details> + +Results on RHD test set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| 
:--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [pose_resnet50](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.991 | 0.898 | 2.32 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256-5dc7e4cc_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.yml new file mode 100644 index 0000000000000000000000000000000000000000..d09f8ba2685b86fb1326c0460b7eb6fc83dd95fa --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: + - SimpleBaseline2D + - ResNet + Training Data: RHD + Name: td-hm_res50_8xb64-210e_rhd2d-256x256 + Results: + - Dataset: RHD + Metrics: + AUC: 0.898 + EPE: 2.32 + PCK@0.2: 0.991 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256-5dc7e4cc_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..4a9bcc9b896ae499e034605209f1c7eb14ba7b39 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://msra/hrnetv2_w18', + )), + 
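+    # Note: the neck below concatenates the four HRNetV2 branch outputs
+    # (18 + 36 + 72 + 144 = 270 channels), which is why the head is
+    # configured with in_channels=270.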
neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'Rhd2DDataset' +data_mode = 'topdown' +data_root = 'data/rhd/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..44b8dc0f5a1c55d10293c40e5b8314fca6aa9b9c --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py @@ -0,0 +1,162 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(256, 256), + heatmap_size=(64, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + 
num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://msra/hrnetv2_w18', + )), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'Rhd2DDataset' +data_mode = 'topdown' +data_root = 'data/rhd/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..d1c796234dd22760f6f52dcb97d05ad0410ceabb --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py @@ -0,0 +1,158 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = 
dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144), + multiscale_output=True), + upsample=dict(mode='bilinear', align_corners=False)), + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://msra/hrnetv2_w18', + )), + neck=dict( + type='FeatureMapProcessor', + concat=True, + ), + head=dict( + type='HeatmapHead', + in_channels=270, + out_channels=21, + deconv_out_channels=None, + conv_out_channels=(270, ), + conv_kernel_sizes=(1, ), + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'Rhd2DDataset' +data_mode = 'topdown' +data_root = 'data/rhd/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..d7176bacd73cad44295c84ae8f4b9b1d1201bf35 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py @@ -0,0 +1,125 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime 
+train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='MobileNetV2', + widen_factor=1., + out_indices=(7, ), + init_cfg=dict( + type='Pretrained', + checkpoint='mmcls://mobilenet_v2', + )), + head=dict( + type='HeatmapHead', + in_channels=1280, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'Rhd2DDataset' +data_mode = 'topdown' +data_root = 'data/rhd/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..da5556802891a6d742527e7889d0f31161223eef --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py @@ -0,0 +1,124 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', 
begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict( + type='Pretrained', + checkpoint='torchvision://resnet50', + )), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=21, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'Rhd2DDataset' +data_mode = 'topdown' +data_root = 'data/rhd/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/README.md b/mmpose/configs/hand_2d_keypoint/topdown_regression/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0210a89c2de35fc5f0a480662aa216e61ac9c623 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/README.md @@ -0,0 +1,25 @@ +# Top-down regression-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. At the second stage, regression-based methods directly regress the keypoint coordinates from the features extracted within the bounding box area, following the paradigm introduced in [Deeppose: Human pose estimation via deep neural networks](http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html). 
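+
+A minimal sketch of this regression step is shown below. It is illustrative only, not mmpose's actual `RegressionHead`/`RegressionLabel` implementation: a pooled backbone feature is mapped by a single linear layer to K normalized (x, y) pairs and trained with a smooth-L1 objective, mirroring the `SmoothL1Loss` used by the configs in this folder.
+
+```python
+# Illustrative sketch of coordinate regression (not mmpose's RegressionHead).
+import torch
+import torch.nn as nn
+
+
+class ToyRegressionHead(nn.Module):
+    """Regress normalized (x, y) coordinates for `num_joints` keypoints."""
+
+    def __init__(self, in_channels: int = 2048, num_joints: int = 21):
+        super().__init__()
+        self.num_joints = num_joints
+        self.pool = nn.AdaptiveAvgPool2d(1)  # global average pooling
+        self.fc = nn.Linear(in_channels, num_joints * 2)
+
+    def forward(self, feats: torch.Tensor) -> torch.Tensor:
+        x = self.pool(feats).flatten(1)      # (N, C)
+        coords = self.fc(x)                  # (N, K * 2)
+        return coords.view(-1, self.num_joints, 2)  # coords in crop space
+
+
+# Toy usage: ResNet-50-like features from a 256x256 crop -> 21 (x, y) pairs.
+feats = torch.randn(1, 2048, 8, 8)
+pred = ToyRegressionHead()(feats)            # shape (1, 21, 2)
+loss = nn.SmoothL1Loss()(pred, torch.rand_like(pred))
+```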
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146515040-a82a8a29-d6bc-42f1-a2ab-7dfa610ce363.png"> +</div> + +## Results and Models + +### OneHand10K Dataset + +Results on OneHand10K val set + +| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download | +| :-------: | :--------: | :-----: | :---: | :---: | :-------------------------------------------------------: | +| ResNet-50 | 256x256 | 0.990 | 0.485 | 34.21 | [resnet_onehand10k.md](./onehand10k/resnet_onehand10k.md) | + +### RHD Dataset + +Results on RHD test set + +| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download | +| :-------: | :--------: | :-----: | :---: | :--: | :----------------------------------------: | +| ResNet-50 | 256x256 | 0.988 | 0.865 | 3.32 | [resnet_rhd2d.md](./rhd2d/resnet_rhd2d.md) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.md new file mode 100644 index 0000000000000000000000000000000000000000..40c0c184959466df639518450dcf17aa0b60ba30 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.md @@ -0,0 +1,59 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://ieeexplore.ieee.org/abstract/document/8529221/">OneHand10K (TCSVT'2019)</a></summary> + +```bibtex +@article{wang2018mask, + title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image}, + author={Wang, Yangang and Peng, Cong and Liu, Yebin}, + journal={IEEE Transactions on Circuits and Systems for Video Technology}, + volume={29}, + number={11}, + pages={3258--3268}, + year={2018}, + publisher={IEEE} +} +``` + +</details> + +Results on OneHand10K val set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: | +| [deeppose_resnet_50](/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.485 | 34.21 | [ckpt](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256-cbddf43a_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256_20210330.log.json) | diff --git 
a/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.yml new file mode 100644 index 0000000000000000000000000000000000000000..d5e9d8122ecb0c2082b2d0b38413dee2dc2220aa --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py + In Collection: DeepPose + Metadata: + Architecture: + - DeepPose + - ResNet + Training Data: OneHand10K + Name: td-reg_res50_8xb64-210e_onehand10k-256x256 + Results: + - Dataset: OneHand10K + Metrics: + AUC: 0.485 + EPE: 34.21 + PCK@0.2: 0.99 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256-cbddf43a_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..ee1556d45e18e3253f421948d1affd4eabfe673f --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=21, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'OneHand10KDataset' +data_mode = 'topdown' +data_root = 'data/onehand10k/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + 
dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/onehand10k_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.md new file mode 100644 index 0000000000000000000000000000000000000000..6cca5580ba7fcb0fa0f089159b0c96007c5ce90f --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.md @@ -0,0 +1,57 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html">DeepPose (CVPR'2014)</a></summary> + +```bibtex +@inproceedings{toshev2014deeppose, + title={Deeppose: Human pose estimation via deep neural networks}, + author={Toshev, Alexander and Szegedy, Christian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={1653--1660}, + year={2014} +} +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html">ResNet (CVPR'2016)</a></summary> + +```bibtex +@inproceedings{he2016deep, + title={Deep residual learning for image recognition}, + author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={770--778}, + year={2016} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://lmb.informatik.uni-freiburg.de/projects/hand3d/">RHD (ICCV'2017)</a></summary> + +```bibtex +@TechReport{zb2017hand, + author={Christian Zimmermann and Thomas Brox}, + title={Learning to Estimate 3D Hand Pose from Single RGB Images}, + institution={arXiv:1705.01389}, + year={2017}, + note="https://arxiv.org/abs/1705.01389", + url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/" +} +``` + +</details> + +Results on RHD test set + +| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log | +| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: | +| [deeppose_resnet_50](/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.988 | 0.865 | 3.32 | [ckpt](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256-37f1c4d3_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256_20210330.log.json) | diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.yml new file mode 100644 index 
0000000000000000000000000000000000000000..3d0a920c5daa2eb1eaab3bea7c5c77529d72f377 --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.yml @@ -0,0 +1,17 @@ +Models: +- Config: configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py + In Collection: DeepPose + Metadata: + Architecture: + - DeepPose + - ResNet + Training Data: RHD + Name: td-reg_res50_8xb64-210e_rhd2d-256x256 + Results: + - Dataset: RHD + Metrics: + AUC: 0.865 + EPE: 3.32 + PCK@0.2: 0.988 + Task: Hand 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256-37f1c4d3_20210330.pth diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py new file mode 100644 index 0000000000000000000000000000000000000000..a350c24bfe2d3d7246ed63c78f737414ee5f247e --- /dev/null +++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py @@ -0,0 +1,122 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater')) + +# codec settings +codec = dict(type='RegressionLabel', input_size=(256, 256)) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='RegressionHead', + in_channels=2048, + num_joints=21, + loss=dict(type='SmoothL1Loss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'Rhd2DDataset' +data_mode = 'topdown' +data_root = 'data/rhd/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict( + type='RandomBBoxTransform', rotate_factor=180, + scale_factor=(0.7, 1.3)), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_train.json', + data_prefix=dict(img=''), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, 
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/rhd_test.json', + data_prefix=dict(img=''), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# evaluators +val_evaluator = [ + dict(type='PCKAccuracy', thr=0.2), + dict(type='AUC'), + dict(type='EPE'), +] +test_evaluator = val_evaluator diff --git a/mmpose/configs/hand_3d_keypoint/README.md b/mmpose/configs/hand_3d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..330319f42b186afec3a7c73afd627505e91c7dc8 --- /dev/null +++ b/mmpose/configs/hand_3d_keypoint/README.md @@ -0,0 +1,7 @@ +# 3D Hand Pose Estimation + +3D hand pose estimation is defined as the task of detecting the poses (or keypoints) of the hand from an input image. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/3d_hand_keypoint.md) to prepare data. diff --git a/mmpose/configs/hand_gesture/README.md b/mmpose/configs/hand_gesture/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7cc5bb323b05823b8eeedfb905756ce84c87c8ac --- /dev/null +++ b/mmpose/configs/hand_gesture/README.md @@ -0,0 +1,13 @@ +# Gesture Recognition + +Gesture recognition aims to recognize the hand gestures in the video, such as thumbs up. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_hand_gesture.md) to prepare data. + +## Demo + +Please follow [Demo](/demo/docs/en/gesture_recognition_demo.md) to run the demo. + +<img src="https://user-images.githubusercontent.com/15977946/172213082-afb9d71a-f2df-4509-932c-e47dc61ec7d7.gif" width="600px" alt> diff --git a/mmpose/configs/wholebody_2d_keypoint/README.md b/mmpose/configs/wholebody_2d_keypoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..362a6a89764acec6db1a4ef8216352c1fbbe697e --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/README.md @@ -0,0 +1,19 @@ +# 2D Human Whole-Body Pose Estimation + +2D human whole-body pose estimation aims to localize dense landmarks on the entire human body including face, hands, body, and feet. + +Existing approaches can be categorized into top-down and bottom-up approaches. + +Top-down methods divide the task into two stages: human detection and whole-body pose estimation. They perform human detection first, followed by single-person whole-body pose estimation given human bounding boxes. + +Bottom-up approaches (e.g. AE) first detect all the whole-body keypoints and then group/associate them into person instances. + +## Data preparation + +Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_wholebody_keypoint.md) to prepare data. + +## Demo + +Please follow [Demo](/demo/docs/en/2d_wholebody_pose_demo.md) to run demos. + +<img src="https://user-images.githubusercontent.com/9464825/95552839-00a61080-0a40-11eb-818c-b8dad7307217.gif" width="600px" alt><br> diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/README.md b/mmpose/configs/wholebody_2d_keypoint/rtmpose/README.md new file mode 100644 index 0000000000000000000000000000000000000000..47e488567ca002b499c3349e8860ef9c1e398cae --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/README.md @@ -0,0 +1,18 @@ +# RTMPose + +Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet its application in the industrial community still suffers from heavy model parameters and high latency. 
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose. +Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries. +To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deploying on the mobile device. + +## Results and Models + +### COCO-WholeBody Dataset + +Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset + +| Model | Input Size | Whole AP | Whole AR | Details and Download | +| :-------: | :--------: | :------: | :------: | :---------------------------------------------------------------------: | +| RTMPose-m | 256x192 | 0.604 | 0.667 | [rtmpose_coco-wholebody.md](./coco-wholebody/rtmpose_coco-wholebody.md) | +| RTMPose-l | 256x192 | 0.632 | 0.694 | [rtmpose_coco-wholebody.md](./coco-wholebody/rtmpose_coco-wholebody.md) | +| RTMPose-l | 384x288 | 0.670 | 0.723 | [rtmpose_coco-wholebody.md](./coco-wholebody/rtmpose_coco-wholebody.md) | diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..a926fe38d7d75c31cdebe4d350b07832670d1218 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py @@ -0,0 +1,231 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 270 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(288, 384), + sigma=(6., 6.93), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 
'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=133, + input_size=codec['input_size'], + in_featuremap_size=(9, 12), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1)) + 
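+# Note: EMAHook below keeps an exponential moving average of the model
+# weights, and mmdet.PipelineSwitchHook swaps the training pipeline to
+# train_pipeline_stage2 (weaker augmentation) for the last
+# stage2_num_epochs epochs.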
+custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..026336a550396aa0e65e212bbd93f54ca3a56a60 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py @@ -0,0 +1,231 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 270 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=1024, + out_channels=133, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', 
backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..6a834b830150d8de343d1c1e4a81734d3541a671 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py @@ -0,0 +1,231 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 270 +stage2_num_epochs = 30 
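+# stage2_num_epochs sets the length of the final fine-tuning stage:
+# mmdet.PipelineSwitchHook (in custom_hooks below) switches to the
+# weaker-augmentation pipeline at epoch max_epochs - stage2_num_epochs.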
+base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='SimCCLabel', + input_size=(192, 256), + sigma=(4.9, 5.66), + simcc_split_ratio=2.0, + normalize=False, + use_dark=False) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmpose/v1/projects/' + 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa + )), + head=dict( + type='RTMCCHead', + in_channels=768, + out_channels=133, + input_size=codec['input_size'], + in_featuremap_size=(6, 8), + simcc_split_ratio=codec['simcc_split_ratio'], + final_layer_kernel_size=7, + gau_cfg=dict( + hidden_dims=256, + s=128, + expansion_factor=2, + dropout_rate=0., + drop_path=0., + act_fn='SiLU', + use_rel_bias=False, + pos_enc=False), + loss=dict( + type='KLDiscretLoss', + use_target_weight=True, + beta=10., + label_softmax=True), + decoder=codec), + test_cfg=dict(flip_test=True, )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', 
direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.md new file mode 100644 index 0000000000000000000000000000000000000000..bdf327d631e07c18e63183996df7e6fa51a9b6b1 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.md @@ -0,0 +1,62 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary> + +```bibtex +@misc{https://doi.org/10.48550/arxiv.2303.07399, + doi = {10.48550/ARXIV.2303.07399}, + url = {https://arxiv.org/abs/2303.07399}, + author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai}, + keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose}, + publisher = {arXiv}, + year = {2023}, + copyright = {Creative Commons Attribution 4.0 International} +} + +``` + +</details> + +<!-- [BACKBONE] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, +
year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log | +| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: | +| [rtmpose-m](/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 0.697 | 0.743 | 0.660 | 0.749 | 0.822 | 0.858 | 0.483 | 0.564 | 0.604 | 0.667 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.json) | +| [rtmpose-l](/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 0.721 | 0.764 | 0.693 | 0.780 | 0.844 | 0.876 | 0.523 | 0.600 | 0.632 | 0.694 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.json) | +| [rtmpose-l](/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py) | 384x288 | 0.736 | 0.776 | 0.738 | 0.810 | 0.895 | 0.918 | 0.591 | 0.659 | 0.670 | 0.723 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.json) | diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.yml new file mode 100644 index 0000000000000000000000000000000000000000..19a3cd0cec54115b463c26a338de7354cf4dd65d --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.yml @@ -0,0 +1,66 @@ +Models: +- Config: configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py + In Collection: RTMPose + Alias: wholebody + Metadata: + Architecture: &id001 + - CSPNeXt + Training Data: COCO-WholeBody + Name: rtmpose-m_8xb64-270e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.697 + Body AR: 0.743 + Face AP: 0.822 + Face AR: 0.858 + Foot AP: 0.66 + Foot AR: 0.749 + Hand AP: 0.483 + Hand AR: 0.564 + Whole AP: 0.604 + Whole AR:
0.667 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth +- Config: configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: rtmpose-l_8xb64-270e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.721 + Body AR: 0.764 + Face AP: 0.844 + Face AR: 0.876 + Foot AP: 0.693 + Foot AR: 0.78 + Hand AP: 0.523 + Hand AR: 0.6 + Whole AP: 0.632 + Whole AR: 0.694 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth +- Config: configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py + In Collection: RTMPose + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: rtmpose-l_8xb32-270e_coco-wholebody-384x288 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.736 + Body AR: 0.776 + Face AP: 0.895 + Face AR: 0.918 + Foot AP: 0.738 + Foot AR: 0.81 + Hand AP: 0.591 + Hand AR: 0.659 + Whole AP: 0.67 + Whole AR: 0.723 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23ee1ed315956e31ddf1637049032db767e8006c --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/README.md @@ -0,0 +1,26 @@ +# Top-down heatmap-based pose estimation + +Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator produces heatmaps that represent the likelihood of each pixel being a keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
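+
+Decoding a heatmap back to a keypoint then amounts to taking the per-channel spatial argmax and rescaling it to the input resolution. A minimal NumPy sketch of that decode step (illustrative only — the array shapes are assumed, and this is not the exact MMPose codec API):
+
+```python
+import numpy as np
+
+def decode_heatmaps(heatmaps, input_size):
+    """Decode (K, H, W) heatmaps to K keypoints in input-image pixels."""
+    num_kpts, h, w = heatmaps.shape
+    flat = heatmaps.reshape(num_kpts, -1)
+    ys, xs = np.unravel_index(flat.argmax(axis=1), (h, w))
+    scores = flat.max(axis=1)
+    # rescale from the heatmap grid to the (w, h) model input size
+    kpts = np.stack([xs * input_size[0] / w, ys * input_size[1] / h], axis=1)
+    return kpts, scores
+```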
+ +<div align=center> +<img src="https://user-images.githubusercontent.com/15977946/146522977-5f355832-e9c1-442f-a34f-9d24fb0aefa8.png" height=400> +</div> + +## Results and Models + +### COCO-WholeBody Dataset + +Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset + +| Model | Input Size | Whole AP | Whole AR | Details and Download | +| :-----------------: | :--------: | :------: | :------: | :-----------------------------------------------------------------------------: | +| HRNet-w48+Dark+ | 384x288 | 0.661 | 0.743 | [hrnet_dark_coco-wholebody.md](./coco-wholebody/hrnet_dark_coco-wholebody.md) | +| HRNet-w32+Dark | 256x192 | 0.582 | 0.671 | [hrnet_dark_coco-wholebody.md](./coco-wholebody/hrnet_dark_coco-wholebody.md) | +| HRNet-w48 | 256x192 | 0.579 | 0.681 | [hrnet_coco-wholebody.md](./coco-wholebody/hrnet_coco-wholebody.md) | +| CSPNeXt-m | 256x192 | 0.567 | 0.641 | [cspnext_udp_coco-wholebody.md](./coco-wholebody/cspnext_udp_coco-wholebody.md) | +| ResNet-152 | 256x192 | 0.548 | 0.661 | [resnet_coco-wholebody.md](./coco-wholebody/resnet_coco-wholebody.md) | +| HRNet-w32 | 256x192 | 0.536 | 0.636 | [hrnet_coco-wholebody.md](./coco-wholebody/hrnet_coco-wholebody.md) | +| ResNet-101 | 256x192 | 0.531 | 0.645 | [resnet_coco-wholebody.md](./coco-wholebody/resnet_coco-wholebody.md) | +| S-ViPNAS-Res50+Dark | 256x192 | 0.528 | 0.632 | [vipnas_dark_coco-wholebody.md](./coco-wholebody/vipnas_dark_coco-wholebody.md) | +| ResNet-50 | 256x192 | 0.521 | 0.633 | [resnet_coco-wholebody.md](./coco-wholebody/resnet_coco-wholebody.md) | +| S-ViPNAS-Res50 | 256x192 | 0.495 | 0.607 | [vipnas_coco-wholebody.md](./coco-wholebody/vipnas_coco-wholebody.md) | diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-l_udp_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-l_udp_8xb64-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..2112e19e7688be70ee6afd3085d0070e7d68d0a3 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-l_udp_8xb64-210e_coco-wholebody-256x192.py @@ -0,0 +1,213 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1., + 
widen_factor=1., + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth')), + head=dict( + type='HeatmapHead', + in_channels=1024, + out_channels=133, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + 
type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..bfcb5c3917bb8fd0d60057a5129e0998cb37a672 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py @@ -0,0 +1,213 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +max_epochs = 210 +stage2_num_epochs = 30 +base_lr = 4e-3 + +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +randomness = dict(seed=21) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# codec settings +codec = dict( + type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + _scope_='mmdet', + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=0.67, + widen_factor=0.75, + out_indices=(4, ), + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', + prefix='backbone.', + checkpoint='https://download.openmmlab.com/mmdetection/v3.0/' + 'rtmdet/cspnext_rsb_pretrain/' + 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')), + head=dict( + type='HeatmapHead', + in_channels=768, + out_channels=133, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=False, + flip_mode='heatmap', + shift_heatmap=False, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +backend_args = dict(backend='local') +# backend_args = dict( +# backend='petrel', +# path_mapping=dict({ +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/', +# f'{data_root}': 's3://openmmlab/datasets/detection/coco/' +# })) + +# pipelines +train_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80), + dict(type='TopdownAffine', input_size=codec['input_size']), + 
dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=1.0), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImage', backend_args=backend_args), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict( + type='RandomBBoxTransform', + shift_factor=0., + scale_factor=[0.75, 1.25], + rotate_factor=60), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='Albumentation', + transforms=[ + dict(type='Blur', p=0.1), + dict(type='MedianBlur', p=0.1), + dict( + type='CoarseDropout', + max_holes=1, + max_height=0.4, + max_width=0.4, + min_holes=1, + min_height=0.2, + min_width=0.2, + p=0.5), + ]), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=10, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=10, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +# hooks +default_hooks = dict( + checkpoint=dict( + save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +# evaluators +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.md new file mode 100644 index 0000000000000000000000000000000000000000..1fc4a78dfbda287d33c6edd16d9d36944992f365 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.md @@ -0,0 +1,56 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (ArXiv 2022)</a></summary> + +```bibtex +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + 
archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html">UDP (CVPR'2020)</a></summary> + +```bibtex +@InProceedings{Huang_2020_CVPR, + author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan}, + title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation}, + booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log | +| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: | +| [pose_cspnext_m_udp](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.687 | 0.735 | 0.680 | 0.763 | 0.697 | 0.755 | 0.460 | 0.543 | 0.567 | 0.641 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco-wholebody_pt-in1k_210e-256x192-320fa258_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco-wholebody_pt-in1k_210e-256x192-320fa258_20230123.json) | diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.yml new file mode 100644 index 0000000000000000000000000000000000000000..ebdcc7146ef7969da2fdd26926c92268a9abae90 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.yml @@ -0,0 +1,24 @@ +Models: +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py + In Collection: UDP + Metadata: + Architecture: &id001 + - UDP + - CSPNeXt + Training Data: COCO-WholeBody + Name: cspnext-m_udp_8xb64-210e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.687 + Body AR: 0.735 + Face AP: 0.697 + Face AR: 0.755 + Foot AP: 0.680 + Foot AR: 0.763 + Hand AP: 0.46 + Hand AR: 0.543 + Whole AP: 0.567 + Whole AR: 0.641 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco-wholebody_pt-in1k_210e-256x192-320fa258_20230123.pth diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md new file mode
100644 index 0000000000000000000000000000000000000000..53f240bc528f9c81e65620fb33dbc4546519001c --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md @@ -0,0 +1,41 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log | +| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: | +| [pose_hrnet_w32](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.678 | 0.755 | 0.543 | 0.661 | 0.630 | 0.708 | 0.467 | 0.566 | 0.536 | 0.636 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192-853765cd_20200918.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_20200918.log.json) | +| [pose_hrnet_w32](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py) | 384x288 | 0.700 | 0.772 | 0.585 | 0.691 | 0.726 | 0.783 | 0.515 | 0.603 | 0.586 | 0.673 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288-78cacac3_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288_20200922.log.json) | +| [pose_hrnet_w48](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py) | 256x192 | 0.701 | 0.776 | 0.675 | 0.787 | 0.656 | 0.743 | 0.535 | 0.639 | 0.579 | 0.681 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192-643e18cb_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192_20200922.log.json) | +| [pose_hrnet_w48](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.722 | 0.791 | 0.696 | 0.801 | 0.776 | 0.834 | 0.587 | 0.678 | 0.632 | 0.717 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288-6e061c6a_20200922.pth) | 
[log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_20200922.log.json) | diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml new file mode 100644 index 0000000000000000000000000000000000000000..929bd0535671b25499624cf41487008d4be27ab2 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml @@ -0,0 +1,86 @@ +Models: +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py + In Collection: HRNet + Metadata: + Architecture: &id001 + - HRNet + Training Data: COCO-WholeBody + Name: td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.678 + Body AR: 0.755 + Face AP: 0.630 + Face AR: 0.708 + Foot AP: 0.543 + Foot AR: 0.661 + Hand AP: 0.467 + Hand AR: 0.566 + Whole AP: 0.536 + Whole AR: 0.636 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192-853765cd_20200918.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.700 + Body AR: 0.772 + Face AP: 0.726 + Face AR: 0.783 + Foot AP: 0.585 + Foot AR: 0.691 + Hand AP: 0.515 + Hand AR: 0.603 + Whole AP: 0.586 + Whole AR: 0.673 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288-78cacac3_20200922.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.701 + Body AR: 0.776 + Face AP: 0.656 + Face AR: 0.743 + Foot AP: 0.675 + Foot AR: 0.787 + Hand AP: 0.535 + Hand AR: 0.639 + Whole AP: 0.579 + Whole AR: 0.681 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192-643e18cb_20200922.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py + In Collection: HRNet + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.722 + Body AR: 0.791 + Face AP: 0.776 + Face AR: 0.834 + Foot AP: 0.696 + Foot AR: 0.801 + Hand AP: 0.587 + Hand AR: 0.678 + Whole AP: 0.632 + Whole AR: 0.717 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288-6e061c6a_20200922.pth diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md new file mode 100644 index 0000000000000000000000000000000000000000..b215b3c5f25b595ca30ae54b743c907a53629084 --- /dev/null +++ 
b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md @@ -0,0 +1,58 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary> + +```bibtex +@inproceedings{sun2019deep, + title={Deep high-resolution representation learning for human pose estimation}, + author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={5693--5703}, + year={2019} +} +``` + +</details> + +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary> + +```bibtex +@inproceedings{zhang2020distribution, + title={Distribution-aware coordinate representation for human pose estimation}, + author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={7093--7102}, + year={2020} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log | +| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: | +| [pose_hrnet_w32_dark](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.693 | 0.764 | 0.564 | 0.674 | 0.737 | 0.809 | 0.503 | 0.602 | 0.582 | 0.671 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark-469327ef_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark_20200922.log.json) | +| [pose_hrnet_w48_dark+](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.742 | 0.807 | 0.707 | 0.806 | 0.841 | 0.892 | 0.602 | 0.694 | 0.661 | 0.743 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark_20200918.log.json) | + +Note: `+` means the model is first pre-trained on original COCO dataset, and then fine-tuned on COCO-WholeBody dataset. We find this will lead to better performance. 
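For a quick sanity check, any config/checkpoint pair from the tables above can be loaded directly with the mmpose 1.x Python API. The sketch below is illustrative rather than part of this diff: it pairs the HRNet-w48 dark 384x288 config with its released checkpoint, the image path `demo.jpg` is hypothetical, and the tables' numbers additionally assume boxes from the AP-56.4 person detector, which this snippet does not apply (with no `bboxes` argument, `inference_topdown` treats the whole image as a single person).

```python
# Minimal top-down whole-body inference sketch, assuming mmpose 1.x is
# installed and the repo layout of this diff (configs under mmpose/configs).
from mmpose.apis import init_model, inference_topdown

config = ('mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/'
          'td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py')
checkpoint = ('https://download.openmmlab.com/mmpose/top_down/hrnet/'
              'hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth')

model = init_model(config, checkpoint, device='cpu')  # or 'cuda:0'

# No bboxes passed: the full image is used as one person crop. Pass
# detector boxes (xyxy format) here to mirror detector-based evaluation.
results = inference_topdown(model, 'demo.jpg')
keypoints = results[0].pred_instances.keypoints  # shape (1, 133, 2)
```

Reproducing the table numbers exactly goes through `tools/test.py` with the `COCO_val2017_detections_AP_H_56_person.json` bbox file that the val dataloaders in these configs point at.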
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml new file mode 100644 index 0000000000000000000000000000000000000000..d0e2bd69542be8c482aee9498b9369e4151440c8 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml @@ -0,0 +1,45 @@ +Models: +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py + In Collection: DarkPose + Metadata: + Architecture: &id001 + - HRNet + - DarkPose + Training Data: COCO-WholeBody + Name: td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.693 + Body AR: 0.764 + Face AP: 0.737 + Face AR: 0.809 + Foot AP: 0.564 + Foot AR: 0.674 + Hand AP: 0.503 + Hand AR: 0.602 + Whole AP: 0.582 + Whole AR: 0.671 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark-469327ef_20200922.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py + In Collection: DarkPose + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.742 + Body AR: 0.807 + Face AP: 0.841 + Face AR: 0.892 + Foot AP: 0.707 + Foot AR: 0.806 + Hand AP: 0.602 + Hand AR: 0.694 + Whole AP: 0.661 + Whole AR: 0.743 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md new file mode 100644 index 0000000000000000000000000000000000000000..e4a189833b6b731d0efe5a5e6b9426c9a78ce1b3 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md @@ -0,0 +1,43 @@ +<!-- [ALGORITHM] --> + +<details> +<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary> + +```bibtex +@inproceedings{xiao2018simple, + title={Simple baselines for human pose estimation and tracking}, + author={Xiao, Bin and Wu, Haiping and Wei, Yichen}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={466--481}, + year={2018} +} +``` + +</details> + +<!-- [DATASET] --> + +<details> +<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody (ECCV'2020)</a></summary> + +```bibtex +@inproceedings{jin2020whole, + title={Whole-Body Human Pose Estimation in the Wild}, + author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2020} +} +``` + +</details> + +Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset + +| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log | +| 
:-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: | +| [pose_resnet_50](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.652 | 0.738 | 0.615 | 0.749 | 0.606 | 0.715 | 0.460 | 0.584 | 0.521 | 0.633 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192-9e37ed88_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192_20201004.log.json) | +| [pose_resnet_50](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py) | 384x288 | 0.666 | 0.747 | 0.634 | 0.763 | 0.731 | 0.811 | 0.536 | 0.646 | 0.574 | 0.670 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288-ce11e294_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288_20201004.log.json) | +| [pose_resnet_101](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py) | 256x192 | 0.669 | 0.753 | 0.637 | 0.766 | 0.611 | 0.722 | 0.463 | 0.589 | 0.531 | 0.645 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192-7325f982_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192_20201004.log.json) | +| [pose_resnet_101](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.692 | 0.770 | 0.680 | 0.799 | 0.746 | 0.820 | 0.548 | 0.657 | 0.597 | 0.693 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288-6c137b9a_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288_20201004.log.json) | +| [pose_resnet_152](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py) | 256x192 | 0.682 | 0.764 | 0.661 | 0.787 | 0.623 | 0.728 | 0.481 | 0.607 | 0.548 | 0.661 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192-5de8ae23_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192_20201004.log.json) | +| [pose_resnet_152](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.704 | 0.780 | 0.693 | 0.813 | 0.751 | 0.824 | 0.559 | 0.666 | 0.610 | 0.705 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288-eab8caa8_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288_20201004.log.json) | diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml new file mode 100644 index 0000000000000000000000000000000000000000..0e8db24f6acb4166776e7cbf4ba2109b1f2a28a3 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml @@ -0,0 +1,128 @@ +Models: +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py + In Collection: 
SimpleBaseline2D + Metadata: + Architecture: &id001 + - SimpleBaseline2D + Training Data: COCO-WholeBody + Name: td-hm_res50_8xb64-210e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.652 + Body AR: 0.738 + Face AP: 0.606 + Face AR: 0.715 + Foot AP: 0.615 + Foot AR: 0.749 + Hand AP: 0.46 + Hand AR: 0.584 + Whole AP: 0.521 + Whole AR: 0.633 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192-9e37ed88_20201004.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_res50_8xb64-210e_coco-wholebody-384x288 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.666 + Body AR: 0.747 + Face AP: 0.731 + Face AR: 0.811 + Foot AP: 0.634 + Foot AR: 0.763 + Hand AP: 0.536 + Hand AR: 0.646 + Whole AP: 0.574 + Whole AR: 0.67 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288-ce11e294_20201004.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_res101_8xb32-210e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.669 + Body AR: 0.753 + Face AP: 0.611 + Face AR: 0.722 + Foot AP: 0.637 + Foot AR: 0.766 + Hand AP: 0.463 + Hand AR: 0.589 + Whole AP: 0.531 + Whole AR: 0.645 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192-7325f982_20201004.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_res101_8xb32-210e_coco-wholebody-384x288 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.692 + Body AR: 0.77 + Face AP: 0.746 + Face AR: 0.82 + Foot AP: 0.68 + Foot AR: 0.799 + Hand AP: 0.548 + Hand AR: 0.657 + Whole AP: 0.598 + Whole AR: 0.691 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288-6c137b9a_20201004.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_res152_8xb32-210e_coco-wholebody-256x192 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.682 + Body AR: 0.764 + Face AP: 0.623 + Face AR: 0.728 + Foot AP: 0.661 + Foot AR: 0.787 + Hand AP: 0.481 + Hand AR: 0.607 + Whole AP: 0.548 + Whole AR: 0.661 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192-5de8ae23_20201004.pth +- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py + In Collection: SimpleBaseline2D + Metadata: + Architecture: *id001 + Training Data: COCO-WholeBody + Name: td-hm_res152_8xb32-210e_coco-wholebody-384x288 + Results: + - Dataset: COCO-WholeBody + Metrics: + Body AP: 0.704 + Body AR: 0.78 + Face AP: 0.751 + Face AR: 0.824 + Foot AP: 0.693 + Foot AR: 0.813 + Hand AP: 0.559 + 
Hand AR: 0.666 + Whole AP: 0.61 + Whole AR: 0.705 + Task: Wholebody 2D Keypoint + Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288-eab8caa8_20201004.pth diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..2595e3fc13e6913a01af45fa8d7b9c6377511ddb --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=133, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + 
ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..727fa9472ec9c446cb572e6c4fcd49976bf3916b --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=133, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + 
dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..ffee1d1383e4757b79ed0ea4461c69d7b4247b15 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(192, 256), + heatmap_size=(48, 64), + sigma=2, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', 
+ checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=32, + out_channels=133, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..892b4b7936123840c3192e87491123e5f11b3f7f --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + 
bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=133, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..d587dbc45bf2f90a3912e263d63d0dc64205298a --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py @@ -0,0 +1,150 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', 
+ begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=133, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py new file mode 100644 
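Every config in this batch sets `optim_wrapper.optimizer.lr = 5e-4` alongside `auto_scale_lr = dict(base_batch_size=512)` (the ResNet-101 variants further below use 256). The following is a rough sketch of the linear scaling rule mmengine applies, under the assumption that training is launched with the optional `--auto-scale-lr` flag; without it, the configured LR is used as-is:

```python
# Illustrative arithmetic for mmengine's linear LR auto-scaling; the
# values mirror the td-hm_hrnet-w48_8xb32-* configs in this diff.
base_lr = 5e-4           # optim_wrapper.optimizer.lr
base_batch_size = 512    # auto_scale_lr['base_batch_size']

# "8xb32" in the config name: 8 GPUs x 32 samples per GPU.
num_gpus, samples_per_gpu = 8, 32
actual_batch_size = num_gpus * samples_per_gpu            # 256

scaled_lr = base_lr * actual_batch_size / base_batch_size
print(scaled_lr)  # 0.00025 -- halved relative to the configured 5e-4
```

The same rule leaves the "8xb64" configs with a 512 base untouched (8 x 64 = 512), which is presumably why the w32, ResNet-50, and w48 variants can share one base LR.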
index 0000000000000000000000000000000000000000..63175b99ea3e604fb87e1e45ef921aee2e7a1b16 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py @@ -0,0 +1,154 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', + input_size=(288, 384), + heatmap_size=(72, 96), + sigma=3, + unbiased=True) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(48, 96)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(48, 96, 192)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(48, 96, 192, 384))), + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w48-8ef0771d.pth'), + ), + head=dict( + type='HeatmapHead', + in_channels=48, + out_channels=133, + deconv_out_channels=None, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + 
data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..c0d8187ab47b54d445d3f125da596f381c494309 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=133, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + 
data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..42e98575fba714ab65f3f19f226b5c06c2898a93 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=256) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=101, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=133, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + 
data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..10c16eb71f9ac28ea6746e85d51ed526dd035abe --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=133, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + 
data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py new file mode 100644 index 0000000000000000000000000000000000000000..43ec5fb67c23df4e5e3d1c93072c41e0d08b88a6 --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=152, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=133, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + 
data_prefix=dict(img='val2017/'), + test_mode=True, + bbox_file='data/coco/person_detection_results/' + 'COCO_val2017_detections_AP_H_56_person.json', + pipeline=val_pipeline, + )) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CocoWholeBodyMetric', + ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json') +test_evaluator = val_evaluator diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py new file mode 100644 index 0000000000000000000000000000000000000000..e568c78b175bf3cc3364235c671d04944d84c53f --- /dev/null +++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py @@ -0,0 +1,121 @@ +_base_ = ['../../../_base_/default_runtime.py'] + +# runtime +train_cfg = dict(max_epochs=210, val_interval=10) + +# optimizer +optim_wrapper = dict(optimizer=dict( + type='Adam', + lr=5e-4, +)) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', begin=0, end=500, start_factor=0.001, + by_epoch=False), # warm-up + dict( + type='MultiStepLR', + begin=0, + end=210, + milestones=[170, 200], + gamma=0.1, + by_epoch=True) +] + +# automatically scaling LR based on the actual training batch size +auto_scale_lr = dict(base_batch_size=512) + +# hooks +default_hooks = dict( + checkpoint=dict(save_best='coco-wholebody/AP', rule='greater')) + +# codec settings +codec = dict( + type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2) + +# model settings +model = dict( + type='TopdownPoseEstimator', + data_preprocessor=dict( + type='PoseDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True), + backbone=dict( + type='ResNet', + depth=50, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + ), + head=dict( + type='HeatmapHead', + in_channels=2048, + out_channels=133, + loss=dict(type='KeypointMSELoss', use_target_weight=True), + decoder=codec), + test_cfg=dict( + flip_test=True, + flip_mode='heatmap', + shift_heatmap=True, + )) + +# base dataset settings +dataset_type = 'CocoWholeBodyDataset' +data_mode = 'topdown' +data_root = 'data/coco/' + +# pipelines +train_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='RandomFlip', direction='horizontal'), + dict(type='RandomHalfBody'), + dict(type='RandomBBoxTransform'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='GenerateTarget', encoder=codec), + dict(type='PackPoseInputs') +] +val_pipeline = [ + dict(type='LoadImage'), + dict(type='GetBBoxCenterScale'), + dict(type='TopdownAffine', input_size=codec['input_size']), + dict(type='PackPoseInputs') +] + +# data loaders +train_dataloader = dict( + batch_size=64, + num_workers=2, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_train_v1.0.json', + data_prefix=dict(img='train2017/'), + pipeline=train_pipeline, + )) +val_dataloader = dict( + batch_size=32, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_mode=data_mode, + ann_file='annotations/coco_wholebody_val_v1.0.json', + 
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..6869d17ba998b7918133eefcf98fc3344e729a26
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='ResNet',
+        depth=50,
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=133,
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_train_v1.0.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_val_v1.0.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        bbox_file='data/coco/person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+    type='CocoWholeBodyMetric',
+    ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
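All of these configs share `auto_scale_lr = dict(base_batch_size=512)`, i.e. 8 GPUs x 64 samples as in the `8xb64` file names. When LR auto-scaling is switched on at train time (in mmengine-based repos this is typically a `--auto-scale-lr` flag; treat that as an assumption here), the optimizer LR is scaled linearly by the actual total batch size. A sketch of the rule, with an illustrative helper name:

```python
# Linear LR scaling rule implied by auto_scale_lr(base_batch_size=512).
# scaled_lr() is an illustrative helper, not an mmengine function.
def scaled_lr(base_lr, num_gpus, samples_per_gpu, base_batch_size=512):
    return base_lr * (num_gpus * samples_per_gpu) / base_batch_size

print(scaled_lr(5e-4, 8, 64))  # 8xb64 -> 5.0e-04, LR unchanged
print(scaled_lr(5e-4, 8, 32))  # 8xb32 (the res152 config) -> 2.5e-04
```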
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..cad9c539bef73cce6fc8e48e9d91489ea9f72270
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(type='ViPNAS_MobileNetV3'),
+    head=dict(
+        type='ViPNASHead',
+        in_channels=160,
+        out_channels=133,
+        deconv_out_channels=(160, 160, 160),
+        deconv_num_groups=(160, 160, 160),
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(
+        type='RandomBBoxTransform',
+        rotate_factor=60,
+        scale_factor=(0.75, 1.25)),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_train_v1.0.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_val_v1.0.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        bbox_file='data/coco/person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+    type='CocoWholeBodyMetric',
+    ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d34ea50db64b6a2716469ebf872045b9308fc413
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,126 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+    type='MSRAHeatmap',
+    input_size=(192, 256),
+    heatmap_size=(48, 64),
+    sigma=2,
+    unbiased=True)
+
+# model settings
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(type='ViPNAS_MobileNetV3'),
+    head=dict(
+        type='ViPNASHead',
+        in_channels=160,
+        out_channels=133,
+        deconv_out_channels=(160, 160, 160),
+        deconv_num_groups=(160, 160, 160),
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(
+        type='RandomBBoxTransform',
+        rotate_factor=60,
+        scale_factor=(0.75, 1.25)),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_train_v1.0.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_val_v1.0.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        bbox_file='data/coco/person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+    type='CocoWholeBodyMetric',
+    ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
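For orientation, a hedged usage sketch for the config just added: loading it with the mmpose 1.x high-level API and running top-down inference on one image. The local image path is a placeholder; the checkpoint URL is the one listed in the model docs further below.

```python
# Hedged sketch: run the dark mbv3 whole-body config on a single image.
# With no bboxes given, inference_topdown treats the full image as one
# person bounding box.
from mmpose.apis import inference_topdown, init_model

cfg = ('mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/'
       'td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py')
ckpt = ('https://download.openmmlab.com/mmpose/top_down/vipnas/'
        'vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205.pth')

model = init_model(cfg, ckpt, device='cpu')
results = inference_topdown(model, 'demo.jpg')  # 'demo.jpg' is a placeholder
print(results[0].pred_instances.keypoints.shape)  # e.g. (1, 133, 2)
```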
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..822e4c698a54a82a62fd30f6cc891f814d024930
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='ViPNAS_ResNet',
+        depth=50,
+    ),
+    head=dict(
+        type='ViPNASHead',
+        in_channels=608,
+        out_channels=133,
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(
+        type='RandomBBoxTransform',
+        rotate_factor=60,
+        scale_factor=(0.75, 1.25)),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_train_v1.0.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_val_v1.0.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        bbox_file='data/coco/person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+    type='CocoWholeBodyMetric',
+    ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..15b152fe96d3d60806c3461a04a0a4b5c66b3c96
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,127 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+    type='MSRAHeatmap',
+    input_size=(192, 256),
+    heatmap_size=(48, 64),
+    sigma=2,
+    unbiased=True)
+
+# model settings
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='ViPNAS_ResNet',
+        depth=50,
+    ),
+    head=dict(
+        type='ViPNASHead',
+        in_channels=608,
+        out_channels=133,
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(
+        type='RandomBBoxTransform',
+        rotate_factor=60,
+        scale_factor=(0.75, 1.25)),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_train_v1.0.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/coco_wholebody_val_v1.0.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        bbox_file='data/coco/person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+    type='CocoWholeBodyMetric',
+    ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..63fc0aed8af576808a7c6ee7dac89c3235d2ebfc
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md
@@ -0,0 +1,38 @@
+<!-- [ALGORITHM] -->
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2105.10154">ViPNAS (CVPR'2021)</a></summary>
+
+```bibtex
+@inproceedings{xu2021vipnas,
+  title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+  author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  year={2021}
+}
+```
+
+</details>
+
+<!-- [DATASET] -->
+
+<details>
+<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody (ECCV'2020)</a></summary>
+
+```bibtex
+@inproceedings{jin2020whole,
+  title={Whole-Body Human Pose Estimation in the Wild},
+  author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+  booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+  year={2020}
+}
+```
+
+</details>
+
+Results on the COCO-WholeBody v1.0 val set, using a person detector with 56.4 human AP on COCO val2017
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [S-ViPNAS-MobileNetV3](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.619 | 0.700 | 0.477 | 0.608 | 0.585 | 0.689 | 0.386 | 0.505 | 0.473 | 0.578 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192-0fee581a_20211205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_20211205.log.json) |
+| [S-ViPNAS-Res50](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.643 | 0.726 | 0.553 | 0.694 | 0.587 | 0.698 | 0.410 | 0.529 | 0.495 | 0.607 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192-49e1c3a4_20211112.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_20211112.log.json) |
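The "person detector with 56.4 human AP" in the caption above refers to the standard COCO person-detection results file that the val/test loaders point at via `bbox_file`: evaluation runs top-down pose estimation on detected boxes rather than ground-truth ones. A hedged sketch of reading that file, assuming the usual COCO detection-results layout (a list of dicts with `image_id`, `bbox` in xywh order, and `score`):

```python
import json

# Group detected person boxes by image, roughly as the top-down val
# loader does; the path is the one used in the configs above.
with open('data/coco/person_detection_results/'
          'COCO_val2017_detections_AP_H_56_person.json') as f:
    detections = json.load(f)

boxes_per_image = {}
for det in detections:
    boxes_per_image.setdefault(det['image_id'], []).append(
        (det['bbox'], det['score']))  # bbox is [x, y, w, h]
```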
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..28148364075f0ea14c844965a82db732620fd3f2
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
@@ -0,0 +1,44 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py
+  In Collection: ViPNAS
+  Metadata:
+    Architecture: &id001
+    - ViPNAS
+    Training Data: COCO-WholeBody
+  Name: td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192
+  Results:
+  - Dataset: COCO-WholeBody
+    Metrics:
+      Body AP: 0.619
+      Body AR: 0.7
+      Face AP: 0.585
+      Face AR: 0.689
+      Foot AP: 0.477
+      Foot AR: 0.608
+      Hand AP: 0.386
+      Hand AR: 0.505
+      Whole AP: 0.473
+      Whole AR: 0.578
+    Task: Wholebody 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192-0fee581a_20211205.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py
+  In Collection: ViPNAS
+  Metadata:
+    Architecture: *id001
+    Training Data: COCO-WholeBody
+  Name: td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192
+  Results:
+  - Dataset: COCO-WholeBody
+    Metrics:
+      Body AP: 0.643
+      Body AR: 0.726
+      Face AP: 0.587
+      Face AR: 0.698
+      Foot AP: 0.553
+      Foot AR: 0.694
+      Hand AP: 0.41
+      Hand AR: 0.529
+      Whole AP: 0.495
+      Whole AR: 0.607
+    Task: Wholebody 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192-49e1c3a4_20211112.pth
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..e39c66e913ab44ae58e511e987397ebae5e30fe1
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md
@@ -0,0 +1,55 @@
+<!-- [ALGORITHM] -->
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2105.10154">ViPNAS (CVPR'2021)</a></summary>
+
+```bibtex
+@inproceedings{xu2021vipnas,
+  title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+  author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  year={2021}
+}
+```
+
+</details>
+
+<!-- [ALGORITHM] -->
+
+<details>
+<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html">DarkPose (CVPR'2020)</a></summary>
+
+```bibtex
+@inproceedings{zhang2020distribution,
+  title={Distribution-aware coordinate representation for human pose estimation},
+  author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  pages={7093--7102},
+  year={2020}
+}
+```
+
+</details>
+
+<!-- [DATASET] -->
+
+<details>
+<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-030-58545-7_12">COCO-WholeBody (ECCV'2020)</a></summary>
+
+```bibtex
+@inproceedings{jin2020whole,
+  title={Whole-Body Human Pose Estimation in the Wild},
+  author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+  booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+  year={2020}
+}
+```
+
+</details>
+
+Results on the COCO-WholeBody v1.0 val set, using a person detector with 56.4 human AP on COCO val2017
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [S-ViPNAS-MobileNetV3_dark](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.632 | 0.710 | 0.530 | 0.660 | 0.672 | 0.771 | 0.404 | 0.519 | 0.508 | 0.607 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark_20211205.log.json) |
+| [S-ViPNAS-Res50_dark](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.650 | 0.732 | 0.550 | 0.686 | 0.684 | 0.783 | 0.437 | 0.554 | 0.528 | 0.632 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark_20211112.log.json) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5449af0ccd67ea433218a166b421888df3698ef8
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
@@ -0,0 +1,45 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py
+  In Collection: ViPNAS
+  Metadata:
+    Architecture: &id001
+    - ViPNAS
+    - DarkPose
+    Training Data: COCO-WholeBody
+  Name: td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192
+  Results:
+  - Dataset: COCO-WholeBody
+    Metrics:
+      Body AP: 0.632
+      Body AR: 0.71
+      Face AP: 0.672
+      Face AR: 0.771
+      Foot AP: 0.53
+      Foot AR: 0.66
+      Hand AP: 0.404
+      Hand AR: 0.519
+      Whole AP: 0.508
+      Whole AR: 0.607
+    Task: Wholebody 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py
+  In Collection: ViPNAS
+  Metadata:
+    Architecture: *id001
+    Training Data: COCO-WholeBody
+  Name: td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192
+  Results:
+  - Dataset: COCO-WholeBody
+    Metrics:
+      Body AP: 0.65
+      Body AR: 0.732
+      Face AP: 0.684
+      Face AR: 0.783
+      Foot AP: 0.55
+      Foot AR: 0.686
+      Hand AP: 0.437
+      Hand AR: 0.554
+      Whole AP: 0.528
+      Whole AR: 0.632
+    Task: Wholebody 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth
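Comparing this metadata with the non-dark ViPNAS yml above, the only config-level difference is `unbiased=True` in the codec (DarkPose's distribution-aware decoding); the Whole AP numbers quantify what that buys. A quick check of the reported deltas:

```python
# Whole AP from the two model-index files: plain vs. DARK decoding.
plain = {'S-ViPNAS-MobileNetV3': 0.473, 'S-ViPNAS-Res50': 0.495}
dark = {'S-ViPNAS-MobileNetV3': 0.508, 'S-ViPNAS-Res50': 0.528}
for arch in plain:
    print(f'{arch}: +{dark[arch] - plain[arch]:.3f} Whole AP with DARK')
# -> +0.035 (MobileNetV3) and +0.033 (Res50)
```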
diff --git a/pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth b/pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5a2783169751829b0fad53d9a6402bf9d2762839
--- /dev/null
+++ b/pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd48c057be7f21ff22f4a077c87cebcd397b757e94de5605f43928c05e96392
+size 2524543895