Spaces:

napatswift
/

votecount-ml-be

Build error

App Files Community

napatswift committed on May 22, 2023

Commit

6ad06e7

1 Parent(s): 741d744

Change model arch

Browse files

Files changed (3) hide show

main.py +3 -5
model/text-det/psenet.pth +3 -0
model/text-det/psenet.py +326 -0

main.py CHANGED Viewed

@@ -7,11 +7,9 @@ import torch
 print('Loading model...')
 device = 'gpu' if torch.cuda.is_available() else 'cpu'
-table_det = init_detector('model/table-det/config.py',
-                          'model/table-det/model.pth', device=device)
-ocr = MMOCRInferencer(det='model/text-det/config.py',
-            det_weights='model/text-det/model.pth',
             device=device)
 def get_rec(points):
@@ -39,4 +37,4 @@ def run():
 if __name__ == "__main__":
-    run()

 print('Loading model...')
 device = 'gpu' if torch.cuda.is_available() else 'cpu'
+ocr = MMOCRInferencer(det='model/text-det/psenet.py',
+            det_weights='model/text-det/psenet.pth',
             device=device)
 def get_rec(points):
 if __name__ == "__main__":
+    run()

model/text-det/psenet.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8575eddcbed1c0a1151817ef05bb2df11a27979ea1f4a61fde5bbecd0c3e2595
+size 352447333

model/text-det/psenet.py ADDED Viewed

	@@ -0,0 +1,326 @@

+file_client_args = dict(backend='disk')
+model = dict(
+    type='PSENet',
+    backbone=dict(
+        type='mmdet.ResNet',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        frozen_stages=-1,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+        norm_eval=True,
+        style='caffe'),
+    neck=dict(
+        type='FPNF',
+        in_channels=[256, 512, 1024, 2048],
+        out_channels=256,
+        fusion_type='concat'),
+    det_head=dict(
+        type='PSEHead',
+        in_channels=[256],
+        hidden_dim=256,
+        out_channel=7,
+        module_loss=dict(type='PSEModuleLoss'),
+        postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')),
+    data_preprocessor=dict(
+        type='TextDetDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        pad_size_divisor=32))
+train_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        file_client_args=dict(backend='disk'),
+        color_type='color_ignore_orientation'),
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
+    dict(
+        type='TorchVisionWrapper',
+        op='ColorJitter',
+        brightness=0.12549019607843137,
+        saturation=0.5),
+    dict(type='FixInvalidPolygon'),
+    dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
+    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
+    dict(type='RandomRotate', max_angle=10),
+    dict(type='TextDetRandomCrop', target_size=(736, 736)),
+    dict(type='Pad', size=(736, 736)),
+    dict(
+        type='PackTextDetInputs',
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
+]
+test_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        file_client_args=dict(backend='disk'),
+        color_type='color_ignore_orientation'),
+    dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True),
+    dict(
+        type='PackTextDetInputs',
+        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
+]
+thvc_textdet_data_root = 'data/det/vl+vc-textdet'
+thvc_textdet_train = dict(
+    type='OCRDataset',
+    data_root='data/det/vl+vc-textdet',
+    ann_file='textdet_train.json',
+    data_prefix=dict(img_path='imgs/'),
+    filter_cfg=dict(filter_empty_gt=True, min_size=32),
+    pipeline=[
+        dict(
+            type='LoadImageFromFile',
+            file_client_args=dict(backend='disk'),
+            color_type='color_ignore_orientation'),
+        dict(
+            type='LoadOCRAnnotations',
+            with_polygon=True,
+            with_bbox=True,
+            with_label=True),
+        dict(
+            type='TorchVisionWrapper',
+            op='ColorJitter',
+            brightness=0.12549019607843137,
+            saturation=0.5),
+        dict(type='FixInvalidPolygon'),
+        dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
+        dict(type='RandomFlip', prob=0.5, direction='horizontal'),
+        dict(type='RandomRotate', max_angle=10),
+        dict(type='TextDetRandomCrop', target_size=(736, 736)),
+        dict(type='Pad', size=(736, 736)),
+        dict(
+            type='PackTextDetInputs',
+            meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
+    ])
+thvc_textdet_test = dict(
+    type='OCRDataset',
+    data_root='data/det/vl+vc-textdet',
+    ann_file='textdet_test.json',
+    data_prefix=dict(img_path='imgs/'),
+    test_mode=True,
+    pipeline=None)
+thvote_textdet_data_root = 'data/det/textdet-thvote'
+thvote_textdet_train = dict(
+    type='OCRDataset',
+    data_root='data/det/textdet-thvote',
+    ann_file='textdet_train.json',
+    data_prefix=dict(img_path='imgs/'),
+    filter_cfg=dict(filter_empty_gt=True, min_size=32),
+    pipeline=None)
+thvote_textdet_test = dict(
+    type='OCRDataset',
+    data_root='data/det/textdet-thvote',
+    ann_file='textdet_test.json',
+    data_prefix=dict(img_path='imgs/'),
+    test_mode=True,
+    pipeline=[
+        dict(
+            type='LoadImageFromFile',
+            file_client_args=dict(backend='disk'),
+            color_type='color_ignore_orientation'),
+        dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
+        dict(
+            type='LoadOCRAnnotations',
+            with_polygon=True,
+            with_bbox=True,
+            with_label=True),
+        dict(
+            type='PackTextDetInputs',
+            meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
+    ])
+default_scope = 'mmocr'
+env_cfg = dict(
+    cudnn_benchmark=True,
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+    dist_cfg=dict(backend='nccl'))
+randomness = dict(seed=None)
+default_hooks = dict(
+    timer=dict(type='IterTimerHook'),
+    logger=dict(type='LoggerHook', interval=100),
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    checkpoint=dict(type='CheckpointHook', interval=10),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    sync_buffer=dict(type='SyncBuffersHook'),
+    visualization=dict(
+        type='VisualizationHook',
+        interval=1,
+        enable=False,
+        show=False,
+        draw_gt=False,
+        draw_pred=False))
+log_level = 'INFO'
+log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True)
+load_from = None
+resume = True
+val_evaluator = dict(type='HmeanIOUMetric')
+test_evaluator = dict(type='HmeanIOUMetric')
+vis_backends = [dict(type='LocalVisBackend')]
+visualizer = dict(
+    type='TextDetLocalVisualizer',
+    name='visualizer',
+    vis_backends=[dict(type='LocalVisBackend')])
+max_epochs = 200
+optim_wrapper = dict(
+    type='OptimWrapper', optimizer=dict(type='Adam', lr=0.001))
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=30, val_interval=10)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+param_scheduler = [dict(type='PolyLR', power=0.9, end=200)]
+thvotecount_textdet_train = dict(
+    type='OCRDataset',
+    data_root='data/det/vl+vc-textdet',
+    ann_file='textdet_train.json',
+    data_prefix=dict(img_path='imgs/'),
+    filter_cfg=dict(filter_empty_gt=True, min_size=32),
+    pipeline=[
+        dict(
+            type='LoadImageFromFile',
+            file_client_args=dict(backend='disk'),
+            color_type='color_ignore_orientation'),
+        dict(
+            type='LoadOCRAnnotations',
+            with_polygon=True,
+            with_bbox=True,
+            with_label=True),
+        dict(
+            type='TorchVisionWrapper',
+            op='ColorJitter',
+            brightness=0.12549019607843137,
+            saturation=0.5),
+        dict(type='FixInvalidPolygon'),
+        dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
+        dict(type='RandomFlip', prob=0.5, direction='horizontal'),
+        dict(type='RandomRotate', max_angle=10),
+        dict(type='TextDetRandomCrop', target_size=(736, 736)),
+        dict(type='Pad', size=(736, 736)),
+        dict(
+            type='PackTextDetInputs',
+            meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
+    ])
+thvotecount_textdet_test = dict(
+    type='OCRDataset',
+    data_root='data/det/textdet-thvote',
+    ann_file='textdet_test.json',
+    data_prefix=dict(img_path='imgs/'),
+    test_mode=True,
+    pipeline=[
+        dict(
+            type='LoadImageFromFile',
+            file_client_args=dict(backend='disk'),
+            color_type='color_ignore_orientation'),
+        dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
+        dict(
+            type='LoadOCRAnnotations',
+            with_polygon=True,
+            with_bbox=True,
+            with_label=True),
+        dict(
+            type='PackTextDetInputs',
+            meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
+    ])
+train_dataloader = dict(
+    batch_size=10,
+    num_workers=16,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='OCRDataset',
+        data_root='data/det/vl+vc-textdet',
+        ann_file='textdet_train.json',
+        data_prefix=dict(img_path='imgs/'),
+        filter_cfg=dict(filter_empty_gt=True, min_size=32),
+        pipeline=[
+            dict(
+                type='LoadImageFromFile',
+                file_client_args=dict(backend='disk'),
+                color_type='color_ignore_orientation'),
+            dict(
+                type='LoadOCRAnnotations',
+                with_polygon=True,
+                with_bbox=True,
+                with_label=True),
+            dict(
+                type='TorchVisionWrapper',
+                op='ColorJitter',
+                brightness=0.12549019607843137,
+                saturation=0.5),
+            dict(type='FixInvalidPolygon'),
+            dict(
+                type='ShortScaleAspectJitter',
+                short_size=736,
+                scale_divisor=32),
+            dict(type='RandomFlip', prob=0.5, direction='horizontal'),
+            dict(type='RandomRotate', max_angle=10),
+            dict(type='TextDetRandomCrop', target_size=(736, 736)),
+            dict(type='Pad', size=(736, 736)),
+            dict(
+                type='PackTextDetInputs',
+                meta_keys=('img_path', 'ori_shape', 'img_shape',
+                           'scale_factor'))
+        ]))
+val_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type='OCRDataset',
+        data_root='data/det/textdet-thvote',
+        ann_file='textdet_test.json',
+        data_prefix=dict(img_path='imgs/'),
+        test_mode=True,
+        pipeline=[
+            dict(
+                type='LoadImageFromFile',
+                file_client_args=dict(backend='disk'),
+                color_type='color_ignore_orientation'),
+            dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
+            dict(
+                type='LoadOCRAnnotations',
+                with_polygon=True,
+                with_bbox=True,
+                with_label=True),
+            dict(
+                type='PackTextDetInputs',
+                meta_keys=('img_path', 'ori_shape', 'img_shape',
+                           'scale_factor'))
+        ]))
+test_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type='OCRDataset',
+        data_root='data/det/textdet-thvote',
+        ann_file='textdet_test.json',
+        data_prefix=dict(img_path='imgs/'),
+        test_mode=True,
+        pipeline=[
+            dict(
+                type='LoadImageFromFile',
+                file_client_args=dict(backend='disk'),
+                color_type='color_ignore_orientation'),
+            dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
+            dict(
+                type='LoadOCRAnnotations',
+                with_polygon=True,
+                with_bbox=True,
+                with_label=True),
+            dict(
+                type='PackTextDetInputs',
+                meta_keys=('img_path', 'ori_shape', 'img_shape',
+                           'scale_factor'))
+        ]))
+auto_scale_lr = dict(base_batch_size=32)
+launcher = 'none'
+work_dir = './work_dirs/psenet_resnet50_fpnf_votecount'