diff --git "a/Vim-S_15epoch_80.0/logs/log_rank0.txt" "b/Vim-S_15epoch_80.0/logs/log_rank0.txt" new file mode 100644--- /dev/null +++ "b/Vim-S_15epoch_80.0/logs/log_rank0.txt" @@ -0,0 +1,4042 @@ +[2024-12-06 04:46:31 root] (main.py 226): INFO Namespace(batch_size=128, epochs=15, model='RMeeTo_small', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', device='cuda', seed=0, autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='11', metric='X', distance='cosine', if_order=True, if_random=False, if_merge_odd=False, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-06 04:46:37 root] (main.py 292): INFO Creating model: RMeeTo_small +[2024-12-06 04:46:45 root] (main.py 372): INFO number of params: 25796584 +[2024-12-06 04:46:46 root] (main.py 488): INFO Start training for 15 epochs +[2024-12-06 04:46:52 root] (utils.py 283): INFO Epoch: [0] [ 0/2502] eta: 4:18:57 lr: 0.000020 loss_cls: 5.0392 (5.0392) grad_norm: 11.4156 (11.4156) time: 6.2100 data: 0.0021 max mem: 13594 +[2024-12-06 04:47:05 root] (utils.py 283): INFO Epoch: [0] [ 10/2502] eta: 1:13:27 lr: 0.000020 loss_cls: 4.8434 (4.6413) grad_norm: 12.2080 (14.5148) time: 1.7686 data: 0.0004 max mem: 13912 +[2024-12-06 04:47:18 root] (utils.py 283): INFO Epoch: [0] [ 20/2502] eta: 1:03:54 lr: 0.000020 loss_cls: 4.4765 (4.4108) grad_norm: 10.3880 (11.8864) time: 1.3118 data: 0.0003 max mem: 13912 +[2024-12-06 04:47:31 root] (utils.py 283): INFO Epoch: [0] [ 30/2502] eta: 1:00:21 lr: 0.000020 loss_cls: 4.3284 (4.3485) grad_norm: 7.3990 (12.5920) time: 1.2982 data: 0.0003 max mem: 13912 +[2024-12-06 04:47:44 root] (utils.py 283): INFO Epoch: [0] [ 40/2502] eta: 0:58:28 lr: 0.000020 loss_cls: 4.0105 (4.2020) grad_norm: 6.9753 (11.1243) time: 1.2993 data: 0.0003 max mem: 13912 +[2024-12-06 04:47:57 root] (utils.py 283): INFO Epoch: [0] [ 50/2502] eta: 0:57:15 lr: 0.000020 loss_cls: 3.7585 (4.1300) grad_norm: 5.2426 (10.2908) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 04:48:10 root] (utils.py 283): INFO Epoch: [0] [ 60/2502] eta: 0:56:20 lr: 0.000020 loss_cls: 3.7585 (4.0441) grad_norm: 5.6141 (9.6950) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 04:48:23 root] (utils.py 283): INFO Epoch: [0] [ 70/2502] eta: 0:55:40 lr: 0.000020 loss_cls: 3.6479 (3.9668) grad_norm: 4.5526 (8.9650) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 04:48:36 root] (utils.py 283): INFO Epoch: [0] [ 80/2502] eta: 0:55:04 lr: 0.000020 loss_cls: 3.6611 (3.9090) grad_norm: 4.3231 (8.4416) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 04:48:49 root] (utils.py 283): INFO Epoch: [0] [ 90/2502] eta: 0:54:33 lr: 0.000020 loss_cls: 3.6611 (3.8681) grad_norm: 4.3076 (8.1113) time: 1.2983 data: 0.0003 max mem: 13912 +[2024-12-06 04:49:02 root] (utils.py 283): INFO Epoch: [0] [ 100/2502] eta: 0:54:07 lr: 0.000020 loss_cls: 3.8828 (3.8556) grad_norm: 4.4620 (7.7831) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 04:49:16 root] (utils.py 283): INFO Epoch: [0] [ 110/2502] eta: 0:53:48 lr: 0.000020 loss_cls: 3.8828 (3.8249) grad_norm: 4.1532 (7.4743) time: 1.3156 data: 0.0002 max mem: 13912 +[2024-12-06 04:49:29 root] (utils.py 283): INFO Epoch: [0] [ 120/2502] eta: 0:53:25 lr: 0.000020 loss_cls: 3.6970 (3.8030) grad_norm: 3.8385 (7.2620) time: 1.3146 data: 0.0003 max mem: 13912 +[2024-12-06 04:49:42 root] (utils.py 283): INFO Epoch: [0] [ 130/2502] eta: 0:53:04 lr: 0.000020 loss_cls: 3.6676 (3.7789) grad_norm: 3.7589 (7.0136) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 04:49:55 root] (utils.py 283): INFO Epoch: [0] [ 140/2502] eta: 0:52:44 lr: 0.000020 loss_cls: 3.5360 (3.7494) grad_norm: 3.7575 (6.7961) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 04:50:08 root] (utils.py 283): INFO Epoch: [0] [ 150/2502] eta: 0:52:25 lr: 0.000020 loss_cls: 3.3884 (3.7257) grad_norm: 3.8274 (6.6175) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 04:50:21 root] (utils.py 283): INFO Epoch: [0] [ 160/2502] eta: 0:52:07 lr: 0.000020 loss_cls: 3.4582 (3.7111) grad_norm: 4.0699 (6.5439) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 04:50:34 root] (utils.py 283): INFO Epoch: [0] [ 170/2502] eta: 0:51:49 lr: 0.000020 loss_cls: 3.4712 (3.6933) grad_norm: 3.6699 (6.4289) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 04:50:47 root] (utils.py 283): INFO Epoch: [0] [ 180/2502] eta: 0:51:32 lr: 0.000020 loss_cls: 3.4257 (3.6654) grad_norm: 3.5397 (6.3263) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 04:51:00 root] (utils.py 283): INFO Epoch: [0] [ 190/2502] eta: 0:51:15 lr: 0.000020 loss_cls: 3.2235 (3.6475) grad_norm: 3.5397 (6.2804) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 04:51:13 root] (utils.py 283): INFO Epoch: [0] [ 200/2502] eta: 0:51:00 lr: 0.000020 loss_cls: 3.2679 (3.6236) grad_norm: 3.5793 (6.2000) time: 1.3087 data: 0.0003 max mem: 13912 +[2024-12-06 04:51:26 root] (utils.py 283): INFO Epoch: [0] [ 210/2502] eta: 0:50:44 lr: 0.000020 loss_cls: 3.3965 (3.6108) grad_norm: 3.5302 (6.0888) time: 1.3086 data: 0.0003 max mem: 13912 +[2024-12-06 04:51:39 root] (utils.py 283): INFO Epoch: [0] [ 220/2502] eta: 0:50:28 lr: 0.000020 loss_cls: 3.5154 (3.5961) grad_norm: 3.4923 (5.9966) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 04:51:52 root] (utils.py 283): INFO Epoch: [0] [ 230/2502] eta: 0:50:13 lr: 0.000020 loss_cls: 3.4656 (3.5866) grad_norm: 3.5991 (5.9377) time: 1.3087 data: 0.0003 max mem: 13912 +[2024-12-06 04:52:05 root] (utils.py 283): INFO Epoch: [0] [ 240/2502] eta: 0:49:58 lr: 0.000020 loss_cls: 3.4931 (3.5778) grad_norm: 3.6756 (5.8816) time: 1.3091 data: 0.0003 max mem: 13912 +[2024-12-06 04:52:18 root] (utils.py 283): INFO Epoch: [0] [ 250/2502] eta: 0:49:43 lr: 0.000020 loss_cls: 3.3805 (3.5586) grad_norm: 3.6756 (5.7969) time: 1.3069 data: 0.0002 max mem: 13912 +[2024-12-06 04:52:32 root] (utils.py 283): INFO Epoch: [0] [ 260/2502] eta: 0:49:28 lr: 0.000020 loss_cls: 3.2766 (3.5412) grad_norm: 3.2889 (5.7289) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 04:52:45 root] (utils.py 283): INFO Epoch: [0] [ 270/2502] eta: 0:49:14 lr: 0.000020 loss_cls: 3.2871 (3.5292) grad_norm: 3.5288 (5.6850) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 04:52:58 root] (utils.py 283): INFO Epoch: [0] [ 280/2502] eta: 0:48:59 lr: 0.000020 loss_cls: 3.3590 (3.5283) grad_norm: 3.4583 (5.6081) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 04:53:11 root] (utils.py 283): INFO Epoch: [0] [ 290/2502] eta: 0:48:44 lr: 0.000020 loss_cls: 3.2897 (3.5125) grad_norm: 3.3242 (5.5505) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 04:53:24 root] (utils.py 283): INFO Epoch: [0] [ 300/2502] eta: 0:48:30 lr: 0.000020 loss_cls: 2.9375 (3.4981) grad_norm: 3.3156 (5.4889) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 04:53:37 root] (utils.py 283): INFO Epoch: [0] [ 310/2502] eta: 0:48:16 lr: 0.000020 loss_cls: 3.1727 (3.4896) grad_norm: 3.4683 (5.4430) time: 1.3082 data: 0.0003 max mem: 13912 +[2024-12-06 04:53:50 root] (utils.py 283): INFO Epoch: [0] [ 320/2502] eta: 0:48:01 lr: 0.000020 loss_cls: 3.5888 (3.4987) grad_norm: 3.9280 (5.4334) time: 1.3070 data: 0.0003 max mem: 13912 +[2024-12-06 04:54:03 root] (utils.py 283): INFO Epoch: [0] [ 330/2502] eta: 0:47:47 lr: 0.000020 loss_cls: 3.6593 (3.4915) grad_norm: 4.0594 (5.4089) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 04:54:16 root] (utils.py 283): INFO Epoch: [0] [ 340/2502] eta: 0:47:33 lr: 0.000020 loss_cls: 3.5060 (3.4926) grad_norm: 3.6825 (5.3731) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 04:54:29 root] (utils.py 283): INFO Epoch: [0] [ 350/2502] eta: 0:47:19 lr: 0.000020 loss_cls: 3.5060 (3.4888) grad_norm: 3.7094 (5.4343) time: 1.3091 data: 0.0003 max mem: 13912 +[2024-12-06 04:54:42 root] (utils.py 283): INFO Epoch: [0] [ 360/2502] eta: 0:47:05 lr: 0.000020 loss_cls: 3.3399 (3.4821) grad_norm: 3.7879 (5.4184) time: 1.3089 data: 0.0003 max mem: 13912 +[2024-12-06 04:54:55 root] (utils.py 283): INFO Epoch: [0] [ 370/2502] eta: 0:46:51 lr: 0.000020 loss_cls: 3.3233 (3.4757) grad_norm: 3.3305 (5.3706) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 04:55:08 root] (utils.py 283): INFO Epoch: [0] [ 380/2502] eta: 0:46:38 lr: 0.000020 loss_cls: 3.3346 (3.4703) grad_norm: 3.2014 (5.3324) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 04:55:21 root] (utils.py 283): INFO Epoch: [0] [ 390/2502] eta: 0:46:24 lr: 0.000020 loss_cls: 3.3346 (3.4624) grad_norm: 3.4195 (5.2906) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 04:55:35 root] (utils.py 283): INFO Epoch: [0] [ 400/2502] eta: 0:46:11 lr: 0.000020 loss_cls: 3.4647 (3.4624) grad_norm: 3.4866 (5.2476) time: 1.3190 data: 0.0003 max mem: 13912 +[2024-12-06 04:55:48 root] (utils.py 283): INFO Epoch: [0] [ 410/2502] eta: 0:45:57 lr: 0.000020 loss_cls: 3.4668 (3.4651) grad_norm: 3.5178 (5.2145) time: 1.3192 data: 0.0002 max mem: 13912 +[2024-12-06 04:56:01 root] (utils.py 283): INFO Epoch: [0] [ 420/2502] eta: 0:45:44 lr: 0.000020 loss_cls: 3.4668 (3.4622) grad_norm: 3.6697 (5.1806) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 04:56:14 root] (utils.py 283): INFO Epoch: [0] [ 430/2502] eta: 0:45:31 lr: 0.000020 loss_cls: 3.5681 (3.4638) grad_norm: 3.8160 (5.1702) time: 1.3156 data: 0.0003 max mem: 13912 +[2024-12-06 04:56:27 root] (utils.py 283): INFO Epoch: [0] [ 440/2502] eta: 0:45:17 lr: 0.000020 loss_cls: 3.5681 (3.4661) grad_norm: 3.5938 (5.1410) time: 1.3160 data: 0.0003 max mem: 13912 +[2024-12-06 04:56:40 root] (utils.py 283): INFO Epoch: [0] [ 450/2502] eta: 0:45:03 lr: 0.000020 loss_cls: 3.4415 (3.4628) grad_norm: 3.2701 (5.1137) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 04:56:53 root] (utils.py 283): INFO Epoch: [0] [ 460/2502] eta: 0:44:50 lr: 0.000020 loss_cls: 3.3444 (3.4537) grad_norm: 3.5314 (5.0800) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 04:57:06 root] (utils.py 283): INFO Epoch: [0] [ 470/2502] eta: 0:44:36 lr: 0.000020 loss_cls: 2.9836 (3.4412) grad_norm: 3.2940 (5.0465) time: 1.3087 data: 0.0002 max mem: 13912 +[2024-12-06 04:57:20 root] (utils.py 283): INFO Epoch: [0] [ 480/2502] eta: 0:44:26 lr: 0.000020 loss_cls: 3.0935 (3.4362) grad_norm: 3.1705 (5.0102) time: 1.3506 data: 0.0004 max mem: 13912 +[2024-12-06 04:57:33 root] (utils.py 283): INFO Epoch: [0] [ 490/2502] eta: 0:44:13 lr: 0.000020 loss_cls: 3.3411 (3.4392) grad_norm: 3.2300 (4.9992) time: 1.3521 data: 0.0004 max mem: 13912 +[2024-12-06 04:57:46 root] (utils.py 283): INFO Epoch: [0] [ 500/2502] eta: 0:43:59 lr: 0.000020 loss_cls: 3.4180 (3.4342) grad_norm: 3.2972 (4.9876) time: 1.3079 data: 0.0003 max mem: 13912 +[2024-12-06 04:57:59 root] (utils.py 283): INFO Epoch: [0] [ 510/2502] eta: 0:43:45 lr: 0.000020 loss_cls: 3.4522 (3.4354) grad_norm: 3.3658 (4.9651) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 04:58:12 root] (utils.py 283): INFO Epoch: [0] [ 520/2502] eta: 0:43:31 lr: 0.000020 loss_cls: 3.4738 (3.4353) grad_norm: 3.4898 (4.9442) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 04:58:26 root] (utils.py 283): INFO Epoch: [0] [ 530/2502] eta: 0:43:18 lr: 0.000020 loss_cls: 3.4171 (3.4273) grad_norm: 3.5607 (4.9197) time: 1.3068 data: 0.0002 max mem: 13912 +[2024-12-06 04:58:39 root] (utils.py 283): INFO Epoch: [0] [ 540/2502] eta: 0:43:05 lr: 0.000020 loss_cls: 3.4171 (3.4237) grad_norm: 3.4291 (4.8987) time: 1.3155 data: 0.0003 max mem: 13912 +[2024-12-06 04:58:52 root] (utils.py 283): INFO Epoch: [0] [ 550/2502] eta: 0:42:51 lr: 0.000020 loss_cls: 3.5079 (3.4204) grad_norm: 3.3449 (4.8765) time: 1.3143 data: 0.0003 max mem: 13912 +[2024-12-06 04:59:05 root] (utils.py 283): INFO Epoch: [0] [ 560/2502] eta: 0:42:38 lr: 0.000020 loss_cls: 3.6046 (3.4225) grad_norm: 3.3913 (4.8530) time: 1.3091 data: 0.0002 max mem: 13912 +[2024-12-06 04:59:18 root] (utils.py 283): INFO Epoch: [0] [ 570/2502] eta: 0:42:24 lr: 0.000020 loss_cls: 3.6296 (3.4235) grad_norm: 3.3913 (4.8343) time: 1.3107 data: 0.0003 max mem: 13912 +[2024-12-06 04:59:31 root] (utils.py 283): INFO Epoch: [0] [ 580/2502] eta: 0:42:11 lr: 0.000020 loss_cls: 3.6146 (3.4236) grad_norm: 3.3519 (4.8080) time: 1.3119 data: 0.0003 max mem: 13912 +[2024-12-06 04:59:44 root] (utils.py 283): INFO Epoch: [0] [ 590/2502] eta: 0:41:57 lr: 0.000020 loss_cls: 3.4865 (3.4224) grad_norm: 3.4254 (4.7883) time: 1.3092 data: 0.0003 max mem: 13912 +[2024-12-06 04:59:57 root] (utils.py 283): INFO Epoch: [0] [ 600/2502] eta: 0:41:44 lr: 0.000020 loss_cls: 3.4865 (3.4188) grad_norm: 3.2165 (4.7694) time: 1.3085 data: 0.0003 max mem: 13912 +[2024-12-06 05:00:10 root] (utils.py 283): INFO Epoch: [0] [ 610/2502] eta: 0:41:31 lr: 0.000020 loss_cls: 3.2610 (3.4172) grad_norm: 3.1346 (4.7534) time: 1.3092 data: 0.0003 max mem: 13912 +[2024-12-06 05:00:24 root] (utils.py 283): INFO Epoch: [0] [ 620/2502] eta: 0:41:17 lr: 0.000020 loss_cls: 3.2861 (3.4176) grad_norm: 3.2762 (4.7331) time: 1.3100 data: 0.0003 max mem: 13912 +[2024-12-06 05:00:37 root] (utils.py 283): INFO Epoch: [0] [ 630/2502] eta: 0:41:04 lr: 0.000020 loss_cls: 3.4372 (3.4171) grad_norm: 3.5378 (4.7271) time: 1.3145 data: 0.0003 max mem: 13912 +[2024-12-06 05:00:50 root] (utils.py 283): INFO Epoch: [0] [ 640/2502] eta: 0:40:51 lr: 0.000020 loss_cls: 3.4072 (3.4143) grad_norm: 3.4186 (4.7097) time: 1.3140 data: 0.0003 max mem: 13912 +[2024-12-06 05:01:03 root] (utils.py 283): INFO Epoch: [0] [ 650/2502] eta: 0:40:37 lr: 0.000020 loss_cls: 3.3377 (3.4132) grad_norm: 3.2633 (4.7042) time: 1.3098 data: 0.0003 max mem: 13912 +[2024-12-06 05:01:16 root] (utils.py 283): INFO Epoch: [0] [ 660/2502] eta: 0:40:25 lr: 0.000020 loss_cls: 3.2092 (3.4054) grad_norm: 3.1145 (4.6809) time: 1.3199 data: 0.0003 max mem: 13912 +[2024-12-06 05:01:29 root] (utils.py 283): INFO Epoch: [0] [ 670/2502] eta: 0:40:11 lr: 0.000020 loss_cls: 3.2247 (3.4081) grad_norm: 3.0897 (4.6776) time: 1.3219 data: 0.0003 max mem: 13912 +[2024-12-06 05:01:42 root] (utils.py 283): INFO Epoch: [0] [ 680/2502] eta: 0:39:58 lr: 0.000020 loss_cls: 3.5952 (3.4076) grad_norm: 3.4479 (4.6633) time: 1.3075 data: 0.0003 max mem: 13912 +[2024-12-06 05:01:55 root] (utils.py 283): INFO Epoch: [0] [ 690/2502] eta: 0:39:45 lr: 0.000020 loss_cls: 3.0993 (3.4019) grad_norm: 3.3146 (4.6414) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 05:02:09 root] (utils.py 283): INFO Epoch: [0] [ 700/2502] eta: 0:39:31 lr: 0.000020 loss_cls: 3.0004 (3.3981) grad_norm: 3.3135 (4.6566) time: 1.3097 data: 0.0003 max mem: 13912 +[2024-12-06 05:02:22 root] (utils.py 283): INFO Epoch: [0] [ 710/2502] eta: 0:39:18 lr: 0.000020 loss_cls: 3.2035 (3.3950) grad_norm: 3.3926 (4.6438) time: 1.3084 data: 0.0003 max mem: 13912 +[2024-12-06 05:02:35 root] (utils.py 283): INFO Epoch: [0] [ 720/2502] eta: 0:39:04 lr: 0.000020 loss_cls: 3.2035 (3.3898) grad_norm: 3.2963 (4.6254) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 05:02:48 root] (utils.py 283): INFO Epoch: [0] [ 730/2502] eta: 0:38:51 lr: 0.000020 loss_cls: 3.3309 (3.3884) grad_norm: 3.3424 (4.6098) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 05:03:01 root] (utils.py 283): INFO Epoch: [0] [ 740/2502] eta: 0:38:37 lr: 0.000020 loss_cls: 3.4703 (3.3917) grad_norm: 3.3139 (4.5978) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 05:03:14 root] (utils.py 283): INFO Epoch: [0] [ 750/2502] eta: 0:38:24 lr: 0.000020 loss_cls: 3.5177 (3.3885) grad_norm: 3.2249 (4.5835) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 05:03:27 root] (utils.py 283): INFO Epoch: [0] [ 760/2502] eta: 0:38:11 lr: 0.000020 loss_cls: 3.5069 (3.3874) grad_norm: 3.1525 (4.5677) time: 1.3068 data: 0.0002 max mem: 13912 +[2024-12-06 05:03:40 root] (utils.py 283): INFO Epoch: [0] [ 770/2502] eta: 0:37:57 lr: 0.000020 loss_cls: 3.4665 (3.3864) grad_norm: 3.1525 (4.5558) time: 1.3057 data: 0.0002 max mem: 13912 +[2024-12-06 05:03:53 root] (utils.py 283): INFO Epoch: [0] [ 780/2502] eta: 0:37:44 lr: 0.000020 loss_cls: 3.3319 (3.3862) grad_norm: 3.1621 (4.5441) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 05:04:06 root] (utils.py 283): INFO Epoch: [0] [ 790/2502] eta: 0:37:31 lr: 0.000020 loss_cls: 3.2267 (3.3834) grad_norm: 2.9614 (4.5255) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 05:04:19 root] (utils.py 283): INFO Epoch: [0] [ 800/2502] eta: 0:37:17 lr: 0.000020 loss_cls: 3.2145 (3.3820) grad_norm: 3.1450 (4.5241) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 05:04:32 root] (utils.py 283): INFO Epoch: [0] [ 810/2502] eta: 0:37:04 lr: 0.000020 loss_cls: 2.9644 (3.3733) grad_norm: 3.5246 (4.5074) time: 1.3088 data: 0.0003 max mem: 13912 +[2024-12-06 05:04:45 root] (utils.py 283): INFO Epoch: [0] [ 820/2502] eta: 0:36:51 lr: 0.000020 loss_cls: 2.9644 (3.3733) grad_norm: 3.1381 (4.4945) time: 1.3107 data: 0.0003 max mem: 13912 +[2024-12-06 05:04:58 root] (utils.py 283): INFO Epoch: [0] [ 830/2502] eta: 0:36:37 lr: 0.000020 loss_cls: 3.3765 (3.3677) grad_norm: 3.2126 (4.4826) time: 1.3067 data: 0.0002 max mem: 13912 +[2024-12-06 05:05:12 root] (utils.py 283): INFO Epoch: [0] [ 840/2502] eta: 0:36:24 lr: 0.000020 loss_cls: 2.9913 (3.3662) grad_norm: 3.2126 (4.5013) time: 1.3117 data: 0.0002 max mem: 13912 +[2024-12-06 05:05:25 root] (utils.py 283): INFO Epoch: [0] [ 850/2502] eta: 0:36:11 lr: 0.000020 loss_cls: 3.3887 (3.3630) grad_norm: 3.2402 (4.4927) time: 1.3133 data: 0.0003 max mem: 13912 +[2024-12-06 05:05:38 root] (utils.py 283): INFO Epoch: [0] [ 860/2502] eta: 0:35:58 lr: 0.000020 loss_cls: 3.2557 (3.3627) grad_norm: 3.2212 (4.4792) time: 1.3078 data: 0.0003 max mem: 13912 +[2024-12-06 05:05:51 root] (utils.py 283): INFO Epoch: [0] [ 870/2502] eta: 0:35:45 lr: 0.000020 loss_cls: 3.2561 (3.3614) grad_norm: 3.3767 (4.4737) time: 1.3088 data: 0.0003 max mem: 13912 +[2024-12-06 05:06:04 root] (utils.py 283): INFO Epoch: [0] [ 880/2502] eta: 0:35:31 lr: 0.000020 loss_cls: 3.2561 (3.3582) grad_norm: 3.2457 (4.4599) time: 1.3074 data: 0.0003 max mem: 13912 +[2024-12-06 05:06:17 root] (utils.py 283): INFO Epoch: [0] [ 890/2502] eta: 0:35:18 lr: 0.000020 loss_cls: 3.1816 (3.3549) grad_norm: 3.0624 (4.4597) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 05:06:30 root] (utils.py 283): INFO Epoch: [0] [ 900/2502] eta: 0:35:05 lr: 0.000020 loss_cls: 3.2817 (3.3540) grad_norm: 3.0547 (4.4457) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 05:06:43 root] (utils.py 283): INFO Epoch: [0] [ 910/2502] eta: 0:34:51 lr: 0.000020 loss_cls: 3.5414 (3.3539) grad_norm: 3.1381 (4.4404) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 05:06:56 root] (utils.py 283): INFO Epoch: [0] [ 920/2502] eta: 0:34:38 lr: 0.000020 loss_cls: 3.6488 (3.3548) grad_norm: 3.1786 (4.4298) time: 1.3094 data: 0.0003 max mem: 13912 +[2024-12-06 05:07:09 root] (utils.py 283): INFO Epoch: [0] [ 930/2502] eta: 0:34:25 lr: 0.000020 loss_cls: 3.5857 (3.3563) grad_norm: 3.0796 (4.4168) time: 1.3079 data: 0.0003 max mem: 13912 +[2024-12-06 05:07:22 root] (utils.py 283): INFO Epoch: [0] [ 940/2502] eta: 0:34:12 lr: 0.000020 loss_cls: 3.4111 (3.3540) grad_norm: 2.9929 (4.4067) time: 1.3082 data: 0.0002 max mem: 13912 +[2024-12-06 05:07:35 root] (utils.py 283): INFO Epoch: [0] [ 950/2502] eta: 0:33:58 lr: 0.000020 loss_cls: 2.8096 (3.3481) grad_norm: 3.1430 (4.3945) time: 1.3110 data: 0.0002 max mem: 13912 +[2024-12-06 05:07:48 root] (utils.py 283): INFO Epoch: [0] [ 960/2502] eta: 0:33:45 lr: 0.000020 loss_cls: 3.1225 (3.3481) grad_norm: 3.2320 (4.3832) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 05:08:01 root] (utils.py 283): INFO Epoch: [0] [ 970/2502] eta: 0:33:32 lr: 0.000020 loss_cls: 3.4567 (3.3479) grad_norm: 3.2320 (4.3725) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 05:08:15 root] (utils.py 283): INFO Epoch: [0] [ 980/2502] eta: 0:33:19 lr: 0.000020 loss_cls: 3.4567 (3.3465) grad_norm: 3.3402 (4.3764) time: 1.3076 data: 0.0002 max mem: 13912 +[2024-12-06 05:08:28 root] (utils.py 283): INFO Epoch: [0] [ 990/2502] eta: 0:33:05 lr: 0.000020 loss_cls: 3.5418 (3.3457) grad_norm: 3.7366 (4.3725) time: 1.3086 data: 0.0002 max mem: 13912 +[2024-12-06 05:08:41 root] (utils.py 283): INFO Epoch: [0] [1000/2502] eta: 0:32:52 lr: 0.000020 loss_cls: 3.3554 (3.3437) grad_norm: 3.3225 (4.3606) time: 1.3073 data: 0.0002 max mem: 13912 +[2024-12-06 05:08:54 root] (utils.py 283): INFO Epoch: [0] [1010/2502] eta: 0:32:39 lr: 0.000020 loss_cls: 3.3554 (3.3445) grad_norm: 3.1299 (4.3511) time: 1.3085 data: 0.0002 max mem: 13912 +[2024-12-06 05:09:07 root] (utils.py 283): INFO Epoch: [0] [1020/2502] eta: 0:32:26 lr: 0.000020 loss_cls: 3.4320 (3.3436) grad_norm: 3.2014 (4.3415) time: 1.3075 data: 0.0002 max mem: 13912 +[2024-12-06 05:09:20 root] (utils.py 283): INFO Epoch: [0] [1030/2502] eta: 0:32:13 lr: 0.000020 loss_cls: 2.9745 (3.3393) grad_norm: 3.2468 (4.3351) time: 1.3067 data: 0.0002 max mem: 13912 +[2024-12-06 05:09:33 root] (utils.py 283): INFO Epoch: [0] [1040/2502] eta: 0:31:59 lr: 0.000020 loss_cls: 3.2745 (3.3404) grad_norm: 3.2468 (4.3376) time: 1.3090 data: 0.0002 max mem: 13912 +[2024-12-06 05:09:46 root] (utils.py 283): INFO Epoch: [0] [1050/2502] eta: 0:31:46 lr: 0.000020 loss_cls: 3.2580 (3.3385) grad_norm: 3.1433 (4.3298) time: 1.3117 data: 0.0002 max mem: 13912 +[2024-12-06 05:09:59 root] (utils.py 283): INFO Epoch: [0] [1060/2502] eta: 0:31:33 lr: 0.000020 loss_cls: 3.2151 (3.3389) grad_norm: 3.1350 (4.3219) time: 1.3132 data: 0.0002 max mem: 13912 +[2024-12-06 05:10:12 root] (utils.py 283): INFO Epoch: [0] [1070/2502] eta: 0:31:20 lr: 0.000020 loss_cls: 3.3498 (3.3381) grad_norm: 3.0809 (4.3102) time: 1.3115 data: 0.0002 max mem: 13912 +[2024-12-06 05:10:26 root] (utils.py 283): INFO Epoch: [0] [1080/2502] eta: 0:31:07 lr: 0.000020 loss_cls: 3.2921 (3.3371) grad_norm: 3.0809 (4.3077) time: 1.3138 data: 0.0003 max mem: 13912 +[2024-12-06 05:10:39 root] (utils.py 283): INFO Epoch: [0] [1090/2502] eta: 0:30:54 lr: 0.000020 loss_cls: 3.2015 (3.3349) grad_norm: 3.1020 (4.2997) time: 1.3136 data: 0.0003 max mem: 13912 +[2024-12-06 05:10:52 root] (utils.py 283): INFO Epoch: [0] [1100/2502] eta: 0:30:40 lr: 0.000020 loss_cls: 3.2015 (3.3337) grad_norm: 3.0025 (4.2911) time: 1.3087 data: 0.0003 max mem: 13912 +[2024-12-06 05:11:05 root] (utils.py 283): INFO Epoch: [0] [1110/2502] eta: 0:30:27 lr: 0.000020 loss_cls: 3.3152 (3.3333) grad_norm: 3.0351 (4.2903) time: 1.3077 data: 0.0003 max mem: 13912 +[2024-12-06 05:11:18 root] (utils.py 283): INFO Epoch: [0] [1120/2502] eta: 0:30:14 lr: 0.000020 loss_cls: 3.3469 (3.3339) grad_norm: 3.0582 (4.2802) time: 1.3089 data: 0.0003 max mem: 13912 +[2024-12-06 05:11:31 root] (utils.py 283): INFO Epoch: [0] [1130/2502] eta: 0:30:01 lr: 0.000020 loss_cls: 3.4338 (3.3347) grad_norm: 2.9763 (4.2726) time: 1.3092 data: 0.0002 max mem: 13912 +[2024-12-06 05:11:44 root] (utils.py 283): INFO Epoch: [0] [1140/2502] eta: 0:29:48 lr: 0.000020 loss_cls: 3.4309 (3.3327) grad_norm: 3.1081 (4.2804) time: 1.3077 data: 0.0002 max mem: 13912 +[2024-12-06 05:11:57 root] (utils.py 283): INFO Epoch: [0] [1150/2502] eta: 0:29:35 lr: 0.000020 loss_cls: 3.2938 (3.3314) grad_norm: 3.0696 (4.2776) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 05:12:10 root] (utils.py 283): INFO Epoch: [0] [1160/2502] eta: 0:29:21 lr: 0.000020 loss_cls: 3.3157 (3.3304) grad_norm: 3.0767 (4.2683) time: 1.3068 data: 0.0003 max mem: 13912 +[2024-12-06 05:12:23 root] (utils.py 283): INFO Epoch: [0] [1170/2502] eta: 0:29:08 lr: 0.000020 loss_cls: 3.3157 (3.3288) grad_norm: 3.2326 (4.2608) time: 1.3065 data: 0.0002 max mem: 13912 +[2024-12-06 05:12:36 root] (utils.py 283): INFO Epoch: [0] [1180/2502] eta: 0:28:55 lr: 0.000020 loss_cls: 3.5284 (3.3309) grad_norm: 3.2695 (4.2572) time: 1.3087 data: 0.0002 max mem: 13912 +[2024-12-06 05:12:50 root] (utils.py 283): INFO Epoch: [0] [1190/2502] eta: 0:28:42 lr: 0.000020 loss_cls: 3.5523 (3.3326) grad_norm: 3.2695 (4.2512) time: 1.3157 data: 0.0002 max mem: 13912 +[2024-12-06 05:13:03 root] (utils.py 283): INFO Epoch: [0] [1200/2502] eta: 0:28:29 lr: 0.000020 loss_cls: 3.4985 (3.3328) grad_norm: 3.3304 (4.2485) time: 1.3214 data: 0.0003 max mem: 13912 +[2024-12-06 05:13:16 root] (utils.py 283): INFO Epoch: [0] [1210/2502] eta: 0:28:16 lr: 0.000020 loss_cls: 3.3320 (3.3315) grad_norm: 3.1615 (4.2397) time: 1.3150 data: 0.0003 max mem: 13912 +[2024-12-06 05:13:29 root] (utils.py 283): INFO Epoch: [0] [1220/2502] eta: 0:28:03 lr: 0.000020 loss_cls: 3.2118 (3.3305) grad_norm: 3.1615 (4.2378) time: 1.3072 data: 0.0003 max mem: 13912 +[2024-12-06 05:13:42 root] (utils.py 283): INFO Epoch: [0] [1230/2502] eta: 0:27:49 lr: 0.000020 loss_cls: 3.2633 (3.3299) grad_norm: 3.1802 (4.2328) time: 1.3080 data: 0.0003 max mem: 13912 +[2024-12-06 05:13:55 root] (utils.py 283): INFO Epoch: [0] [1240/2502] eta: 0:27:36 lr: 0.000020 loss_cls: 3.4253 (3.3296) grad_norm: 3.1725 (4.2410) time: 1.3104 data: 0.0003 max mem: 13912 +[2024-12-06 05:14:08 root] (utils.py 283): INFO Epoch: [0] [1250/2502] eta: 0:27:23 lr: 0.000020 loss_cls: 3.2528 (3.3273) grad_norm: 3.0820 (4.2330) time: 1.3107 data: 0.0003 max mem: 13912 +[2024-12-06 05:14:21 root] (utils.py 283): INFO Epoch: [0] [1260/2502] eta: 0:27:10 lr: 0.000020 loss_cls: 3.3403 (3.3280) grad_norm: 3.3380 (4.2263) time: 1.3149 data: 0.0003 max mem: 13912 +[2024-12-06 05:14:35 root] (utils.py 283): INFO Epoch: [0] [1270/2502] eta: 0:26:57 lr: 0.000020 loss_cls: 3.4173 (3.3284) grad_norm: 3.3419 (4.2190) time: 1.3135 data: 0.0003 max mem: 13912 +[2024-12-06 05:14:48 root] (utils.py 283): INFO Epoch: [0] [1280/2502] eta: 0:26:44 lr: 0.000020 loss_cls: 3.4406 (3.3294) grad_norm: 3.1372 (4.2134) time: 1.3091 data: 0.0003 max mem: 13912 +[2024-12-06 05:15:01 root] (utils.py 283): INFO Epoch: [0] [1290/2502] eta: 0:26:30 lr: 0.000020 loss_cls: 3.4406 (3.3299) grad_norm: 3.0473 (4.2040) time: 1.3093 data: 0.0003 max mem: 13912 +[2024-12-06 05:15:14 root] (utils.py 283): INFO Epoch: [0] [1300/2502] eta: 0:26:17 lr: 0.000020 loss_cls: 3.3454 (3.3298) grad_norm: 2.9019 (4.1957) time: 1.3146 data: 0.0003 max mem: 13912 +[2024-12-06 05:15:27 root] (utils.py 283): INFO Epoch: [0] [1310/2502] eta: 0:26:04 lr: 0.000020 loss_cls: 3.2314 (3.3278) grad_norm: 3.2291 (4.1894) time: 1.3177 data: 0.0003 max mem: 13912 +[2024-12-06 05:15:40 root] (utils.py 283): INFO Epoch: [0] [1320/2502] eta: 0:25:51 lr: 0.000020 loss_cls: 3.0163 (3.3253) grad_norm: 3.2008 (4.1817) time: 1.3094 data: 0.0003 max mem: 13912 +[2024-12-06 05:15:53 root] (utils.py 283): INFO Epoch: [0] [1330/2502] eta: 0:25:38 lr: 0.000020 loss_cls: 3.3554 (3.3256) grad_norm: 2.8046 (4.1763) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 05:16:06 root] (utils.py 283): INFO Epoch: [0] [1340/2502] eta: 0:25:25 lr: 0.000020 loss_cls: 3.4892 (3.3257) grad_norm: 2.8015 (4.1667) time: 1.3084 data: 0.0003 max mem: 13912 +[2024-12-06 05:16:19 root] (utils.py 283): INFO Epoch: [0] [1350/2502] eta: 0:25:12 lr: 0.000020 loss_cls: 3.3109 (3.3249) grad_norm: 2.8015 (4.1587) time: 1.3085 data: 0.0002 max mem: 13912 +[2024-12-06 05:16:32 root] (utils.py 283): INFO Epoch: [0] [1360/2502] eta: 0:24:59 lr: 0.000020 loss_cls: 3.2680 (3.3256) grad_norm: 3.2596 (4.1602) time: 1.3094 data: 0.0003 max mem: 13912 +[2024-12-06 05:16:46 root] (utils.py 283): INFO Epoch: [0] [1370/2502] eta: 0:24:45 lr: 0.000020 loss_cls: 3.4798 (3.3262) grad_norm: 3.0717 (4.1523) time: 1.3099 data: 0.0002 max mem: 13912 +[2024-12-06 05:16:59 root] (utils.py 283): INFO Epoch: [0] [1380/2502] eta: 0:24:32 lr: 0.000020 loss_cls: 3.4627 (3.3265) grad_norm: 2.7676 (4.1437) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 05:17:12 root] (utils.py 283): INFO Epoch: [0] [1390/2502] eta: 0:24:19 lr: 0.000020 loss_cls: 3.4347 (3.3265) grad_norm: 2.8950 (4.1381) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 05:17:25 root] (utils.py 283): INFO Epoch: [0] [1400/2502] eta: 0:24:06 lr: 0.000020 loss_cls: 3.4347 (3.3258) grad_norm: 3.0635 (4.1405) time: 1.3074 data: 0.0002 max mem: 13912 +[2024-12-06 05:17:38 root] (utils.py 283): INFO Epoch: [0] [1410/2502] eta: 0:23:53 lr: 0.000020 loss_cls: 3.5580 (3.3277) grad_norm: 3.1403 (4.1370) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 05:17:51 root] (utils.py 283): INFO Epoch: [0] [1420/2502] eta: 0:23:40 lr: 0.000020 loss_cls: 3.3923 (3.3264) grad_norm: 2.9165 (4.1298) time: 1.3111 data: 0.0003 max mem: 13912 +[2024-12-06 05:18:04 root] (utils.py 283): INFO Epoch: [0] [1430/2502] eta: 0:23:26 lr: 0.000020 loss_cls: 3.2590 (3.3262) grad_norm: 2.8673 (4.1254) time: 1.3122 data: 0.0003 max mem: 13912 +[2024-12-06 05:18:17 root] (utils.py 283): INFO Epoch: [0] [1440/2502] eta: 0:23:13 lr: 0.000020 loss_cls: 3.3066 (3.3249) grad_norm: 2.8797 (4.1170) time: 1.3112 data: 0.0003 max mem: 13912 +[2024-12-06 05:18:30 root] (utils.py 283): INFO Epoch: [0] [1450/2502] eta: 0:23:00 lr: 0.000020 loss_cls: 3.3066 (3.3250) grad_norm: 2.8797 (4.1115) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 05:18:43 root] (utils.py 283): INFO Epoch: [0] [1460/2502] eta: 0:22:47 lr: 0.000020 loss_cls: 3.4494 (3.3255) grad_norm: 3.0915 (4.1042) time: 1.3101 data: 0.0003 max mem: 13912 +[2024-12-06 05:18:57 root] (utils.py 283): INFO Epoch: [0] [1470/2502] eta: 0:22:34 lr: 0.000020 loss_cls: 3.4412 (3.3241) grad_norm: 2.9340 (4.0962) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 05:19:10 root] (utils.py 283): INFO Epoch: [0] [1480/2502] eta: 0:22:21 lr: 0.000020 loss_cls: 3.0589 (3.3213) grad_norm: 2.8519 (4.0913) time: 1.3079 data: 0.0003 max mem: 13912 +[2024-12-06 05:19:23 root] (utils.py 283): INFO Epoch: [0] [1490/2502] eta: 0:22:08 lr: 0.000020 loss_cls: 2.8565 (3.3174) grad_norm: 3.0127 (4.0851) time: 1.3065 data: 0.0003 max mem: 13912 +[2024-12-06 05:19:36 root] (utils.py 283): INFO Epoch: [0] [1500/2502] eta: 0:21:54 lr: 0.000020 loss_cls: 2.9438 (3.3168) grad_norm: 3.0127 (4.0785) time: 1.3088 data: 0.0003 max mem: 13912 +[2024-12-06 05:19:49 root] (utils.py 283): INFO Epoch: [0] [1510/2502] eta: 0:21:41 lr: 0.000020 loss_cls: 3.3488 (3.3164) grad_norm: 2.9214 (4.0726) time: 1.3117 data: 0.0003 max mem: 13912 +[2024-12-06 05:20:02 root] (utils.py 283): INFO Epoch: [0] [1520/2502] eta: 0:21:28 lr: 0.000020 loss_cls: 3.3539 (3.3171) grad_norm: 2.8646 (4.0665) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 05:20:15 root] (utils.py 283): INFO Epoch: [0] [1530/2502] eta: 0:21:15 lr: 0.000020 loss_cls: 3.3539 (3.3161) grad_norm: 2.8437 (4.0681) time: 1.3091 data: 0.0002 max mem: 13912 +[2024-12-06 05:20:28 root] (utils.py 283): INFO Epoch: [0] [1540/2502] eta: 0:21:02 lr: 0.000020 loss_cls: 3.3076 (3.3153) grad_norm: 2.8620 (4.0623) time: 1.3104 data: 0.0003 max mem: 13912 +[2024-12-06 05:20:41 root] (utils.py 283): INFO Epoch: [0] [1550/2502] eta: 0:20:49 lr: 0.000020 loss_cls: 3.2874 (3.3134) grad_norm: 2.9114 (4.0574) time: 1.3099 data: 0.0003 max mem: 13912 +[2024-12-06 05:20:54 root] (utils.py 283): INFO Epoch: [0] [1560/2502] eta: 0:20:36 lr: 0.000020 loss_cls: 3.0777 (3.3121) grad_norm: 2.9714 (4.0520) time: 1.3075 data: 0.0002 max mem: 13912 +[2024-12-06 05:21:07 root] (utils.py 283): INFO Epoch: [0] [1570/2502] eta: 0:20:22 lr: 0.000020 loss_cls: 3.0777 (3.3095) grad_norm: 2.9914 (4.0454) time: 1.3059 data: 0.0002 max mem: 13912 +[2024-12-06 05:21:20 root] (utils.py 283): INFO Epoch: [0] [1580/2502] eta: 0:20:09 lr: 0.000020 loss_cls: 3.1466 (3.3087) grad_norm: 2.9914 (4.0410) time: 1.3063 data: 0.0002 max mem: 13912 +[2024-12-06 05:21:34 root] (utils.py 283): INFO Epoch: [0] [1590/2502] eta: 0:19:56 lr: 0.000020 loss_cls: 3.3586 (3.3095) grad_norm: 3.2315 (4.0360) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 05:21:47 root] (utils.py 283): INFO Epoch: [0] [1600/2502] eta: 0:19:43 lr: 0.000020 loss_cls: 3.4489 (3.3109) grad_norm: 3.1435 (4.0345) time: 1.3080 data: 0.0003 max mem: 13912 +[2024-12-06 05:22:00 root] (utils.py 283): INFO Epoch: [0] [1610/2502] eta: 0:19:30 lr: 0.000020 loss_cls: 3.4403 (3.3109) grad_norm: 2.9614 (4.0286) time: 1.3092 data: 0.0003 max mem: 13912 +[2024-12-06 05:22:13 root] (utils.py 283): INFO Epoch: [0] [1620/2502] eta: 0:19:17 lr: 0.000020 loss_cls: 3.2392 (3.3108) grad_norm: 2.9614 (4.0231) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 05:22:26 root] (utils.py 283): INFO Epoch: [0] [1630/2502] eta: 0:19:04 lr: 0.000020 loss_cls: 3.3969 (3.3107) grad_norm: 3.1206 (4.0186) time: 1.3081 data: 0.0002 max mem: 13912 +[2024-12-06 05:22:39 root] (utils.py 283): INFO Epoch: [0] [1640/2502] eta: 0:18:50 lr: 0.000020 loss_cls: 3.3686 (3.3098) grad_norm: 3.1407 (4.0127) time: 1.3084 data: 0.0002 max mem: 13912 +[2024-12-06 05:22:52 root] (utils.py 283): INFO Epoch: [0] [1650/2502] eta: 0:18:37 lr: 0.000020 loss_cls: 3.1086 (3.3064) grad_norm: 2.8254 (4.0052) time: 1.3095 data: 0.0003 max mem: 13912 +[2024-12-06 05:23:05 root] (utils.py 283): INFO Epoch: [0] [1660/2502] eta: 0:18:24 lr: 0.000020 loss_cls: 2.7532 (3.3034) grad_norm: 2.8129 (4.0006) time: 1.3093 data: 0.0003 max mem: 13912 +[2024-12-06 05:23:18 root] (utils.py 283): INFO Epoch: [0] [1670/2502] eta: 0:18:11 lr: 0.000020 loss_cls: 3.0750 (3.3030) grad_norm: 2.9569 (3.9953) time: 1.3133 data: 0.0003 max mem: 13912 +[2024-12-06 05:23:31 root] (utils.py 283): INFO Epoch: [0] [1680/2502] eta: 0:17:58 lr: 0.000020 loss_cls: 3.3309 (3.3021) grad_norm: 2.7721 (3.9889) time: 1.3119 data: 0.0003 max mem: 13912 +[2024-12-06 05:23:45 root] (utils.py 283): INFO Epoch: [0] [1690/2502] eta: 0:17:45 lr: 0.000020 loss_cls: 3.2389 (3.3018) grad_norm: 2.8121 (3.9836) time: 1.3118 data: 0.0003 max mem: 13912 +[2024-12-06 05:23:58 root] (utils.py 283): INFO Epoch: [0] [1700/2502] eta: 0:17:32 lr: 0.000020 loss_cls: 3.5055 (3.3036) grad_norm: 3.0227 (3.9795) time: 1.3218 data: 0.0003 max mem: 13912 +[2024-12-06 05:24:11 root] (utils.py 283): INFO Epoch: [0] [1710/2502] eta: 0:17:19 lr: 0.000020 loss_cls: 3.4715 (3.3030) grad_norm: 2.9521 (3.9744) time: 1.3218 data: 0.0003 max mem: 13912 +[2024-12-06 05:24:24 root] (utils.py 283): INFO Epoch: [0] [1720/2502] eta: 0:17:06 lr: 0.000020 loss_cls: 3.1750 (3.3019) grad_norm: 3.0692 (3.9696) time: 1.3156 data: 0.0003 max mem: 13912 +[2024-12-06 05:24:37 root] (utils.py 283): INFO Epoch: [0] [1730/2502] eta: 0:16:52 lr: 0.000020 loss_cls: 3.4509 (3.3028) grad_norm: 3.2477 (3.9679) time: 1.3159 data: 0.0002 max mem: 13912 +[2024-12-06 05:24:51 root] (utils.py 283): INFO Epoch: [0] [1740/2502] eta: 0:16:39 lr: 0.000020 loss_cls: 3.4450 (3.3018) grad_norm: 3.4226 (3.9707) time: 1.3237 data: 0.0002 max mem: 13912 +[2024-12-06 05:25:04 root] (utils.py 283): INFO Epoch: [0] [1750/2502] eta: 0:16:26 lr: 0.000020 loss_cls: 3.2241 (3.3006) grad_norm: 3.0612 (3.9675) time: 1.3207 data: 0.0003 max mem: 13912 +[2024-12-06 05:25:17 root] (utils.py 283): INFO Epoch: [0] [1760/2502] eta: 0:16:13 lr: 0.000020 loss_cls: 2.9437 (3.2999) grad_norm: 3.0951 (3.9670) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 05:25:30 root] (utils.py 283): INFO Epoch: [0] [1770/2502] eta: 0:16:00 lr: 0.000020 loss_cls: 3.3038 (3.3003) grad_norm: 2.9845 (3.9624) time: 1.3148 data: 0.0003 max mem: 13912 +[2024-12-06 05:25:43 root] (utils.py 283): INFO Epoch: [0] [1780/2502] eta: 0:15:47 lr: 0.000020 loss_cls: 3.3051 (3.2994) grad_norm: 3.0259 (3.9577) time: 1.3142 data: 0.0003 max mem: 13912 +[2024-12-06 05:25:56 root] (utils.py 283): INFO Epoch: [0] [1790/2502] eta: 0:15:34 lr: 0.000020 loss_cls: 3.1357 (3.2980) grad_norm: 3.1110 (3.9532) time: 1.3121 data: 0.0003 max mem: 13912 +[2024-12-06 05:26:09 root] (utils.py 283): INFO Epoch: [0] [1800/2502] eta: 0:15:21 lr: 0.000020 loss_cls: 3.2470 (3.2989) grad_norm: 3.1314 (3.9499) time: 1.3144 data: 0.0003 max mem: 13912 +[2024-12-06 05:26:23 root] (utils.py 283): INFO Epoch: [0] [1810/2502] eta: 0:15:08 lr: 0.000020 loss_cls: 3.2956 (3.2979) grad_norm: 3.1314 (3.9524) time: 1.3139 data: 0.0002 max mem: 13912 +[2024-12-06 05:26:36 root] (utils.py 283): INFO Epoch: [0] [1820/2502] eta: 0:14:54 lr: 0.000020 loss_cls: 3.2699 (3.2979) grad_norm: 3.3826 (3.9501) time: 1.3145 data: 0.0003 max mem: 13912 +[2024-12-06 05:26:49 root] (utils.py 283): INFO Epoch: [0] [1830/2502] eta: 0:14:41 lr: 0.000020 loss_cls: 3.3607 (3.2994) grad_norm: 2.9408 (3.9459) time: 1.3143 data: 0.0003 max mem: 13912 +[2024-12-06 05:27:02 root] (utils.py 283): INFO Epoch: [0] [1840/2502] eta: 0:14:28 lr: 0.000020 loss_cls: 3.3561 (3.2982) grad_norm: 2.9474 (3.9414) time: 1.3120 data: 0.0003 max mem: 13912 +[2024-12-06 05:27:15 root] (utils.py 283): INFO Epoch: [0] [1850/2502] eta: 0:14:15 lr: 0.000020 loss_cls: 3.1862 (3.2971) grad_norm: 2.9538 (3.9365) time: 1.3098 data: 0.0003 max mem: 13912 +[2024-12-06 05:27:28 root] (utils.py 283): INFO Epoch: [0] [1860/2502] eta: 0:14:02 lr: 0.000020 loss_cls: 3.1862 (3.2962) grad_norm: 2.9451 (3.9343) time: 1.3089 data: 0.0003 max mem: 13912 +[2024-12-06 05:27:41 root] (utils.py 283): INFO Epoch: [0] [1870/2502] eta: 0:13:49 lr: 0.000020 loss_cls: 3.1997 (3.2948) grad_norm: 2.9451 (3.9309) time: 1.3105 data: 0.0003 max mem: 13912 +[2024-12-06 05:27:54 root] (utils.py 283): INFO Epoch: [0] [1880/2502] eta: 0:13:36 lr: 0.000020 loss_cls: 3.4475 (3.2958) grad_norm: 2.8167 (3.9264) time: 1.3101 data: 0.0002 max mem: 13912 +[2024-12-06 05:28:07 root] (utils.py 283): INFO Epoch: [0] [1890/2502] eta: 0:13:23 lr: 0.000020 loss_cls: 3.5378 (3.2948) grad_norm: 3.1172 (3.9270) time: 1.3093 data: 0.0002 max mem: 13912 +[2024-12-06 05:28:21 root] (utils.py 283): INFO Epoch: [0] [1900/2502] eta: 0:13:09 lr: 0.000020 loss_cls: 3.1342 (3.2937) grad_norm: 3.0182 (3.9250) time: 1.3094 data: 0.0002 max mem: 13912 +[2024-12-06 05:28:34 root] (utils.py 283): INFO Epoch: [0] [1910/2502] eta: 0:12:56 lr: 0.000020 loss_cls: 3.2417 (3.2937) grad_norm: 2.8475 (3.9199) time: 1.3087 data: 0.0003 max mem: 13912 +[2024-12-06 05:28:47 root] (utils.py 283): INFO Epoch: [0] [1920/2502] eta: 0:12:43 lr: 0.000020 loss_cls: 3.1281 (3.2912) grad_norm: 2.8816 (3.9148) time: 1.3081 data: 0.0003 max mem: 13912 +[2024-12-06 05:29:00 root] (utils.py 283): INFO Epoch: [0] [1930/2502] eta: 0:12:30 lr: 0.000020 loss_cls: 2.9709 (3.2906) grad_norm: 3.0276 (3.9123) time: 1.3101 data: 0.0002 max mem: 13912 +[2024-12-06 05:29:13 root] (utils.py 283): INFO Epoch: [0] [1940/2502] eta: 0:12:17 lr: 0.000020 loss_cls: 3.2188 (3.2898) grad_norm: 3.3491 (3.9132) time: 1.3119 data: 0.0002 max mem: 13912 +[2024-12-06 05:29:26 root] (utils.py 283): INFO Epoch: [0] [1950/2502] eta: 0:12:04 lr: 0.000020 loss_cls: 3.2102 (3.2885) grad_norm: 3.1404 (3.9116) time: 1.3118 data: 0.0002 max mem: 13912 +[2024-12-06 05:29:39 root] (utils.py 283): INFO Epoch: [0] [1960/2502] eta: 0:11:51 lr: 0.000020 loss_cls: 3.2192 (3.2874) grad_norm: 2.9171 (3.9077) time: 1.3125 data: 0.0003 max mem: 13912 +[2024-12-06 05:29:52 root] (utils.py 283): INFO Epoch: [0] [1970/2502] eta: 0:11:38 lr: 0.000020 loss_cls: 3.2192 (3.2855) grad_norm: 2.9005 (3.9028) time: 1.3099 data: 0.0003 max mem: 13912 +[2024-12-06 05:30:06 root] (utils.py 283): INFO Epoch: [0] [1980/2502] eta: 0:11:25 lr: 0.000020 loss_cls: 3.0719 (3.2851) grad_norm: 3.0224 (3.9035) time: 1.3377 data: 0.0003 max mem: 13912 +[2024-12-06 05:30:20 root] (utils.py 283): INFO Epoch: [0] [1990/2502] eta: 0:11:12 lr: 0.000020 loss_cls: 3.4216 (3.2843) grad_norm: 3.0771 (3.9021) time: 1.3621 data: 0.0003 max mem: 13912 +[2024-12-06 05:30:33 root] (utils.py 283): INFO Epoch: [0] [2000/2502] eta: 0:10:58 lr: 0.000020 loss_cls: 3.2799 (3.2835) grad_norm: 2.9197 (3.8988) time: 1.3321 data: 0.0003 max mem: 13912 +[2024-12-06 05:30:46 root] (utils.py 283): INFO Epoch: [0] [2010/2502] eta: 0:10:45 lr: 0.000020 loss_cls: 3.3733 (3.2846) grad_norm: 2.8638 (3.8951) time: 1.3132 data: 0.0003 max mem: 13912 +[2024-12-06 05:30:59 root] (utils.py 283): INFO Epoch: [0] [2020/2502] eta: 0:10:32 lr: 0.000020 loss_cls: 3.3815 (3.2839) grad_norm: 2.8638 (3.8903) time: 1.3129 data: 0.0003 max mem: 13912 +[2024-12-06 05:31:12 root] (utils.py 283): INFO Epoch: [0] [2030/2502] eta: 0:10:19 lr: 0.000020 loss_cls: 3.4601 (3.2842) grad_norm: 2.9066 (3.8855) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 05:31:25 root] (utils.py 283): INFO Epoch: [0] [2040/2502] eta: 0:10:06 lr: 0.000020 loss_cls: 3.5141 (3.2845) grad_norm: 2.9113 (3.8836) time: 1.2939 data: 0.0002 max mem: 13912 +[2024-12-06 05:31:38 root] (utils.py 283): INFO Epoch: [0] [2050/2502] eta: 0:09:53 lr: 0.000020 loss_cls: 3.2459 (3.2833) grad_norm: 2.9787 (3.8836) time: 1.2954 data: 0.0002 max mem: 13912 +[2024-12-06 05:31:51 root] (utils.py 283): INFO Epoch: [0] [2060/2502] eta: 0:09:40 lr: 0.000020 loss_cls: 3.0047 (3.2828) grad_norm: 3.5858 (3.8843) time: 1.3079 data: 0.0002 max mem: 13912 +[2024-12-06 05:32:04 root] (utils.py 283): INFO Epoch: [0] [2070/2502] eta: 0:09:26 lr: 0.000020 loss_cls: 3.3756 (3.2831) grad_norm: 3.0294 (3.8802) time: 1.3173 data: 0.0003 max mem: 13912 +[2024-12-06 05:32:17 root] (utils.py 283): INFO Epoch: [0] [2080/2502] eta: 0:09:13 lr: 0.000020 loss_cls: 3.3686 (3.2826) grad_norm: 2.9755 (3.8787) time: 1.3071 data: 0.0002 max mem: 13912 +[2024-12-06 05:32:30 root] (utils.py 283): INFO Epoch: [0] [2090/2502] eta: 0:09:00 lr: 0.000020 loss_cls: 3.3686 (3.2835) grad_norm: 3.0434 (3.8756) time: 1.2978 data: 0.0002 max mem: 13912 +[2024-12-06 05:32:43 root] (utils.py 283): INFO Epoch: [0] [2100/2502] eta: 0:08:47 lr: 0.000020 loss_cls: 3.5134 (3.2820) grad_norm: 3.0498 (3.8728) time: 1.2960 data: 0.0002 max mem: 13912 +[2024-12-06 05:32:56 root] (utils.py 283): INFO Epoch: [0] [2110/2502] eta: 0:08:34 lr: 0.000020 loss_cls: 3.3375 (3.2823) grad_norm: 2.9871 (3.8687) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 05:33:09 root] (utils.py 283): INFO Epoch: [0] [2120/2502] eta: 0:08:21 lr: 0.000020 loss_cls: 3.4130 (3.2817) grad_norm: 2.7081 (3.8639) time: 1.3069 data: 0.0002 max mem: 13912 +[2024-12-06 05:33:22 root] (utils.py 283): INFO Epoch: [0] [2130/2502] eta: 0:08:08 lr: 0.000020 loss_cls: 3.3572 (3.2816) grad_norm: 2.7081 (3.8598) time: 1.2995 data: 0.0002 max mem: 13912 +[2024-12-06 05:33:35 root] (utils.py 283): INFO Epoch: [0] [2140/2502] eta: 0:07:54 lr: 0.000020 loss_cls: 3.4538 (3.2825) grad_norm: 2.9089 (3.8561) time: 1.2915 data: 0.0002 max mem: 13912 +[2024-12-06 05:33:48 root] (utils.py 283): INFO Epoch: [0] [2150/2502] eta: 0:07:41 lr: 0.000020 loss_cls: 3.4880 (3.2820) grad_norm: 2.7439 (3.8533) time: 1.2901 data: 0.0002 max mem: 13912 +[2024-12-06 05:34:02 root] (utils.py 283): INFO Epoch: [0] [2160/2502] eta: 0:07:28 lr: 0.000020 loss_cls: 3.3784 (3.2822) grad_norm: 2.8259 (3.8551) time: 1.3467 data: 0.0003 max mem: 13912 +[2024-12-06 05:34:24 root] (utils.py 283): INFO Epoch: [0] [2170/2502] eta: 0:07:17 lr: 0.000020 loss_cls: 3.5581 (3.2830) grad_norm: 3.1006 (3.8570) time: 1.8022 data: 0.0003 max mem: 13912 +[2024-12-06 05:34:48 root] (utils.py 283): INFO Epoch: [0] [2180/2502] eta: 0:07:05 lr: 0.000020 loss_cls: 3.3741 (3.2823) grad_norm: 3.0788 (3.8569) time: 2.2917 data: 0.0003 max mem: 13912 +[2024-12-06 05:35:12 root] (utils.py 283): INFO Epoch: [0] [2190/2502] eta: 0:06:53 lr: 0.000020 loss_cls: 3.5052 (3.2832) grad_norm: 2.9687 (3.8547) time: 2.3836 data: 0.0002 max mem: 13912 +[2024-12-06 05:35:35 root] (utils.py 283): INFO Epoch: [0] [2200/2502] eta: 0:06:41 lr: 0.000020 loss_cls: 3.3253 (3.2814) grad_norm: 2.8952 (3.8504) time: 2.3854 data: 0.0003 max mem: 13912 +[2024-12-06 05:35:59 root] (utils.py 283): INFO Epoch: [0] [2210/2502] eta: 0:06:29 lr: 0.000020 loss_cls: 3.1045 (3.2809) grad_norm: 2.8132 (3.8462) time: 2.3706 data: 0.0003 max mem: 13912 +[2024-12-06 05:36:23 root] (utils.py 283): INFO Epoch: [0] [2220/2502] eta: 0:06:17 lr: 0.000020 loss_cls: 3.2834 (3.2801) grad_norm: 2.8293 (3.8420) time: 2.3719 data: 0.0003 max mem: 13912 +[2024-12-06 05:36:47 root] (utils.py 283): INFO Epoch: [0] [2230/2502] eta: 0:06:05 lr: 0.000020 loss_cls: 3.1802 (3.2790) grad_norm: 2.9469 (3.8418) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 05:37:11 root] (utils.py 283): INFO Epoch: [0] [2240/2502] eta: 0:05:53 lr: 0.000020 loss_cls: 3.1409 (3.2780) grad_norm: 3.2166 (3.8419) time: 2.3854 data: 0.0003 max mem: 13912 +[2024-12-06 05:37:34 root] (utils.py 283): INFO Epoch: [0] [2250/2502] eta: 0:05:41 lr: 0.000020 loss_cls: 3.0350 (3.2770) grad_norm: 2.9842 (3.8391) time: 2.3826 data: 0.0002 max mem: 13912 +[2024-12-06 05:37:58 root] (utils.py 283): INFO Epoch: [0] [2260/2502] eta: 0:05:28 lr: 0.000020 loss_cls: 2.8912 (3.2761) grad_norm: 2.9115 (3.8377) time: 2.3847 data: 0.0002 max mem: 13912 +[2024-12-06 05:38:22 root] (utils.py 283): INFO Epoch: [0] [2270/2502] eta: 0:05:16 lr: 0.000020 loss_cls: 3.2021 (3.2764) grad_norm: 3.2755 (3.8362) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 05:38:46 root] (utils.py 283): INFO Epoch: [0] [2280/2502] eta: 0:05:03 lr: 0.000020 loss_cls: 3.3073 (3.2772) grad_norm: 3.2655 (3.8340) time: 2.3813 data: 0.0003 max mem: 13912 +[2024-12-06 05:39:10 root] (utils.py 283): INFO Epoch: [0] [2290/2502] eta: 0:04:50 lr: 0.000020 loss_cls: 3.4176 (3.2771) grad_norm: 2.9734 (3.8309) time: 2.3762 data: 0.0003 max mem: 13912 +[2024-12-06 05:39:33 root] (utils.py 283): INFO Epoch: [0] [2300/2502] eta: 0:04:38 lr: 0.000020 loss_cls: 3.0675 (3.2763) grad_norm: 2.9062 (3.8280) time: 2.3775 data: 0.0003 max mem: 13912 +[2024-12-06 05:39:57 root] (utils.py 283): INFO Epoch: [0] [2310/2502] eta: 0:04:25 lr: 0.000020 loss_cls: 3.0675 (3.2750) grad_norm: 2.8976 (3.8250) time: 2.3840 data: 0.0003 max mem: 13912 +[2024-12-06 05:40:21 root] (utils.py 283): INFO Epoch: [0] [2320/2502] eta: 0:04:12 lr: 0.000020 loss_cls: 3.2153 (3.2755) grad_norm: 2.8471 (3.8210) time: 2.3889 data: 0.0003 max mem: 13912 +[2024-12-06 05:40:45 root] (utils.py 283): INFO Epoch: [0] [2330/2502] eta: 0:03:58 lr: 0.000020 loss_cls: 3.5524 (3.2765) grad_norm: 2.9836 (3.8188) time: 2.3871 data: 0.0002 max mem: 13912 +[2024-12-06 05:41:09 root] (utils.py 283): INFO Epoch: [0] [2340/2502] eta: 0:03:45 lr: 0.000020 loss_cls: 3.5483 (3.2762) grad_norm: 3.1275 (3.8174) time: 2.3673 data: 0.0002 max mem: 13912 +[2024-12-06 05:41:32 root] (utils.py 283): INFO Epoch: [0] [2350/2502] eta: 0:03:32 lr: 0.000020 loss_cls: 3.2187 (3.2760) grad_norm: 2.9710 (3.8133) time: 2.3663 data: 0.0002 max mem: 13912 +[2024-12-06 05:41:56 root] (utils.py 283): INFO Epoch: [0] [2360/2502] eta: 0:03:19 lr: 0.000020 loss_cls: 3.2159 (3.2752) grad_norm: 2.9074 (3.8093) time: 2.3878 data: 0.0003 max mem: 13912 +[2024-12-06 05:42:20 root] (utils.py 283): INFO Epoch: [0] [2370/2502] eta: 0:03:05 lr: 0.000020 loss_cls: 3.2036 (3.2747) grad_norm: 2.9214 (3.8059) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 05:42:44 root] (utils.py 283): INFO Epoch: [0] [2380/2502] eta: 0:02:52 lr: 0.000020 loss_cls: 3.4127 (3.2752) grad_norm: 2.8386 (3.8035) time: 2.3845 data: 0.0002 max mem: 13912 +[2024-12-06 05:43:08 root] (utils.py 283): INFO Epoch: [0] [2390/2502] eta: 0:02:38 lr: 0.000020 loss_cls: 3.3045 (3.2740) grad_norm: 2.8386 (3.7995) time: 2.3867 data: 0.0003 max mem: 13912 +[2024-12-06 05:43:32 root] (utils.py 283): INFO Epoch: [0] [2400/2502] eta: 0:02:24 lr: 0.000020 loss_cls: 3.1238 (3.2735) grad_norm: 2.8521 (3.7978) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 05:43:56 root] (utils.py 283): INFO Epoch: [0] [2410/2502] eta: 0:02:10 lr: 0.000020 loss_cls: 3.4044 (3.2740) grad_norm: 2.8676 (3.7944) time: 2.3831 data: 0.0002 max mem: 13912 +[2024-12-06 05:44:19 root] (utils.py 283): INFO Epoch: [0] [2420/2502] eta: 0:01:56 lr: 0.000020 loss_cls: 3.2150 (3.2730) grad_norm: 3.0088 (3.7937) time: 2.3820 data: 0.0002 max mem: 13912 +[2024-12-06 05:44:43 root] (utils.py 283): INFO Epoch: [0] [2430/2502] eta: 0:01:42 lr: 0.000020 loss_cls: 2.9283 (3.2727) grad_norm: 3.3009 (3.7922) time: 2.3871 data: 0.0003 max mem: 13912 +[2024-12-06 05:45:07 root] (utils.py 283): INFO Epoch: [0] [2440/2502] eta: 0:01:28 lr: 0.000020 loss_cls: 3.3961 (3.2730) grad_norm: 3.0761 (3.7899) time: 2.3830 data: 0.0003 max mem: 13912 +[2024-12-06 05:45:31 root] (utils.py 283): INFO Epoch: [0] [2450/2502] eta: 0:01:14 lr: 0.000020 loss_cls: 3.4265 (3.2736) grad_norm: 3.1393 (3.7874) time: 2.3772 data: 0.0003 max mem: 13912 +[2024-12-06 05:45:55 root] (utils.py 283): INFO Epoch: [0] [2460/2502] eta: 0:01:00 lr: 0.000020 loss_cls: 3.4517 (3.2735) grad_norm: 3.1458 (3.7847) time: 2.3833 data: 0.0003 max mem: 13912 +[2024-12-06 05:46:19 root] (utils.py 283): INFO Epoch: [0] [2470/2502] eta: 0:00:46 lr: 0.000020 loss_cls: 3.4346 (3.2724) grad_norm: 3.0840 (3.7816) time: 2.3832 data: 0.0003 max mem: 13912 +[2024-12-06 05:46:42 root] (utils.py 283): INFO Epoch: [0] [2480/2502] eta: 0:00:31 lr: 0.000020 loss_cls: 2.9718 (3.2716) grad_norm: 3.0506 (3.7804) time: 2.3802 data: 0.0002 max mem: 13912 +[2024-12-06 05:47:07 root] (utils.py 283): INFO Epoch: [0] [2490/2502] eta: 0:00:17 lr: 0.000020 loss_cls: 3.1766 (3.2723) grad_norm: 2.8951 (3.7773) time: 2.3991 data: 0.0243 max mem: 13912 +[2024-12-06 05:47:30 root] (utils.py 283): INFO Epoch: [0] [2500/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 3.2659 (3.2717) grad_norm: 2.8920 (3.7742) time: 2.4042 data: 0.0243 max mem: 13912 +[2024-12-06 05:47:33 root] (utils.py 283): INFO Epoch: [0] [2501/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 3.2659 (3.2712) grad_norm: 2.8951 (3.7740) time: 2.4043 data: 0.0243 max mem: 13912 +[2024-12-06 05:47:33 root] (utils.py 297): INFO Epoch: [0] Total time: 1:00:46 (1.4576 s / it) +[2024-12-06 05:47:33 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 3.2659 (3.2713) grad_norm: 2.8951 (3.7740) +[2024-12-06 05:47:34 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:30 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4566 (0.4566) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.3099 data: 0.0003 max mem: 13912 +[2024-12-06 05:47:37 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:28 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6320 (0.6857) acc1: 85.9375 (86.2926) acc3: 96.0938 (95.4545) acc5: 96.8750 (97.0170) time: 0.3242 data: 0.0003 max mem: 13912 +[2024-12-06 05:47:40 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:24 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6897 (0.7255) acc1: 83.5938 (84.7470) acc3: 95.3125 (94.8661) acc5: 96.8750 (96.6518) time: 0.3179 data: 0.0003 max mem: 13912 +[2024-12-06 05:47:43 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:21 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7540 (0.7501) acc1: 83.5938 (83.9214) acc3: 94.5312 (94.7329) acc5: 96.8750 (96.5726) time: 0.3145 data: 0.0004 max mem: 13912 +[2024-12-06 05:47:46 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:18 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7208 (0.7536) acc1: 84.3750 (83.8986) acc3: 94.5312 (94.7599) acc5: 96.8750 (96.6463) time: 0.3220 data: 0.0004 max mem: 13912 +[2024-12-06 05:47:50 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8817 (0.8342) acc1: 78.9062 (81.9240) acc3: 91.4062 (93.4283) acc5: 93.7500 (95.7414) time: 0.3261 data: 0.0004 max mem: 13912 +[2024-12-06 05:47:53 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:12 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1077 (0.8709) acc1: 75.0000 (81.2756) acc3: 87.5000 (92.6998) acc5: 91.4062 (95.1204) time: 0.3143 data: 0.0004 max mem: 13912 +[2024-12-06 05:47:56 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0887 (0.9007) acc1: 76.5625 (80.5568) acc3: 89.0625 (92.3856) acc5: 91.4062 (94.8283) time: 0.3130 data: 0.0004 max mem: 13912 +[2024-12-06 05:47:59 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:05 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0983 (0.9339) acc1: 75.0000 (79.7743) acc3: 89.0625 (91.7631) acc5: 92.1875 (94.4734) time: 0.3250 data: 0.0006 max mem: 13912 +[2024-12-06 05:48:02 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:02 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1609 (0.9589) acc1: 74.2188 (79.0264) acc3: 87.5000 (91.3805) acc5: 92.1875 (94.1793) time: 0.3180 data: 0.0005 max mem: 13912 +[2024-12-06 05:48:05 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0072 (0.9550) acc1: 74.2188 (79.0960) acc3: 89.8438 (91.4480) acc5: 92.9688 (94.2880) time: 0.3134 data: 0.0005 max mem: 13912 +[2024-12-06 05:48:05 root] (utils.py 297): INFO Test: Total time: 0:00:31 (0.3193 s / it) +[2024-12-06 05:48:05 root] (engine.py 264): INFO * Acc@1 78.998 Acc@3 91.628 Acc@5 94.380 loss 0.949 flops 3.584 layer_flops 3.536 +[2024-12-06 05:48:05 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.0% +[2024-12-06 05:48:05 root] (main.py 551): INFO Max accuracy: 79.00% +[2024-12-06 05:48:07 root] (utils.py 283): INFO Epoch: [1] [ 0/2502] eta: 1:42:55 lr: 0.000020 loss_cls: 2.7591 (2.7591) grad_norm: 4.4109 (4.4109) time: 2.4681 data: 0.0004 max mem: 13912 +[2024-12-06 05:48:31 root] (utils.py 283): INFO Epoch: [1] [ 10/2502] eta: 1:38:38 lr: 0.000020 loss_cls: 2.8746 (3.1212) grad_norm: 3.0427 (3.1908) time: 2.3750 data: 0.0003 max mem: 13912 +[2024-12-06 05:48:55 root] (utils.py 283): INFO Epoch: [1] [ 20/2502] eta: 1:38:26 lr: 0.000020 loss_cls: 3.4105 (3.2134) grad_norm: 3.0162 (3.2761) time: 2.3751 data: 0.0003 max mem: 13912 +[2024-12-06 05:49:19 root] (utils.py 283): INFO Epoch: [1] [ 30/2502] eta: 1:38:06 lr: 0.000020 loss_cls: 3.4640 (3.2969) grad_norm: 2.9179 (3.4432) time: 2.3848 data: 0.0003 max mem: 13912 +[2024-12-06 05:49:43 root] (utils.py 283): INFO Epoch: [1] [ 40/2502] eta: 1:37:38 lr: 0.000020 loss_cls: 3.4640 (3.2803) grad_norm: 2.8834 (3.3395) time: 2.3799 data: 0.0003 max mem: 13912 +[2024-12-06 05:50:06 root] (utils.py 283): INFO Epoch: [1] [ 50/2502] eta: 1:37:21 lr: 0.000020 loss_cls: 3.3348 (3.2977) grad_norm: 2.8266 (3.2748) time: 2.3842 data: 0.0002 max mem: 13912 +[2024-12-06 05:50:30 root] (utils.py 283): INFO Epoch: [1] [ 60/2502] eta: 1:36:57 lr: 0.000020 loss_cls: 3.4899 (3.3383) grad_norm: 2.8266 (3.2048) time: 2.3879 data: 0.0003 max mem: 13912 +[2024-12-06 05:50:54 root] (utils.py 283): INFO Epoch: [1] [ 70/2502] eta: 1:36:35 lr: 0.000020 loss_cls: 3.4899 (3.3479) grad_norm: 2.9102 (3.3142) time: 2.3842 data: 0.0002 max mem: 13912 +[2024-12-06 05:51:18 root] (utils.py 283): INFO Epoch: [1] [ 80/2502] eta: 1:36:09 lr: 0.000020 loss_cls: 3.2549 (3.3347) grad_norm: 3.1406 (3.3033) time: 2.3817 data: 0.0002 max mem: 13912 +[2024-12-06 05:51:42 root] (utils.py 283): INFO Epoch: [1] [ 90/2502] eta: 1:35:40 lr: 0.000020 loss_cls: 3.2317 (3.3048) grad_norm: 2.7713 (3.2605) time: 2.3700 data: 0.0002 max mem: 13912 +[2024-12-06 05:52:05 root] (utils.py 283): INFO Epoch: [1] [ 100/2502] eta: 1:35:17 lr: 0.000020 loss_cls: 3.3770 (3.3028) grad_norm: 2.8920 (3.3640) time: 2.3735 data: 0.0003 max mem: 13912 +[2024-12-06 05:52:29 root] (utils.py 283): INFO Epoch: [1] [ 110/2502] eta: 1:34:56 lr: 0.000020 loss_cls: 3.2885 (3.2707) grad_norm: 2.7850 (3.3181) time: 2.3868 data: 0.0003 max mem: 13912 +[2024-12-06 05:52:53 root] (utils.py 283): INFO Epoch: [1] [ 120/2502] eta: 1:34:29 lr: 0.000020 loss_cls: 3.1684 (3.2798) grad_norm: 2.7448 (3.2902) time: 2.3782 data: 0.0002 max mem: 13912 +[2024-12-06 05:53:17 root] (utils.py 283): INFO Epoch: [1] [ 130/2502] eta: 1:34:07 lr: 0.000020 loss_cls: 3.2787 (3.2740) grad_norm: 2.8522 (3.4486) time: 2.3794 data: 0.0002 max mem: 13912 +[2024-12-06 05:53:41 root] (utils.py 283): INFO Epoch: [1] [ 140/2502] eta: 1:33:44 lr: 0.000020 loss_cls: 3.2721 (3.2828) grad_norm: 3.0219 (3.4318) time: 2.3873 data: 0.0003 max mem: 13912 +[2024-12-06 05:54:05 root] (utils.py 283): INFO Epoch: [1] [ 150/2502] eta: 1:33:20 lr: 0.000020 loss_cls: 3.2721 (3.2664) grad_norm: 3.0890 (3.4348) time: 2.3837 data: 0.0003 max mem: 13912 +[2024-12-06 05:54:28 root] (utils.py 283): INFO Epoch: [1] [ 160/2502] eta: 1:32:58 lr: 0.000020 loss_cls: 3.3602 (3.2731) grad_norm: 2.8006 (3.4121) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 05:54:52 root] (utils.py 283): INFO Epoch: [1] [ 170/2502] eta: 1:32:32 lr: 0.000020 loss_cls: 3.3823 (3.2733) grad_norm: 2.7122 (3.3755) time: 2.3795 data: 0.0003 max mem: 13912 +[2024-12-06 05:55:16 root] (utils.py 283): INFO Epoch: [1] [ 180/2502] eta: 1:32:10 lr: 0.000020 loss_cls: 3.3823 (3.2787) grad_norm: 2.8437 (3.3626) time: 2.3809 data: 0.0003 max mem: 13912 +[2024-12-06 05:55:40 root] (utils.py 283): INFO Epoch: [1] [ 190/2502] eta: 1:31:46 lr: 0.000020 loss_cls: 3.3681 (3.2775) grad_norm: 3.0642 (3.3582) time: 2.3884 data: 0.0002 max mem: 13912 +[2024-12-06 05:56:04 root] (utils.py 283): INFO Epoch: [1] [ 200/2502] eta: 1:31:23 lr: 0.000020 loss_cls: 3.3635 (3.2834) grad_norm: 2.8587 (3.3378) time: 2.3846 data: 0.0002 max mem: 13912 +[2024-12-06 05:56:28 root] (utils.py 283): INFO Epoch: [1] [ 210/2502] eta: 1:30:59 lr: 0.000020 loss_cls: 3.4382 (3.2934) grad_norm: 2.8587 (3.3391) time: 2.3839 data: 0.0002 max mem: 13912 +[2024-12-06 05:56:51 root] (utils.py 283): INFO Epoch: [1] [ 220/2502] eta: 1:30:36 lr: 0.000020 loss_cls: 3.3291 (3.2856) grad_norm: 3.0665 (3.3353) time: 2.3830 data: 0.0002 max mem: 13912 +[2024-12-06 05:57:15 root] (utils.py 283): INFO Epoch: [1] [ 230/2502] eta: 1:30:12 lr: 0.000020 loss_cls: 3.4953 (3.2986) grad_norm: 3.2457 (3.3513) time: 2.3856 data: 0.0003 max mem: 13912 +[2024-12-06 05:57:39 root] (utils.py 283): INFO Epoch: [1] [ 240/2502] eta: 1:29:45 lr: 0.000020 loss_cls: 3.3393 (3.2801) grad_norm: 3.1308 (3.3394) time: 2.3663 data: 0.0003 max mem: 13912 +[2024-12-06 05:58:03 root] (utils.py 283): INFO Epoch: [1] [ 250/2502] eta: 1:29:21 lr: 0.000020 loss_cls: 3.1127 (3.2708) grad_norm: 3.0113 (3.3425) time: 2.3608 data: 0.0002 max mem: 13912 +[2024-12-06 05:58:26 root] (utils.py 283): INFO Epoch: [1] [ 260/2502] eta: 1:28:57 lr: 0.000020 loss_cls: 3.1755 (3.2655) grad_norm: 3.0113 (3.3482) time: 2.3764 data: 0.0002 max mem: 13912 +[2024-12-06 05:58:50 root] (utils.py 283): INFO Epoch: [1] [ 270/2502] eta: 1:28:33 lr: 0.000020 loss_cls: 3.4423 (3.2718) grad_norm: 2.8515 (3.3402) time: 2.3812 data: 0.0003 max mem: 13912 +[2024-12-06 05:59:14 root] (utils.py 283): INFO Epoch: [1] [ 280/2502] eta: 1:28:10 lr: 0.000020 loss_cls: 3.2970 (3.2675) grad_norm: 2.8515 (3.3876) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 05:59:38 root] (utils.py 283): INFO Epoch: [1] [ 290/2502] eta: 1:27:46 lr: 0.000020 loss_cls: 2.9245 (3.2510) grad_norm: 2.9182 (3.3848) time: 2.3805 data: 0.0002 max mem: 13912 +[2024-12-06 06:00:01 root] (utils.py 283): INFO Epoch: [1] [ 300/2502] eta: 1:27:20 lr: 0.000020 loss_cls: 2.9245 (3.2489) grad_norm: 3.0403 (3.3906) time: 2.3653 data: 0.0002 max mem: 13912 +[2024-12-06 06:00:25 root] (utils.py 283): INFO Epoch: [1] [ 310/2502] eta: 1:26:57 lr: 0.000020 loss_cls: 3.2292 (3.2485) grad_norm: 3.0165 (3.3802) time: 2.3755 data: 0.0002 max mem: 13912 +[2024-12-06 06:00:49 root] (utils.py 283): INFO Epoch: [1] [ 320/2502] eta: 1:26:34 lr: 0.000020 loss_cls: 3.2243 (3.2466) grad_norm: 2.9653 (3.3708) time: 2.3868 data: 0.0003 max mem: 13912 +[2024-12-06 06:01:13 root] (utils.py 283): INFO Epoch: [1] [ 330/2502] eta: 1:26:11 lr: 0.000020 loss_cls: 3.1447 (3.2438) grad_norm: 3.0766 (3.3669) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 06:01:37 root] (utils.py 283): INFO Epoch: [1] [ 340/2502] eta: 1:25:47 lr: 0.000020 loss_cls: 2.9906 (3.2399) grad_norm: 3.0073 (3.3582) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 06:02:01 root] (utils.py 283): INFO Epoch: [1] [ 350/2502] eta: 1:25:23 lr: 0.000020 loss_cls: 3.0131 (3.2384) grad_norm: 2.8971 (3.3548) time: 2.3823 data: 0.0003 max mem: 13912 +[2024-12-06 06:02:24 root] (utils.py 283): INFO Epoch: [1] [ 360/2502] eta: 1:24:59 lr: 0.000020 loss_cls: 2.8063 (3.2219) grad_norm: 2.8498 (3.3511) time: 2.3754 data: 0.0003 max mem: 13912 +[2024-12-06 06:02:48 root] (utils.py 283): INFO Epoch: [1] [ 370/2502] eta: 1:24:35 lr: 0.000020 loss_cls: 2.7543 (3.2179) grad_norm: 2.6955 (3.3369) time: 2.3739 data: 0.0003 max mem: 13912 +[2024-12-06 06:03:12 root] (utils.py 283): INFO Epoch: [1] [ 380/2502] eta: 1:24:11 lr: 0.000020 loss_cls: 3.2879 (3.2161) grad_norm: 2.7539 (3.3284) time: 2.3792 data: 0.0003 max mem: 13912 +[2024-12-06 06:03:36 root] (utils.py 283): INFO Epoch: [1] [ 390/2502] eta: 1:23:47 lr: 0.000020 loss_cls: 3.1684 (3.2118) grad_norm: 2.9695 (3.3254) time: 2.3827 data: 0.0003 max mem: 13912 +[2024-12-06 06:04:00 root] (utils.py 283): INFO Epoch: [1] [ 400/2502] eta: 1:23:24 lr: 0.000020 loss_cls: 3.2662 (3.2118) grad_norm: 2.9522 (3.3247) time: 2.3839 data: 0.0002 max mem: 13912 +[2024-12-06 06:04:24 root] (utils.py 283): INFO Epoch: [1] [ 410/2502] eta: 1:23:00 lr: 0.000020 loss_cls: 3.2879 (3.2103) grad_norm: 2.9522 (3.3305) time: 2.3833 data: 0.0003 max mem: 13912 +[2024-12-06 06:04:47 root] (utils.py 283): INFO Epoch: [1] [ 420/2502] eta: 1:22:36 lr: 0.000020 loss_cls: 3.1816 (3.2111) grad_norm: 2.9475 (3.3231) time: 2.3796 data: 0.0002 max mem: 13912 +[2024-12-06 06:05:11 root] (utils.py 283): INFO Epoch: [1] [ 430/2502] eta: 1:22:12 lr: 0.000020 loss_cls: 3.4434 (3.2156) grad_norm: 2.8861 (3.3547) time: 2.3766 data: 0.0002 max mem: 13912 +[2024-12-06 06:05:35 root] (utils.py 283): INFO Epoch: [1] [ 440/2502] eta: 1:21:48 lr: 0.000020 loss_cls: 3.4911 (3.2198) grad_norm: 2.9504 (3.3470) time: 2.3731 data: 0.0003 max mem: 13912 +[2024-12-06 06:05:59 root] (utils.py 283): INFO Epoch: [1] [ 450/2502] eta: 1:21:24 lr: 0.000020 loss_cls: 3.1927 (3.2131) grad_norm: 2.8989 (3.3383) time: 2.3780 data: 0.0003 max mem: 13912 +[2024-12-06 06:06:22 root] (utils.py 283): INFO Epoch: [1] [ 460/2502] eta: 1:21:00 lr: 0.000020 loss_cls: 3.0615 (3.2115) grad_norm: 2.7441 (3.3299) time: 2.3834 data: 0.0003 max mem: 13912 +[2024-12-06 06:06:46 root] (utils.py 283): INFO Epoch: [1] [ 470/2502] eta: 1:20:37 lr: 0.000020 loss_cls: 3.2625 (3.2128) grad_norm: 2.8061 (3.3221) time: 2.3821 data: 0.0003 max mem: 13912 +[2024-12-06 06:07:10 root] (utils.py 283): INFO Epoch: [1] [ 480/2502] eta: 1:20:13 lr: 0.000020 loss_cls: 3.3384 (3.2085) grad_norm: 2.8061 (3.3107) time: 2.3828 data: 0.0002 max mem: 13912 +[2024-12-06 06:07:34 root] (utils.py 283): INFO Epoch: [1] [ 490/2502] eta: 1:19:50 lr: 0.000020 loss_cls: 3.3644 (3.2112) grad_norm: 2.8201 (3.3577) time: 2.3882 data: 0.0003 max mem: 13912 +[2024-12-06 06:07:58 root] (utils.py 283): INFO Epoch: [1] [ 500/2502] eta: 1:19:26 lr: 0.000020 loss_cls: 3.3644 (3.2087) grad_norm: 3.0103 (3.3478) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 06:08:22 root] (utils.py 283): INFO Epoch: [1] [ 510/2502] eta: 1:19:02 lr: 0.000020 loss_cls: 3.1538 (3.2079) grad_norm: 2.9281 (3.3411) time: 2.3741 data: 0.0003 max mem: 13912 +[2024-12-06 06:08:45 root] (utils.py 283): INFO Epoch: [1] [ 520/2502] eta: 1:18:37 lr: 0.000020 loss_cls: 3.1538 (3.2083) grad_norm: 2.9342 (3.3394) time: 2.3676 data: 0.0003 max mem: 13912 +[2024-12-06 06:09:09 root] (utils.py 283): INFO Epoch: [1] [ 530/2502] eta: 1:18:13 lr: 0.000020 loss_cls: 3.1849 (3.2065) grad_norm: 3.0802 (3.3450) time: 2.3673 data: 0.0003 max mem: 13912 +[2024-12-06 06:09:33 root] (utils.py 283): INFO Epoch: [1] [ 540/2502] eta: 1:17:49 lr: 0.000020 loss_cls: 3.3451 (3.2055) grad_norm: 3.0802 (3.3409) time: 2.3795 data: 0.0002 max mem: 13912 +[2024-12-06 06:09:57 root] (utils.py 283): INFO Epoch: [1] [ 550/2502] eta: 1:17:26 lr: 0.000020 loss_cls: 3.4031 (3.2079) grad_norm: 2.9899 (3.3353) time: 2.3867 data: 0.0002 max mem: 13912 +[2024-12-06 06:10:21 root] (utils.py 283): INFO Epoch: [1] [ 560/2502] eta: 1:17:03 lr: 0.000020 loss_cls: 3.3311 (3.2056) grad_norm: 2.8297 (3.3269) time: 2.3920 data: 0.0003 max mem: 13912 +[2024-12-06 06:10:44 root] (utils.py 283): INFO Epoch: [1] [ 570/2502] eta: 1:16:39 lr: 0.000020 loss_cls: 3.0491 (3.2045) grad_norm: 2.8293 (3.3278) time: 2.3948 data: 0.0003 max mem: 13912 +[2024-12-06 06:11:08 root] (utils.py 283): INFO Epoch: [1] [ 580/2502] eta: 1:16:16 lr: 0.000020 loss_cls: 3.0001 (3.1994) grad_norm: 2.6395 (3.3154) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 06:11:32 root] (utils.py 283): INFO Epoch: [1] [ 590/2502] eta: 1:15:52 lr: 0.000020 loss_cls: 3.2793 (3.1995) grad_norm: 2.7475 (3.3115) time: 2.3869 data: 0.0003 max mem: 13912 +[2024-12-06 06:11:56 root] (utils.py 283): INFO Epoch: [1] [ 600/2502] eta: 1:15:28 lr: 0.000020 loss_cls: 3.3789 (3.1991) grad_norm: 2.8522 (3.3036) time: 2.3823 data: 0.0003 max mem: 13912 +[2024-12-06 06:12:20 root] (utils.py 283): INFO Epoch: [1] [ 610/2502] eta: 1:15:04 lr: 0.000020 loss_cls: 3.3608 (3.1988) grad_norm: 2.7441 (3.3064) time: 2.3760 data: 0.0003 max mem: 13912 +[2024-12-06 06:12:43 root] (utils.py 283): INFO Epoch: [1] [ 620/2502] eta: 1:14:40 lr: 0.000020 loss_cls: 3.2868 (3.2006) grad_norm: 2.7406 (3.3013) time: 2.3697 data: 0.0002 max mem: 13912 +[2024-12-06 06:13:07 root] (utils.py 283): INFO Epoch: [1] [ 630/2502] eta: 1:14:16 lr: 0.000020 loss_cls: 3.2868 (3.2028) grad_norm: 2.8853 (3.3263) time: 2.3776 data: 0.0002 max mem: 13912 +[2024-12-06 06:13:31 root] (utils.py 283): INFO Epoch: [1] [ 640/2502] eta: 1:13:53 lr: 0.000020 loss_cls: 3.1358 (3.1976) grad_norm: 3.0124 (3.3219) time: 2.3879 data: 0.0002 max mem: 13912 +[2024-12-06 06:13:55 root] (utils.py 283): INFO Epoch: [1] [ 650/2502] eta: 1:13:29 lr: 0.000020 loss_cls: 3.2959 (3.1987) grad_norm: 3.1129 (3.3234) time: 2.3888 data: 0.0002 max mem: 13912 +[2024-12-06 06:14:19 root] (utils.py 283): INFO Epoch: [1] [ 660/2502] eta: 1:13:05 lr: 0.000020 loss_cls: 3.4089 (3.1968) grad_norm: 2.9726 (3.3215) time: 2.3877 data: 0.0002 max mem: 13912 +[2024-12-06 06:14:43 root] (utils.py 283): INFO Epoch: [1] [ 670/2502] eta: 1:12:42 lr: 0.000020 loss_cls: 3.2993 (3.1969) grad_norm: 2.8938 (3.3148) time: 2.3870 data: 0.0002 max mem: 13912 +[2024-12-06 06:15:07 root] (utils.py 283): INFO Epoch: [1] [ 680/2502] eta: 1:12:18 lr: 0.000020 loss_cls: 3.2993 (3.1965) grad_norm: 2.8938 (3.3317) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 06:15:31 root] (utils.py 283): INFO Epoch: [1] [ 690/2502] eta: 1:11:55 lr: 0.000020 loss_cls: 3.1498 (3.1936) grad_norm: 3.0360 (3.3285) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 06:15:55 root] (utils.py 283): INFO Epoch: [1] [ 700/2502] eta: 1:11:31 lr: 0.000020 loss_cls: 3.2761 (3.1942) grad_norm: 2.7386 (3.3209) time: 2.3924 data: 0.0003 max mem: 13912 +[2024-12-06 06:16:18 root] (utils.py 283): INFO Epoch: [1] [ 710/2502] eta: 1:11:08 lr: 0.000020 loss_cls: 3.4887 (3.1963) grad_norm: 2.7233 (3.3234) time: 2.3915 data: 0.0003 max mem: 13912 +[2024-12-06 06:16:42 root] (utils.py 283): INFO Epoch: [1] [ 720/2502] eta: 1:10:44 lr: 0.000020 loss_cls: 3.4290 (3.1955) grad_norm: 2.9038 (3.3205) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 06:17:06 root] (utils.py 283): INFO Epoch: [1] [ 730/2502] eta: 1:10:20 lr: 0.000020 loss_cls: 3.2573 (3.1982) grad_norm: 2.7840 (3.3120) time: 2.3854 data: 0.0003 max mem: 13912 +[2024-12-06 06:17:30 root] (utils.py 283): INFO Epoch: [1] [ 740/2502] eta: 1:09:57 lr: 0.000020 loss_cls: 3.4383 (3.2026) grad_norm: 2.7840 (3.3157) time: 2.3937 data: 0.0002 max mem: 13912 +[2024-12-06 06:17:54 root] (utils.py 283): INFO Epoch: [1] [ 750/2502] eta: 1:09:33 lr: 0.000020 loss_cls: 3.4252 (3.2028) grad_norm: 3.0573 (3.3131) time: 2.3837 data: 0.0002 max mem: 13912 +[2024-12-06 06:18:18 root] (utils.py 283): INFO Epoch: [1] [ 760/2502] eta: 1:09:08 lr: 0.000020 loss_cls: 3.3904 (3.2043) grad_norm: 3.0573 (3.3105) time: 2.3667 data: 0.0002 max mem: 13912 +[2024-12-06 06:18:41 root] (utils.py 283): INFO Epoch: [1] [ 770/2502] eta: 1:08:45 lr: 0.000020 loss_cls: 3.0911 (3.2014) grad_norm: 2.7437 (3.3041) time: 2.3805 data: 0.0002 max mem: 13912 +[2024-12-06 06:19:05 root] (utils.py 283): INFO Epoch: [1] [ 780/2502] eta: 1:08:21 lr: 0.000020 loss_cls: 3.0848 (3.2005) grad_norm: 2.7437 (3.3016) time: 2.3889 data: 0.0003 max mem: 13912 +[2024-12-06 06:19:29 root] (utils.py 283): INFO Epoch: [1] [ 790/2502] eta: 1:07:57 lr: 0.000020 loss_cls: 3.2214 (3.2008) grad_norm: 2.7848 (3.2982) time: 2.3884 data: 0.0003 max mem: 13912 +[2024-12-06 06:19:53 root] (utils.py 283): INFO Epoch: [1] [ 800/2502] eta: 1:07:34 lr: 0.000020 loss_cls: 3.3011 (3.2014) grad_norm: 3.1103 (3.3313) time: 2.3916 data: 0.0002 max mem: 13912 +[2024-12-06 06:20:17 root] (utils.py 283): INFO Epoch: [1] [ 810/2502] eta: 1:07:10 lr: 0.000020 loss_cls: 3.3011 (3.2024) grad_norm: 3.0015 (3.3357) time: 2.3741 data: 0.0003 max mem: 13912 +[2024-12-06 06:20:40 root] (utils.py 283): INFO Epoch: [1] [ 820/2502] eta: 1:06:45 lr: 0.000020 loss_cls: 3.3578 (3.2020) grad_norm: 2.8188 (3.3317) time: 2.3525 data: 0.0003 max mem: 13912 +[2024-12-06 06:21:04 root] (utils.py 283): INFO Epoch: [1] [ 830/2502] eta: 1:06:21 lr: 0.000020 loss_cls: 3.3295 (3.2003) grad_norm: 2.8398 (3.3286) time: 2.3718 data: 0.0003 max mem: 13912 +[2024-12-06 06:21:28 root] (utils.py 283): INFO Epoch: [1] [ 840/2502] eta: 1:05:58 lr: 0.000020 loss_cls: 3.0571 (3.1968) grad_norm: 2.8399 (3.3334) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 06:21:52 root] (utils.py 283): INFO Epoch: [1] [ 850/2502] eta: 1:05:34 lr: 0.000020 loss_cls: 3.2413 (3.1983) grad_norm: 2.8937 (3.3311) time: 2.3802 data: 0.0003 max mem: 13912 +[2024-12-06 06:22:16 root] (utils.py 283): INFO Epoch: [1] [ 860/2502] eta: 1:05:10 lr: 0.000020 loss_cls: 3.2651 (3.1971) grad_norm: 2.8178 (3.3256) time: 2.3838 data: 0.0003 max mem: 13912 +[2024-12-06 06:22:39 root] (utils.py 283): INFO Epoch: [1] [ 870/2502] eta: 1:04:46 lr: 0.000020 loss_cls: 3.1603 (3.1951) grad_norm: 2.7057 (3.3374) time: 2.3767 data: 0.0002 max mem: 13912 +[2024-12-06 06:23:03 root] (utils.py 283): INFO Epoch: [1] [ 880/2502] eta: 1:04:22 lr: 0.000020 loss_cls: 3.2906 (3.1968) grad_norm: 3.1742 (3.3391) time: 2.3760 data: 0.0002 max mem: 13912 +[2024-12-06 06:23:27 root] (utils.py 283): INFO Epoch: [1] [ 890/2502] eta: 1:03:59 lr: 0.000020 loss_cls: 3.4218 (3.1985) grad_norm: 2.9715 (3.3349) time: 2.3870 data: 0.0003 max mem: 13912 +[2024-12-06 06:23:51 root] (utils.py 283): INFO Epoch: [1] [ 900/2502] eta: 1:03:35 lr: 0.000020 loss_cls: 3.2438 (3.1964) grad_norm: 2.9636 (3.3316) time: 2.3844 data: 0.0003 max mem: 13912 +[2024-12-06 06:24:15 root] (utils.py 283): INFO Epoch: [1] [ 910/2502] eta: 1:03:11 lr: 0.000020 loss_cls: 3.2971 (3.1972) grad_norm: 2.9731 (3.3361) time: 2.3767 data: 0.0002 max mem: 13912 +[2024-12-06 06:24:38 root] (utils.py 283): INFO Epoch: [1] [ 920/2502] eta: 1:02:46 lr: 0.000020 loss_cls: 3.3780 (3.1987) grad_norm: 2.9474 (3.3358) time: 2.3557 data: 0.0002 max mem: 13912 +[2024-12-06 06:25:02 root] (utils.py 283): INFO Epoch: [1] [ 930/2502] eta: 1:02:22 lr: 0.000020 loss_cls: 3.2621 (3.1977) grad_norm: 2.7761 (3.3323) time: 2.3600 data: 0.0002 max mem: 13912 +[2024-12-06 06:25:25 root] (utils.py 283): INFO Epoch: [1] [ 940/2502] eta: 1:01:58 lr: 0.000020 loss_cls: 3.0416 (3.1967) grad_norm: 2.7761 (3.3352) time: 2.3683 data: 0.0002 max mem: 13912 +[2024-12-06 06:25:49 root] (utils.py 283): INFO Epoch: [1] [ 950/2502] eta: 1:01:34 lr: 0.000020 loss_cls: 3.1160 (3.1970) grad_norm: 2.8249 (3.3317) time: 2.3633 data: 0.0003 max mem: 13912 +[2024-12-06 06:26:13 root] (utils.py 283): INFO Epoch: [1] [ 960/2502] eta: 1:01:11 lr: 0.000020 loss_cls: 3.3922 (3.1981) grad_norm: 2.8091 (3.3285) time: 2.3788 data: 0.0003 max mem: 13912 +[2024-12-06 06:26:37 root] (utils.py 283): INFO Epoch: [1] [ 970/2502] eta: 1:00:47 lr: 0.000020 loss_cls: 3.0353 (3.1941) grad_norm: 2.9783 (3.3286) time: 2.3868 data: 0.0003 max mem: 13912 +[2024-12-06 06:27:01 root] (utils.py 283): INFO Epoch: [1] [ 980/2502] eta: 1:00:23 lr: 0.000020 loss_cls: 3.0562 (3.1938) grad_norm: 3.0182 (3.3271) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 06:27:25 root] (utils.py 283): INFO Epoch: [1] [ 990/2502] eta: 0:59:59 lr: 0.000020 loss_cls: 3.1828 (3.1947) grad_norm: 3.0565 (3.3261) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 06:27:49 root] (utils.py 283): INFO Epoch: [1] [1000/2502] eta: 0:59:36 lr: 0.000020 loss_cls: 3.2175 (3.1934) grad_norm: 2.9841 (3.3226) time: 2.3948 data: 0.0003 max mem: 13912 +[2024-12-06 06:28:13 root] (utils.py 283): INFO Epoch: [1] [1010/2502] eta: 0:59:13 lr: 0.000020 loss_cls: 3.2291 (3.1933) grad_norm: 2.8386 (3.3182) time: 2.4074 data: 0.0003 max mem: 13912 +[2024-12-06 06:28:37 root] (utils.py 283): INFO Epoch: [1] [1020/2502] eta: 0:58:49 lr: 0.000020 loss_cls: 3.2809 (3.1921) grad_norm: 3.0422 (3.3187) time: 2.4077 data: 0.0003 max mem: 13912 +[2024-12-06 06:29:01 root] (utils.py 283): INFO Epoch: [1] [1030/2502] eta: 0:58:26 lr: 0.000020 loss_cls: 3.2642 (3.1923) grad_norm: 3.0422 (3.3162) time: 2.4169 data: 0.0003 max mem: 13912 +[2024-12-06 06:29:25 root] (utils.py 283): INFO Epoch: [1] [1040/2502] eta: 0:58:02 lr: 0.000020 loss_cls: 3.1049 (3.1897) grad_norm: 2.7849 (3.3133) time: 2.4181 data: 0.0003 max mem: 13912 +[2024-12-06 06:29:49 root] (utils.py 283): INFO Epoch: [1] [1050/2502] eta: 0:57:39 lr: 0.000020 loss_cls: 3.1393 (3.1894) grad_norm: 2.8391 (3.3127) time: 2.3964 data: 0.0003 max mem: 13912 +[2024-12-06 06:30:13 root] (utils.py 283): INFO Epoch: [1] [1060/2502] eta: 0:57:15 lr: 0.000020 loss_cls: 3.3148 (3.1920) grad_norm: 2.9468 (3.3195) time: 2.3874 data: 0.0003 max mem: 13912 +[2024-12-06 06:30:37 root] (utils.py 283): INFO Epoch: [1] [1070/2502] eta: 0:56:51 lr: 0.000020 loss_cls: 3.5312 (3.1927) grad_norm: 3.1573 (3.3174) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 06:31:00 root] (utils.py 283): INFO Epoch: [1] [1080/2502] eta: 0:56:27 lr: 0.000020 loss_cls: 3.4417 (3.1941) grad_norm: 3.0923 (3.3324) time: 2.3810 data: 0.0003 max mem: 13912 +[2024-12-06 06:31:24 root] (utils.py 283): INFO Epoch: [1] [1090/2502] eta: 0:56:03 lr: 0.000020 loss_cls: 3.4186 (3.1951) grad_norm: 3.0602 (3.3297) time: 2.3779 data: 0.0003 max mem: 13912 +[2024-12-06 06:31:48 root] (utils.py 283): INFO Epoch: [1] [1100/2502] eta: 0:55:40 lr: 0.000020 loss_cls: 3.3919 (3.1935) grad_norm: 2.8290 (3.3253) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 06:32:12 root] (utils.py 283): INFO Epoch: [1] [1110/2502] eta: 0:55:16 lr: 0.000020 loss_cls: 3.3919 (3.1957) grad_norm: 2.7062 (3.3236) time: 2.3886 data: 0.0003 max mem: 13912 +[2024-12-06 06:32:36 root] (utils.py 283): INFO Epoch: [1] [1120/2502] eta: 0:54:52 lr: 0.000020 loss_cls: 3.3979 (3.1968) grad_norm: 2.9335 (3.3331) time: 2.3859 data: 0.0003 max mem: 13912 +[2024-12-06 06:33:00 root] (utils.py 283): INFO Epoch: [1] [1130/2502] eta: 0:54:28 lr: 0.000020 loss_cls: 3.3901 (3.1986) grad_norm: 3.0241 (3.3320) time: 2.3850 data: 0.0003 max mem: 13912 +[2024-12-06 06:33:24 root] (utils.py 283): INFO Epoch: [1] [1140/2502] eta: 0:54:05 lr: 0.000020 loss_cls: 3.4544 (3.1995) grad_norm: 3.0607 (3.3327) time: 2.3844 data: 0.0002 max mem: 13912 +[2024-12-06 06:33:47 root] (utils.py 283): INFO Epoch: [1] [1150/2502] eta: 0:53:41 lr: 0.000020 loss_cls: 3.4421 (3.2012) grad_norm: 3.0305 (3.3340) time: 2.3825 data: 0.0003 max mem: 13912 +[2024-12-06 06:34:11 root] (utils.py 283): INFO Epoch: [1] [1160/2502] eta: 0:53:17 lr: 0.000020 loss_cls: 3.3925 (3.2006) grad_norm: 2.9997 (3.3333) time: 2.3796 data: 0.0003 max mem: 13912 +[2024-12-06 06:34:35 root] (utils.py 283): INFO Epoch: [1] [1170/2502] eta: 0:52:53 lr: 0.000020 loss_cls: 3.3996 (3.2011) grad_norm: 2.7997 (3.3320) time: 2.3767 data: 0.0003 max mem: 13912 +[2024-12-06 06:34:59 root] (utils.py 283): INFO Epoch: [1] [1180/2502] eta: 0:52:29 lr: 0.000020 loss_cls: 3.3499 (3.2016) grad_norm: 2.7169 (3.3277) time: 2.3800 data: 0.0003 max mem: 13912 +[2024-12-06 06:35:23 root] (utils.py 283): INFO Epoch: [1] [1190/2502] eta: 0:52:05 lr: 0.000020 loss_cls: 3.2631 (3.2010) grad_norm: 2.6428 (3.3231) time: 2.3845 data: 0.0002 max mem: 13912 +[2024-12-06 06:35:47 root] (utils.py 283): INFO Epoch: [1] [1200/2502] eta: 0:51:42 lr: 0.000020 loss_cls: 3.2214 (3.2004) grad_norm: 2.6019 (3.3184) time: 2.3821 data: 0.0003 max mem: 13912 +[2024-12-06 06:36:10 root] (utils.py 283): INFO Epoch: [1] [1210/2502] eta: 0:51:18 lr: 0.000020 loss_cls: 3.1956 (3.1993) grad_norm: 2.8633 (3.3166) time: 2.3839 data: 0.0003 max mem: 13912 +[2024-12-06 06:36:34 root] (utils.py 283): INFO Epoch: [1] [1220/2502] eta: 0:50:54 lr: 0.000020 loss_cls: 3.3412 (3.2010) grad_norm: 2.9716 (3.3167) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 06:36:58 root] (utils.py 283): INFO Epoch: [1] [1230/2502] eta: 0:50:30 lr: 0.000020 loss_cls: 3.3412 (3.2015) grad_norm: 3.0136 (3.3203) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 06:37:22 root] (utils.py 283): INFO Epoch: [1] [1240/2502] eta: 0:50:07 lr: 0.000020 loss_cls: 3.3098 (3.2030) grad_norm: 3.1157 (3.3263) time: 2.3914 data: 0.0003 max mem: 13912 +[2024-12-06 06:37:46 root] (utils.py 283): INFO Epoch: [1] [1250/2502] eta: 0:49:43 lr: 0.000020 loss_cls: 3.2693 (3.2016) grad_norm: 2.9367 (3.3225) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 06:38:10 root] (utils.py 283): INFO Epoch: [1] [1260/2502] eta: 0:49:19 lr: 0.000020 loss_cls: 3.2693 (3.2029) grad_norm: 2.8649 (3.3207) time: 2.3826 data: 0.0003 max mem: 13912 +[2024-12-06 06:38:34 root] (utils.py 283): INFO Epoch: [1] [1270/2502] eta: 0:48:55 lr: 0.000020 loss_cls: 3.3939 (3.2023) grad_norm: 2.7752 (3.3164) time: 2.3762 data: 0.0003 max mem: 13912 +[2024-12-06 06:38:57 root] (utils.py 283): INFO Epoch: [1] [1280/2502] eta: 0:48:31 lr: 0.000020 loss_cls: 3.2823 (3.2016) grad_norm: 2.7752 (3.3150) time: 2.3732 data: 0.0002 max mem: 13912 +[2024-12-06 06:39:21 root] (utils.py 283): INFO Epoch: [1] [1290/2502] eta: 0:48:07 lr: 0.000020 loss_cls: 3.3473 (3.2024) grad_norm: 2.8774 (3.3120) time: 2.3793 data: 0.0003 max mem: 13912 +[2024-12-06 06:39:45 root] (utils.py 283): INFO Epoch: [1] [1300/2502] eta: 0:47:44 lr: 0.000020 loss_cls: 3.3623 (3.2028) grad_norm: 2.9292 (3.3129) time: 2.3897 data: 0.0003 max mem: 13912 +[2024-12-06 06:40:09 root] (utils.py 283): INFO Epoch: [1] [1310/2502] eta: 0:47:20 lr: 0.000020 loss_cls: 3.1127 (3.2012) grad_norm: 2.9292 (3.3097) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 06:40:33 root] (utils.py 283): INFO Epoch: [1] [1320/2502] eta: 0:46:56 lr: 0.000020 loss_cls: 3.1257 (3.2011) grad_norm: 2.7271 (3.3116) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 06:40:57 root] (utils.py 283): INFO Epoch: [1] [1330/2502] eta: 0:46:32 lr: 0.000020 loss_cls: 3.4729 (3.2018) grad_norm: 2.7437 (3.3145) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 06:41:21 root] (utils.py 283): INFO Epoch: [1] [1340/2502] eta: 0:46:08 lr: 0.000020 loss_cls: 3.4466 (3.1999) grad_norm: 3.0135 (3.3155) time: 2.3855 data: 0.0003 max mem: 13912 +[2024-12-06 06:41:44 root] (utils.py 283): INFO Epoch: [1] [1350/2502] eta: 0:45:45 lr: 0.000020 loss_cls: 3.4149 (3.2017) grad_norm: 3.1267 (3.3147) time: 2.3897 data: 0.0003 max mem: 13912 +[2024-12-06 06:42:08 root] (utils.py 283): INFO Epoch: [1] [1360/2502] eta: 0:45:21 lr: 0.000020 loss_cls: 3.3158 (3.1999) grad_norm: 3.1122 (3.3186) time: 2.3846 data: 0.0002 max mem: 13912 +[2024-12-06 06:42:32 root] (utils.py 283): INFO Epoch: [1] [1370/2502] eta: 0:44:57 lr: 0.000020 loss_cls: 3.2154 (3.1989) grad_norm: 2.9084 (3.3194) time: 2.3842 data: 0.0003 max mem: 13912 +[2024-12-06 06:42:56 root] (utils.py 283): INFO Epoch: [1] [1380/2502] eta: 0:44:33 lr: 0.000020 loss_cls: 3.1549 (3.1970) grad_norm: 2.8093 (3.3175) time: 2.3933 data: 0.0003 max mem: 13912 +[2024-12-06 06:43:20 root] (utils.py 283): INFO Epoch: [1] [1390/2502] eta: 0:44:10 lr: 0.000020 loss_cls: 3.1549 (3.1967) grad_norm: 2.7612 (3.3143) time: 2.4009 data: 0.0003 max mem: 13912 +[2024-12-06 06:43:44 root] (utils.py 283): INFO Epoch: [1] [1400/2502] eta: 0:43:46 lr: 0.000020 loss_cls: 3.3671 (3.1957) grad_norm: 2.7612 (3.3119) time: 2.4066 data: 0.0003 max mem: 13912 +[2024-12-06 06:44:08 root] (utils.py 283): INFO Epoch: [1] [1410/2502] eta: 0:43:22 lr: 0.000020 loss_cls: 3.2572 (3.1959) grad_norm: 2.7818 (3.3089) time: 2.4013 data: 0.0003 max mem: 13912 +[2024-12-06 06:44:32 root] (utils.py 283): INFO Epoch: [1] [1420/2502] eta: 0:42:59 lr: 0.000020 loss_cls: 3.2315 (3.1960) grad_norm: 2.9051 (3.3182) time: 2.3963 data: 0.0003 max mem: 13912 +[2024-12-06 06:44:56 root] (utils.py 283): INFO Epoch: [1] [1430/2502] eta: 0:42:35 lr: 0.000020 loss_cls: 3.2791 (3.1956) grad_norm: 2.8428 (3.3144) time: 2.3954 data: 0.0002 max mem: 13912 +[2024-12-06 06:45:20 root] (utils.py 283): INFO Epoch: [1] [1440/2502] eta: 0:42:11 lr: 0.000020 loss_cls: 3.0427 (3.1942) grad_norm: 2.7580 (3.3150) time: 2.3942 data: 0.0003 max mem: 13912 +[2024-12-06 06:45:44 root] (utils.py 283): INFO Epoch: [1] [1450/2502] eta: 0:41:47 lr: 0.000020 loss_cls: 3.0427 (3.1926) grad_norm: 2.8300 (3.3120) time: 2.3886 data: 0.0002 max mem: 13912 +[2024-12-06 06:46:08 root] (utils.py 283): INFO Epoch: [1] [1460/2502] eta: 0:41:23 lr: 0.000020 loss_cls: 3.1178 (3.1929) grad_norm: 2.8674 (3.3120) time: 2.3848 data: 0.0002 max mem: 13912 +[2024-12-06 06:46:32 root] (utils.py 283): INFO Epoch: [1] [1470/2502] eta: 0:41:00 lr: 0.000020 loss_cls: 3.3145 (3.1946) grad_norm: 2.8456 (3.3131) time: 2.3901 data: 0.0002 max mem: 13912 +[2024-12-06 06:46:56 root] (utils.py 283): INFO Epoch: [1] [1480/2502] eta: 0:40:36 lr: 0.000020 loss_cls: 3.3432 (3.1942) grad_norm: 2.7684 (3.3118) time: 2.3926 data: 0.0002 max mem: 13912 +[2024-12-06 06:47:20 root] (utils.py 283): INFO Epoch: [1] [1490/2502] eta: 0:40:12 lr: 0.000020 loss_cls: 3.2377 (3.1938) grad_norm: 2.7782 (3.3087) time: 2.3931 data: 0.0002 max mem: 13912 +[2024-12-06 06:47:43 root] (utils.py 283): INFO Epoch: [1] [1500/2502] eta: 0:39:48 lr: 0.000020 loss_cls: 3.2869 (3.1923) grad_norm: 2.8581 (3.3083) time: 2.3914 data: 0.0003 max mem: 13912 +[2024-12-06 06:48:07 root] (utils.py 283): INFO Epoch: [1] [1510/2502] eta: 0:39:24 lr: 0.000020 loss_cls: 3.1024 (3.1914) grad_norm: 3.0435 (3.3073) time: 2.3869 data: 0.0003 max mem: 13912 +[2024-12-06 06:48:31 root] (utils.py 283): INFO Epoch: [1] [1520/2502] eta: 0:39:01 lr: 0.000020 loss_cls: 3.2635 (3.1918) grad_norm: 2.7966 (3.3060) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 06:48:55 root] (utils.py 283): INFO Epoch: [1] [1530/2502] eta: 0:38:37 lr: 0.000020 loss_cls: 3.2635 (3.1914) grad_norm: 2.8790 (3.3054) time: 2.3855 data: 0.0003 max mem: 13912 +[2024-12-06 06:49:19 root] (utils.py 283): INFO Epoch: [1] [1540/2502] eta: 0:38:13 lr: 0.000020 loss_cls: 3.2389 (3.1928) grad_norm: 3.0714 (3.3052) time: 2.3816 data: 0.0003 max mem: 13912 +[2024-12-06 06:49:42 root] (utils.py 283): INFO Epoch: [1] [1550/2502] eta: 0:37:49 lr: 0.000020 loss_cls: 3.4440 (3.1947) grad_norm: 3.0928 (3.3034) time: 2.3652 data: 0.0003 max mem: 13912 +[2024-12-06 06:50:06 root] (utils.py 283): INFO Epoch: [1] [1560/2502] eta: 0:37:25 lr: 0.000020 loss_cls: 3.4463 (3.1956) grad_norm: 2.9685 (3.3069) time: 2.3672 data: 0.0003 max mem: 13912 +[2024-12-06 06:50:30 root] (utils.py 283): INFO Epoch: [1] [1570/2502] eta: 0:37:01 lr: 0.000020 loss_cls: 3.0068 (3.1929) grad_norm: 2.8359 (3.3032) time: 2.3881 data: 0.0002 max mem: 13912 +[2024-12-06 06:50:54 root] (utils.py 283): INFO Epoch: [1] [1580/2502] eta: 0:36:37 lr: 0.000020 loss_cls: 2.6281 (3.1908) grad_norm: 2.6649 (3.3021) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 06:51:18 root] (utils.py 283): INFO Epoch: [1] [1590/2502] eta: 0:36:14 lr: 0.000020 loss_cls: 2.8657 (3.1900) grad_norm: 2.9882 (3.3014) time: 2.3897 data: 0.0003 max mem: 13912 +[2024-12-06 06:51:42 root] (utils.py 283): INFO Epoch: [1] [1600/2502] eta: 0:35:50 lr: 0.000020 loss_cls: 3.0098 (3.1877) grad_norm: 2.8074 (3.2999) time: 2.3903 data: 0.0002 max mem: 13912 +[2024-12-06 06:52:06 root] (utils.py 283): INFO Epoch: [1] [1610/2502] eta: 0:35:26 lr: 0.000020 loss_cls: 3.0689 (3.1876) grad_norm: 2.5769 (3.2969) time: 2.3871 data: 0.0002 max mem: 13912 +[2024-12-06 06:52:30 root] (utils.py 283): INFO Epoch: [1] [1620/2502] eta: 0:35:02 lr: 0.000020 loss_cls: 3.1801 (3.1873) grad_norm: 2.7802 (3.2964) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 06:52:53 root] (utils.py 283): INFO Epoch: [1] [1630/2502] eta: 0:34:38 lr: 0.000020 loss_cls: 3.2026 (3.1873) grad_norm: 2.9799 (3.2973) time: 2.3878 data: 0.0002 max mem: 13912 +[2024-12-06 06:53:17 root] (utils.py 283): INFO Epoch: [1] [1640/2502] eta: 0:34:15 lr: 0.000020 loss_cls: 3.2954 (3.1877) grad_norm: 2.6726 (3.2955) time: 2.3900 data: 0.0003 max mem: 13912 +[2024-12-06 06:53:41 root] (utils.py 283): INFO Epoch: [1] [1650/2502] eta: 0:33:51 lr: 0.000020 loss_cls: 3.2537 (3.1873) grad_norm: 2.9294 (3.2975) time: 2.3868 data: 0.0003 max mem: 13912 +[2024-12-06 06:54:05 root] (utils.py 283): INFO Epoch: [1] [1660/2502] eta: 0:33:27 lr: 0.000020 loss_cls: 3.0575 (3.1861) grad_norm: 2.8924 (3.2958) time: 2.3833 data: 0.0002 max mem: 13912 +[2024-12-06 06:54:29 root] (utils.py 283): INFO Epoch: [1] [1670/2502] eta: 0:33:03 lr: 0.000020 loss_cls: 3.2882 (3.1875) grad_norm: 2.7523 (3.2958) time: 2.3804 data: 0.0003 max mem: 13912 +[2024-12-06 06:54:53 root] (utils.py 283): INFO Epoch: [1] [1680/2502] eta: 0:32:39 lr: 0.000020 loss_cls: 3.2882 (3.1882) grad_norm: 2.7555 (3.2987) time: 2.3797 data: 0.0003 max mem: 13912 +[2024-12-06 06:55:16 root] (utils.py 283): INFO Epoch: [1] [1690/2502] eta: 0:32:15 lr: 0.000020 loss_cls: 3.0468 (3.1864) grad_norm: 2.9089 (3.2982) time: 2.3812 data: 0.0002 max mem: 13912 +[2024-12-06 06:55:40 root] (utils.py 283): INFO Epoch: [1] [1700/2502] eta: 0:31:51 lr: 0.000020 loss_cls: 3.2838 (3.1871) grad_norm: 2.9089 (3.2976) time: 2.3837 data: 0.0002 max mem: 13912 +[2024-12-06 06:56:04 root] (utils.py 283): INFO Epoch: [1] [1710/2502] eta: 0:31:28 lr: 0.000020 loss_cls: 3.2892 (3.1870) grad_norm: 2.8345 (3.2971) time: 2.3880 data: 0.0002 max mem: 13912 +[2024-12-06 06:56:28 root] (utils.py 283): INFO Epoch: [1] [1720/2502] eta: 0:31:04 lr: 0.000020 loss_cls: 3.4019 (3.1878) grad_norm: 2.7850 (3.2956) time: 2.3884 data: 0.0003 max mem: 13912 +[2024-12-06 06:56:52 root] (utils.py 283): INFO Epoch: [1] [1730/2502] eta: 0:30:40 lr: 0.000020 loss_cls: 3.4019 (3.1861) grad_norm: 2.9688 (3.2982) time: 2.3875 data: 0.0003 max mem: 13912 +[2024-12-06 06:57:16 root] (utils.py 283): INFO Epoch: [1] [1740/2502] eta: 0:30:16 lr: 0.000020 loss_cls: 3.0449 (3.1863) grad_norm: 2.9688 (3.2973) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 06:57:40 root] (utils.py 283): INFO Epoch: [1] [1750/2502] eta: 0:29:52 lr: 0.000020 loss_cls: 3.2701 (3.1865) grad_norm: 2.9635 (3.2988) time: 2.3819 data: 0.0003 max mem: 13912 +[2024-12-06 06:58:03 root] (utils.py 283): INFO Epoch: [1] [1760/2502] eta: 0:29:28 lr: 0.000020 loss_cls: 3.1818 (3.1865) grad_norm: 2.9950 (3.3042) time: 2.3749 data: 0.0003 max mem: 13912 +[2024-12-06 06:58:27 root] (utils.py 283): INFO Epoch: [1] [1770/2502] eta: 0:29:05 lr: 0.000020 loss_cls: 3.1104 (3.1855) grad_norm: 2.9950 (3.3027) time: 2.3778 data: 0.0003 max mem: 13912 +[2024-12-06 06:58:51 root] (utils.py 283): INFO Epoch: [1] [1780/2502] eta: 0:28:41 lr: 0.000020 loss_cls: 3.1282 (3.1849) grad_norm: 2.9183 (3.3023) time: 2.3863 data: 0.0003 max mem: 13912 +[2024-12-06 06:59:15 root] (utils.py 283): INFO Epoch: [1] [1790/2502] eta: 0:28:17 lr: 0.000020 loss_cls: 3.0971 (3.1846) grad_norm: 2.7923 (3.3024) time: 2.3907 data: 0.0002 max mem: 13912 +[2024-12-06 06:59:39 root] (utils.py 283): INFO Epoch: [1] [1800/2502] eta: 0:27:53 lr: 0.000020 loss_cls: 3.0479 (3.1837) grad_norm: 2.7360 (3.3051) time: 2.3925 data: 0.0002 max mem: 13912 +[2024-12-06 07:00:03 root] (utils.py 283): INFO Epoch: [1] [1810/2502] eta: 0:27:29 lr: 0.000020 loss_cls: 3.2966 (3.1847) grad_norm: 2.8065 (3.3034) time: 2.3900 data: 0.0003 max mem: 13912 +[2024-12-06 07:00:27 root] (utils.py 283): INFO Epoch: [1] [1820/2502] eta: 0:27:05 lr: 0.000020 loss_cls: 3.3828 (3.1849) grad_norm: 2.7972 (3.3015) time: 2.3814 data: 0.0003 max mem: 13912 +[2024-12-06 07:00:50 root] (utils.py 283): INFO Epoch: [1] [1830/2502] eta: 0:26:42 lr: 0.000020 loss_cls: 3.2329 (3.1847) grad_norm: 2.7972 (3.3013) time: 2.3799 data: 0.0002 max mem: 13912 +[2024-12-06 07:01:14 root] (utils.py 283): INFO Epoch: [1] [1840/2502] eta: 0:26:18 lr: 0.000020 loss_cls: 3.2329 (3.1834) grad_norm: 2.8117 (3.2982) time: 2.3728 data: 0.0003 max mem: 13912 +[2024-12-06 07:01:38 root] (utils.py 283): INFO Epoch: [1] [1850/2502] eta: 0:25:54 lr: 0.000020 loss_cls: 3.3805 (3.1831) grad_norm: 2.8660 (3.2964) time: 2.3739 data: 0.0003 max mem: 13912 +[2024-12-06 07:02:02 root] (utils.py 283): INFO Epoch: [1] [1860/2502] eta: 0:25:30 lr: 0.000020 loss_cls: 3.2237 (3.1830) grad_norm: 2.9181 (3.2941) time: 2.3830 data: 0.0002 max mem: 13912 +[2024-12-06 07:02:26 root] (utils.py 283): INFO Epoch: [1] [1870/2502] eta: 0:25:06 lr: 0.000020 loss_cls: 3.3594 (3.1836) grad_norm: 2.6265 (3.2907) time: 2.3875 data: 0.0002 max mem: 13912 +[2024-12-06 07:02:49 root] (utils.py 283): INFO Epoch: [1] [1880/2502] eta: 0:24:42 lr: 0.000020 loss_cls: 3.3891 (3.1830) grad_norm: 2.7359 (3.2885) time: 2.3903 data: 0.0002 max mem: 13912 +[2024-12-06 07:03:13 root] (utils.py 283): INFO Epoch: [1] [1890/2502] eta: 0:24:18 lr: 0.000020 loss_cls: 3.2895 (3.1830) grad_norm: 2.7227 (3.2856) time: 2.3879 data: 0.0002 max mem: 13912 +[2024-12-06 07:03:37 root] (utils.py 283): INFO Epoch: [1] [1900/2502] eta: 0:23:55 lr: 0.000020 loss_cls: 3.4299 (3.1840) grad_norm: 2.5498 (3.2827) time: 2.3879 data: 0.0002 max mem: 13912 +[2024-12-06 07:04:01 root] (utils.py 283): INFO Epoch: [1] [1910/2502] eta: 0:23:31 lr: 0.000020 loss_cls: 3.3952 (3.1841) grad_norm: 2.8952 (3.2818) time: 2.3885 data: 0.0002 max mem: 13912 +[2024-12-06 07:04:25 root] (utils.py 283): INFO Epoch: [1] [1920/2502] eta: 0:23:07 lr: 0.000020 loss_cls: 3.3916 (3.1845) grad_norm: 3.0031 (3.2806) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 07:04:49 root] (utils.py 283): INFO Epoch: [1] [1930/2502] eta: 0:22:43 lr: 0.000020 loss_cls: 3.2865 (3.1831) grad_norm: 2.8763 (3.2799) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 07:05:12 root] (utils.py 283): INFO Epoch: [1] [1940/2502] eta: 0:22:19 lr: 0.000020 loss_cls: 3.2947 (3.1836) grad_norm: 2.8611 (3.2818) time: 2.3554 data: 0.0002 max mem: 13912 +[2024-12-06 07:05:36 root] (utils.py 283): INFO Epoch: [1] [1950/2502] eta: 0:21:55 lr: 0.000020 loss_cls: 3.4419 (3.1836) grad_norm: 2.9472 (3.2807) time: 2.3588 data: 0.0002 max mem: 13912 +[2024-12-06 07:06:11 root] (utils.py 283): INFO Epoch: [1] [1960/2502] eta: 0:21:35 lr: 0.000020 loss_cls: 3.3182 (3.1825) grad_norm: 3.0001 (3.2801) time: 2.9323 data: 0.0003 max mem: 13912 +[2024-12-06 07:06:35 root] (utils.py 283): INFO Epoch: [1] [1970/2502] eta: 0:21:11 lr: 0.000020 loss_cls: 3.4313 (3.1847) grad_norm: 2.9669 (3.2794) time: 2.9242 data: 0.0003 max mem: 13912 +[2024-12-06 07:06:58 root] (utils.py 283): INFO Epoch: [1] [1980/2502] eta: 0:20:47 lr: 0.000020 loss_cls: 3.5374 (3.1849) grad_norm: 2.9022 (3.2783) time: 2.3751 data: 0.0002 max mem: 13912 +[2024-12-06 07:07:22 root] (utils.py 283): INFO Epoch: [1] [1990/2502] eta: 0:20:23 lr: 0.000020 loss_cls: 2.9205 (3.1831) grad_norm: 2.9578 (3.2797) time: 2.3858 data: 0.0003 max mem: 13912 +[2024-12-06 07:07:46 root] (utils.py 283): INFO Epoch: [1] [2000/2502] eta: 0:19:59 lr: 0.000020 loss_cls: 3.1415 (3.1839) grad_norm: 3.3270 (3.2814) time: 2.3827 data: 0.0003 max mem: 13912 +[2024-12-06 07:08:10 root] (utils.py 283): INFO Epoch: [1] [2010/2502] eta: 0:19:35 lr: 0.000020 loss_cls: 3.4821 (3.1844) grad_norm: 3.1483 (3.2796) time: 2.3802 data: 0.0002 max mem: 13912 +[2024-12-06 07:08:34 root] (utils.py 283): INFO Epoch: [1] [2020/2502] eta: 0:19:11 lr: 0.000020 loss_cls: 3.3885 (3.1844) grad_norm: 2.8474 (3.2785) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 07:08:58 root] (utils.py 283): INFO Epoch: [1] [2030/2502] eta: 0:18:47 lr: 0.000020 loss_cls: 3.2807 (3.1847) grad_norm: 2.8737 (3.2781) time: 2.3876 data: 0.0002 max mem: 13912 +[2024-12-06 07:09:21 root] (utils.py 283): INFO Epoch: [1] [2040/2502] eta: 0:18:23 lr: 0.000020 loss_cls: 3.3070 (3.1841) grad_norm: 3.0429 (3.2778) time: 2.3907 data: 0.0003 max mem: 13912 +[2024-12-06 07:09:45 root] (utils.py 283): INFO Epoch: [1] [2050/2502] eta: 0:17:59 lr: 0.000020 loss_cls: 3.2177 (3.1844) grad_norm: 2.9445 (3.2756) time: 2.3894 data: 0.0002 max mem: 13912 +[2024-12-06 07:10:09 root] (utils.py 283): INFO Epoch: [1] [2060/2502] eta: 0:17:36 lr: 0.000020 loss_cls: 3.4136 (3.1852) grad_norm: 2.7667 (3.2732) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 07:10:33 root] (utils.py 283): INFO Epoch: [1] [2070/2502] eta: 0:17:12 lr: 0.000020 loss_cls: 3.4464 (3.1854) grad_norm: 2.9448 (3.2743) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 07:10:57 root] (utils.py 283): INFO Epoch: [1] [2080/2502] eta: 0:16:48 lr: 0.000020 loss_cls: 3.4313 (3.1861) grad_norm: 3.0936 (3.2751) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 07:11:21 root] (utils.py 283): INFO Epoch: [1] [2090/2502] eta: 0:16:24 lr: 0.000020 loss_cls: 3.4313 (3.1866) grad_norm: 3.3182 (3.2767) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 07:11:45 root] (utils.py 283): INFO Epoch: [1] [2100/2502] eta: 0:16:00 lr: 0.000020 loss_cls: 3.2148 (3.1867) grad_norm: 3.0179 (3.2755) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 07:12:09 root] (utils.py 283): INFO Epoch: [1] [2110/2502] eta: 0:15:36 lr: 0.000020 loss_cls: 3.1234 (3.1856) grad_norm: 3.0542 (3.2771) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 07:12:33 root] (utils.py 283): INFO Epoch: [1] [2120/2502] eta: 0:15:12 lr: 0.000020 loss_cls: 3.2609 (3.1863) grad_norm: 3.1710 (3.2789) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 07:12:57 root] (utils.py 283): INFO Epoch: [1] [2130/2502] eta: 0:14:48 lr: 0.000020 loss_cls: 3.3433 (3.1862) grad_norm: 2.8668 (3.2806) time: 2.3886 data: 0.0002 max mem: 13912 +[2024-12-06 07:13:21 root] (utils.py 283): INFO Epoch: [1] [2140/2502] eta: 0:14:24 lr: 0.000020 loss_cls: 3.2362 (3.1869) grad_norm: 2.8385 (3.2797) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 07:13:44 root] (utils.py 283): INFO Epoch: [1] [2150/2502] eta: 0:14:00 lr: 0.000020 loss_cls: 3.2292 (3.1871) grad_norm: 2.9090 (3.2793) time: 2.3768 data: 0.0003 max mem: 13912 +[2024-12-06 07:14:08 root] (utils.py 283): INFO Epoch: [1] [2160/2502] eta: 0:13:37 lr: 0.000020 loss_cls: 3.3084 (3.1881) grad_norm: 2.9090 (3.2780) time: 2.3721 data: 0.0003 max mem: 13912 +[2024-12-06 07:14:32 root] (utils.py 283): INFO Epoch: [1] [2170/2502] eta: 0:13:13 lr: 0.000020 loss_cls: 3.2874 (3.1880) grad_norm: 2.8487 (3.2786) time: 2.3867 data: 0.0002 max mem: 13912 +[2024-12-06 07:14:56 root] (utils.py 283): INFO Epoch: [1] [2180/2502] eta: 0:12:49 lr: 0.000020 loss_cls: 3.2730 (3.1883) grad_norm: 2.8879 (3.2780) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 07:15:20 root] (utils.py 283): INFO Epoch: [1] [2190/2502] eta: 0:12:25 lr: 0.000020 loss_cls: 3.4594 (3.1897) grad_norm: 2.8654 (3.2763) time: 2.3886 data: 0.0003 max mem: 13912 +[2024-12-06 07:15:44 root] (utils.py 283): INFO Epoch: [1] [2200/2502] eta: 0:12:01 lr: 0.000020 loss_cls: 3.3421 (3.1905) grad_norm: 2.8611 (3.2750) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 07:16:08 root] (utils.py 283): INFO Epoch: [1] [2210/2502] eta: 0:11:37 lr: 0.000020 loss_cls: 3.3421 (3.1902) grad_norm: 2.8611 (3.2734) time: 2.3932 data: 0.0003 max mem: 13912 +[2024-12-06 07:16:31 root] (utils.py 283): INFO Epoch: [1] [2220/2502] eta: 0:11:13 lr: 0.000020 loss_cls: 3.3659 (3.1912) grad_norm: 2.7481 (3.2711) time: 2.3904 data: 0.0002 max mem: 13912 +[2024-12-06 07:16:55 root] (utils.py 283): INFO Epoch: [1] [2230/2502] eta: 0:10:49 lr: 0.000020 loss_cls: 3.4549 (3.1901) grad_norm: 2.8217 (3.2704) time: 2.3853 data: 0.0002 max mem: 13912 +[2024-12-06 07:17:19 root] (utils.py 283): INFO Epoch: [1] [2240/2502] eta: 0:10:25 lr: 0.000020 loss_cls: 3.4368 (3.1911) grad_norm: 2.8211 (3.2696) time: 2.3694 data: 0.0003 max mem: 13912 +[2024-12-06 07:17:43 root] (utils.py 283): INFO Epoch: [1] [2250/2502] eta: 0:10:01 lr: 0.000020 loss_cls: 3.4368 (3.1903) grad_norm: 2.7771 (3.2693) time: 2.3688 data: 0.0003 max mem: 13912 +[2024-12-06 07:18:07 root] (utils.py 283): INFO Epoch: [1] [2260/2502] eta: 0:09:38 lr: 0.000020 loss_cls: 3.2368 (3.1901) grad_norm: 2.6940 (3.2698) time: 2.3870 data: 0.0003 max mem: 13912 +[2024-12-06 07:18:30 root] (utils.py 283): INFO Epoch: [1] [2270/2502] eta: 0:09:14 lr: 0.000020 loss_cls: 3.2368 (3.1893) grad_norm: 2.8181 (3.2683) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 07:18:54 root] (utils.py 283): INFO Epoch: [1] [2280/2502] eta: 0:08:50 lr: 0.000020 loss_cls: 3.1477 (3.1893) grad_norm: 2.7957 (3.2661) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 07:19:18 root] (utils.py 283): INFO Epoch: [1] [2290/2502] eta: 0:08:26 lr: 0.000020 loss_cls: 3.3670 (3.1897) grad_norm: 2.8305 (3.2655) time: 2.3873 data: 0.0002 max mem: 13912 +[2024-12-06 07:19:42 root] (utils.py 283): INFO Epoch: [1] [2300/2502] eta: 0:08:02 lr: 0.000020 loss_cls: 3.4151 (3.1901) grad_norm: 2.9119 (3.2637) time: 2.3823 data: 0.0002 max mem: 13912 +[2024-12-06 07:20:06 root] (utils.py 283): INFO Epoch: [1] [2310/2502] eta: 0:07:38 lr: 0.000020 loss_cls: 3.3651 (3.1902) grad_norm: 2.8594 (3.2624) time: 2.3838 data: 0.0002 max mem: 13912 +[2024-12-06 07:20:30 root] (utils.py 283): INFO Epoch: [1] [2320/2502] eta: 0:07:14 lr: 0.000020 loss_cls: 3.2220 (3.1892) grad_norm: 2.8256 (3.2623) time: 2.3869 data: 0.0002 max mem: 13912 +[2024-12-06 07:20:53 root] (utils.py 283): INFO Epoch: [1] [2330/2502] eta: 0:06:50 lr: 0.000020 loss_cls: 3.2571 (3.1899) grad_norm: 2.9211 (3.2618) time: 2.3766 data: 0.0002 max mem: 13912 +[2024-12-06 07:21:17 root] (utils.py 283): INFO Epoch: [1] [2340/2502] eta: 0:06:26 lr: 0.000020 loss_cls: 3.3628 (3.1902) grad_norm: 2.9862 (3.2611) time: 2.3895 data: 0.0002 max mem: 13912 +[2024-12-06 07:21:41 root] (utils.py 283): INFO Epoch: [1] [2350/2502] eta: 0:06:03 lr: 0.000020 loss_cls: 3.3628 (3.1894) grad_norm: 2.6922 (3.2594) time: 2.4026 data: 0.0002 max mem: 13912 +[2024-12-06 07:22:05 root] (utils.py 283): INFO Epoch: [1] [2360/2502] eta: 0:05:39 lr: 0.000020 loss_cls: 3.1523 (3.1882) grad_norm: 2.8644 (3.2580) time: 2.3736 data: 0.0002 max mem: 13912 +[2024-12-06 07:22:29 root] (utils.py 283): INFO Epoch: [1] [2370/2502] eta: 0:05:15 lr: 0.000020 loss_cls: 3.0152 (3.1869) grad_norm: 2.8644 (3.2593) time: 2.3669 data: 0.0002 max mem: 13912 +[2024-12-06 07:22:53 root] (utils.py 283): INFO Epoch: [1] [2380/2502] eta: 0:04:51 lr: 0.000020 loss_cls: 3.1106 (3.1870) grad_norm: 2.8108 (3.2570) time: 2.3800 data: 0.0002 max mem: 13912 +[2024-12-06 07:23:16 root] (utils.py 283): INFO Epoch: [1] [2390/2502] eta: 0:04:27 lr: 0.000020 loss_cls: 3.2502 (3.1866) grad_norm: 2.6804 (3.2546) time: 2.3855 data: 0.0002 max mem: 13912 +[2024-12-06 07:23:40 root] (utils.py 283): INFO Epoch: [1] [2400/2502] eta: 0:04:03 lr: 0.000020 loss_cls: 3.0986 (3.1860) grad_norm: 2.8902 (3.2569) time: 2.3921 data: 0.0003 max mem: 13912 +[2024-12-06 07:24:04 root] (utils.py 283): INFO Epoch: [1] [2410/2502] eta: 0:03:39 lr: 0.000020 loss_cls: 3.1285 (3.1859) grad_norm: 2.9184 (3.2552) time: 2.3941 data: 0.0003 max mem: 13912 +[2024-12-06 07:24:28 root] (utils.py 283): INFO Epoch: [1] [2420/2502] eta: 0:03:15 lr: 0.000020 loss_cls: 3.2655 (3.1863) grad_norm: 2.8372 (3.2557) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 07:24:52 root] (utils.py 283): INFO Epoch: [1] [2430/2502] eta: 0:02:51 lr: 0.000020 loss_cls: 3.4697 (3.1868) grad_norm: 3.0665 (3.2570) time: 2.3874 data: 0.0003 max mem: 13912 +[2024-12-06 07:25:16 root] (utils.py 283): INFO Epoch: [1] [2440/2502] eta: 0:02:28 lr: 0.000020 loss_cls: 3.4849 (3.1876) grad_norm: 2.9882 (3.2553) time: 2.3747 data: 0.0002 max mem: 13912 +[2024-12-06 07:25:39 root] (utils.py 283): INFO Epoch: [1] [2450/2502] eta: 0:02:04 lr: 0.000020 loss_cls: 3.3659 (3.1876) grad_norm: 2.9057 (3.2619) time: 2.3690 data: 0.0004 max mem: 13912 +[2024-12-06 07:26:03 root] (utils.py 283): INFO Epoch: [1] [2460/2502] eta: 0:01:40 lr: 0.000020 loss_cls: 3.1997 (3.1868) grad_norm: 2.9981 (3.2664) time: 2.3792 data: 0.0004 max mem: 13912 +[2024-12-06 07:26:27 root] (utils.py 283): INFO Epoch: [1] [2470/2502] eta: 0:01:16 lr: 0.000020 loss_cls: 3.2368 (3.1873) grad_norm: 2.9981 (3.2693) time: 2.3876 data: 0.0002 max mem: 13912 +[2024-12-06 07:26:51 root] (utils.py 283): INFO Epoch: [1] [2480/2502] eta: 0:00:52 lr: 0.000020 loss_cls: 3.4080 (3.1888) grad_norm: 3.0261 (3.2694) time: 2.3902 data: 0.0002 max mem: 13912 +[2024-12-06 07:27:15 root] (utils.py 283): INFO Epoch: [1] [2490/2502] eta: 0:00:28 lr: 0.000020 loss_cls: 3.3807 (3.1887) grad_norm: 3.0261 (3.2709) time: 2.4012 data: 0.0230 max mem: 13912 +[2024-12-06 07:27:39 root] (utils.py 283): INFO Epoch: [1] [2500/2502] eta: 0:00:04 lr: 0.000020 loss_cls: 3.2779 (3.1893) grad_norm: 2.8756 (3.2693) time: 2.3991 data: 0.0229 max mem: 13912 +[2024-12-06 07:27:41 root] (utils.py 283): INFO Epoch: [1] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 3.2923 (3.1896) grad_norm: 2.9689 (3.2694) time: 2.3988 data: 0.0229 max mem: 13912 +[2024-12-06 07:27:41 root] (utils.py 297): INFO Epoch: [1] Total time: 1:39:36 (2.3887 s / it) +[2024-12-06 07:27:41 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 3.2923 (3.1851) grad_norm: 2.9689 (3.2694) +[2024-12-06 07:27:43 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:42 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.5181 (0.5181) acc1: 89.0625 (89.0625) acc3: 96.8750 (96.8750) acc5: 97.6562 (97.6562) time: 0.4347 data: 0.0003 max mem: 13912 +[2024-12-06 07:27:46 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:28 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6273 (0.6876) acc1: 85.9375 (85.5824) acc3: 96.0938 (95.3125) acc5: 97.6562 (96.8750) time: 0.3264 data: 0.0004 max mem: 13912 +[2024-12-06 07:27:49 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:25 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7033 (0.7208) acc1: 84.3750 (84.8958) acc3: 95.3125 (94.9033) acc5: 96.0938 (96.4658) time: 0.3204 data: 0.0004 max mem: 13912 +[2024-12-06 07:27:52 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7574 (0.7427) acc1: 82.8125 (84.0978) acc3: 94.5312 (94.7077) acc5: 96.0938 (96.3458) time: 0.3257 data: 0.0004 max mem: 13912 +[2024-12-06 07:27:55 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:18 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7501 (0.7450) acc1: 83.5938 (83.9177) acc3: 94.5312 (94.7599) acc5: 96.8750 (96.3986) time: 0.3145 data: 0.0004 max mem: 13912 +[2024-12-06 07:27:59 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8388 (0.8266) acc1: 77.3438 (81.9240) acc3: 92.1875 (93.4589) acc5: 93.7500 (95.5270) time: 0.3141 data: 0.0004 max mem: 13912 +[2024-12-06 07:28:02 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:12 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1121 (0.8603) acc1: 74.2188 (81.3140) acc3: 88.2812 (92.8535) acc5: 91.4062 (94.9411) time: 0.3256 data: 0.0004 max mem: 13912 +[2024-12-06 07:28:05 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:09 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1008 (0.8950) acc1: 77.3438 (80.4908) acc3: 89.8438 (92.5176) acc5: 92.1875 (94.7403) time: 0.3265 data: 0.0004 max mem: 13912 +[2024-12-06 07:28:08 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:05 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1185 (0.9262) acc1: 75.7812 (79.8129) acc3: 89.0625 (92.0235) acc5: 92.1875 (94.3673) time: 0.3135 data: 0.0006 max mem: 13912 +[2024-12-06 07:28:11 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:02 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1185 (0.9503) acc1: 75.0000 (79.1123) acc3: 88.2812 (91.6466) acc5: 91.4062 (94.0591) time: 0.3122 data: 0.0006 max mem: 13912 +[2024-12-06 07:28:14 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0551 (0.9478) acc1: 75.7812 (79.1360) acc3: 89.8438 (91.6640) acc5: 92.9688 (94.1360) time: 0.3091 data: 0.0005 max mem: 13912 +[2024-12-06 07:28:14 root] (utils.py 297): INFO Test: Total time: 0:00:31 (0.3183 s / it) +[2024-12-06 07:28:14 root] (engine.py 264): INFO * Acc@1 79.136 Acc@3 91.688 Acc@5 94.330 loss 0.940 flops 3.584 layer_flops 3.536 +[2024-12-06 07:28:14 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.1% +[2024-12-06 07:28:14 root] (main.py 551): INFO Max accuracy: 79.14% +[2024-12-06 07:28:16 root] (utils.py 283): INFO Epoch: [2] [ 0/2502] eta: 1:37:28 lr: 0.000020 loss_cls: 3.5736 (3.5736) grad_norm: 2.3754 (2.3754) time: 2.3375 data: 0.0002 max mem: 13912 +[2024-12-06 07:28:40 root] (utils.py 283): INFO Epoch: [2] [ 10/2502] eta: 1:38:49 lr: 0.000020 loss_cls: 3.2155 (3.2192) grad_norm: 2.8435 (3.1043) time: 2.3793 data: 0.0002 max mem: 13912 +[2024-12-06 07:29:04 root] (utils.py 283): INFO Epoch: [2] [ 20/2502] eta: 1:38:33 lr: 0.000020 loss_cls: 3.2155 (3.1418) grad_norm: 2.8435 (3.2315) time: 2.3847 data: 0.0002 max mem: 13912 +[2024-12-06 07:29:28 root] (utils.py 283): INFO Epoch: [2] [ 30/2502] eta: 1:38:09 lr: 0.000020 loss_cls: 3.1237 (3.0748) grad_norm: 3.2402 (3.3046) time: 2.3840 data: 0.0002 max mem: 13912 +[2024-12-06 07:29:52 root] (utils.py 283): INFO Epoch: [2] [ 40/2502] eta: 1:37:30 lr: 0.000020 loss_cls: 3.2128 (3.1162) grad_norm: 3.1100 (3.4657) time: 2.3696 data: 0.0002 max mem: 13912 +[2024-12-06 07:30:15 root] (utils.py 283): INFO Epoch: [2] [ 50/2502] eta: 1:37:10 lr: 0.000020 loss_cls: 3.2398 (3.1135) grad_norm: 3.0586 (3.3951) time: 2.3707 data: 0.0003 max mem: 13912 +[2024-12-06 07:30:39 root] (utils.py 283): INFO Epoch: [2] [ 60/2502] eta: 1:36:51 lr: 0.000020 loss_cls: 3.3185 (3.1412) grad_norm: 2.9219 (3.3285) time: 2.3873 data: 0.0003 max mem: 13912 +[2024-12-06 07:31:03 root] (utils.py 283): INFO Epoch: [2] [ 70/2502] eta: 1:36:28 lr: 0.000020 loss_cls: 3.2002 (3.1165) grad_norm: 2.8363 (3.2653) time: 2.3860 data: 0.0002 max mem: 13912 +[2024-12-06 07:31:27 root] (utils.py 283): INFO Epoch: [2] [ 80/2502] eta: 1:36:07 lr: 0.000020 loss_cls: 3.1034 (3.1166) grad_norm: 2.7043 (3.2167) time: 2.3863 data: 0.0002 max mem: 13912 +[2024-12-06 07:31:51 root] (utils.py 283): INFO Epoch: [2] [ 90/2502] eta: 1:35:46 lr: 0.000020 loss_cls: 3.1034 (3.1067) grad_norm: 2.6435 (3.1603) time: 2.3915 data: 0.0003 max mem: 13912 +[2024-12-06 07:32:15 root] (utils.py 283): INFO Epoch: [2] [ 100/2502] eta: 1:35:27 lr: 0.000020 loss_cls: 3.2099 (3.1387) grad_norm: 2.8188 (3.1648) time: 2.3968 data: 0.0003 max mem: 13912 +[2024-12-06 07:32:39 root] (utils.py 283): INFO Epoch: [2] [ 110/2502] eta: 1:35:06 lr: 0.000020 loss_cls: 3.3737 (3.1422) grad_norm: 2.9153 (3.1418) time: 2.4007 data: 0.0003 max mem: 13912 +[2024-12-06 07:33:03 root] (utils.py 283): INFO Epoch: [2] [ 120/2502] eta: 1:34:43 lr: 0.000020 loss_cls: 2.9882 (3.1346) grad_norm: 2.8504 (3.1151) time: 2.3952 data: 0.0003 max mem: 13912 +[2024-12-06 07:33:27 root] (utils.py 283): INFO Epoch: [2] [ 130/2502] eta: 1:34:18 lr: 0.000020 loss_cls: 3.1569 (3.1454) grad_norm: 2.8208 (3.1391) time: 2.3836 data: 0.0003 max mem: 13912 +[2024-12-06 07:33:50 root] (utils.py 283): INFO Epoch: [2] [ 140/2502] eta: 1:33:53 lr: 0.000020 loss_cls: 3.4134 (3.1587) grad_norm: 2.8735 (3.1640) time: 2.3773 data: 0.0003 max mem: 13912 +[2024-12-06 07:34:14 root] (utils.py 283): INFO Epoch: [2] [ 150/2502] eta: 1:33:23 lr: 0.000020 loss_cls: 3.3233 (3.1485) grad_norm: 2.8735 (3.1408) time: 2.3637 data: 0.0003 max mem: 13912 +[2024-12-06 07:34:38 root] (utils.py 283): INFO Epoch: [2] [ 160/2502] eta: 1:33:02 lr: 0.000020 loss_cls: 3.2757 (3.1642) grad_norm: 2.9362 (3.1492) time: 2.3742 data: 0.0002 max mem: 13912 +[2024-12-06 07:35:02 root] (utils.py 283): INFO Epoch: [2] [ 170/2502] eta: 1:32:38 lr: 0.000020 loss_cls: 3.3503 (3.1567) grad_norm: 3.1987 (3.1583) time: 2.3925 data: 0.0002 max mem: 13912 +[2024-12-06 07:35:26 root] (utils.py 283): INFO Epoch: [2] [ 180/2502] eta: 1:32:15 lr: 0.000020 loss_cls: 3.2850 (3.1656) grad_norm: 3.1544 (3.1672) time: 2.3867 data: 0.0002 max mem: 13912 +[2024-12-06 07:35:50 root] (utils.py 283): INFO Epoch: [2] [ 190/2502] eta: 1:31:52 lr: 0.000020 loss_cls: 3.1974 (3.1606) grad_norm: 2.8318 (3.1438) time: 2.3882 data: 0.0003 max mem: 13912 +[2024-12-06 07:36:13 root] (utils.py 283): INFO Epoch: [2] [ 200/2502] eta: 1:31:29 lr: 0.000020 loss_cls: 3.3202 (3.1673) grad_norm: 2.7262 (3.1300) time: 2.3918 data: 0.0003 max mem: 13912 +[2024-12-06 07:36:37 root] (utils.py 283): INFO Epoch: [2] [ 210/2502] eta: 1:31:06 lr: 0.000020 loss_cls: 3.4362 (3.1776) grad_norm: 2.7942 (3.1260) time: 2.3914 data: 0.0002 max mem: 13912 +[2024-12-06 07:37:01 root] (utils.py 283): INFO Epoch: [2] [ 220/2502] eta: 1:30:43 lr: 0.000020 loss_cls: 3.4362 (3.1848) grad_norm: 2.7854 (3.1159) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 07:37:25 root] (utils.py 283): INFO Epoch: [2] [ 230/2502] eta: 1:30:19 lr: 0.000020 loss_cls: 3.3131 (3.1775) grad_norm: 2.7854 (3.1108) time: 2.3907 data: 0.0003 max mem: 13912 +[2024-12-06 07:37:49 root] (utils.py 283): INFO Epoch: [2] [ 240/2502] eta: 1:29:55 lr: 0.000020 loss_cls: 3.3931 (3.1901) grad_norm: 2.7950 (3.1033) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 07:38:13 root] (utils.py 283): INFO Epoch: [2] [ 250/2502] eta: 1:29:31 lr: 0.000020 loss_cls: 3.4422 (3.1900) grad_norm: 2.7950 (3.0980) time: 2.3832 data: 0.0003 max mem: 13912 +[2024-12-06 07:38:37 root] (utils.py 283): INFO Epoch: [2] [ 260/2502] eta: 1:29:06 lr: 0.000020 loss_cls: 3.2767 (3.1925) grad_norm: 3.0799 (3.1097) time: 2.3765 data: 0.0003 max mem: 13912 +[2024-12-06 07:39:00 root] (utils.py 283): INFO Epoch: [2] [ 270/2502] eta: 1:28:43 lr: 0.000020 loss_cls: 3.2302 (3.1919) grad_norm: 3.0536 (3.1350) time: 2.3801 data: 0.0003 max mem: 13912 +[2024-12-06 07:39:24 root] (utils.py 283): INFO Epoch: [2] [ 280/2502] eta: 1:28:19 lr: 0.000020 loss_cls: 3.2554 (3.1964) grad_norm: 3.0037 (3.1920) time: 2.3897 data: 0.0003 max mem: 13912 +[2024-12-06 07:39:48 root] (utils.py 283): INFO Epoch: [2] [ 290/2502] eta: 1:27:56 lr: 0.000020 loss_cls: 3.3652 (3.1977) grad_norm: 3.1511 (3.1964) time: 2.3904 data: 0.0002 max mem: 13912 +[2024-12-06 07:40:12 root] (utils.py 283): INFO Epoch: [2] [ 300/2502] eta: 1:27:32 lr: 0.000020 loss_cls: 3.3652 (3.2063) grad_norm: 3.0484 (3.1904) time: 2.3894 data: 0.0002 max mem: 13912 +[2024-12-06 07:40:36 root] (utils.py 283): INFO Epoch: [2] [ 310/2502] eta: 1:27:09 lr: 0.000020 loss_cls: 3.2691 (3.1996) grad_norm: 2.8222 (3.1996) time: 2.3900 data: 0.0002 max mem: 13912 +[2024-12-06 07:41:00 root] (utils.py 283): INFO Epoch: [2] [ 320/2502] eta: 1:26:45 lr: 0.000020 loss_cls: 3.0249 (3.1973) grad_norm: 2.7892 (3.1950) time: 2.3902 data: 0.0003 max mem: 13912 +[2024-12-06 07:41:24 root] (utils.py 283): INFO Epoch: [2] [ 330/2502] eta: 1:26:21 lr: 0.000020 loss_cls: 3.0249 (3.1925) grad_norm: 2.8042 (3.1894) time: 2.3884 data: 0.0002 max mem: 13912 +[2024-12-06 07:41:48 root] (utils.py 283): INFO Epoch: [2] [ 340/2502] eta: 1:25:58 lr: 0.000020 loss_cls: 3.2910 (3.1963) grad_norm: 3.0077 (3.1884) time: 2.3913 data: 0.0002 max mem: 13912 +[2024-12-06 07:42:12 root] (utils.py 283): INFO Epoch: [2] [ 350/2502] eta: 1:25:35 lr: 0.000020 loss_cls: 3.4554 (3.1996) grad_norm: 3.0716 (3.1891) time: 2.3957 data: 0.0003 max mem: 13912 +[2024-12-06 07:42:36 root] (utils.py 283): INFO Epoch: [2] [ 360/2502] eta: 1:25:11 lr: 0.000020 loss_cls: 3.3882 (3.1985) grad_norm: 3.0727 (3.1897) time: 2.3914 data: 0.0003 max mem: 13912 +[2024-12-06 07:43:00 root] (utils.py 283): INFO Epoch: [2] [ 370/2502] eta: 1:24:47 lr: 0.000020 loss_cls: 3.1488 (3.1954) grad_norm: 3.1259 (3.2029) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 07:43:23 root] (utils.py 283): INFO Epoch: [2] [ 380/2502] eta: 1:24:23 lr: 0.000020 loss_cls: 3.1537 (3.1928) grad_norm: 3.2362 (3.2082) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 07:43:47 root] (utils.py 283): INFO Epoch: [2] [ 390/2502] eta: 1:24:00 lr: 0.000020 loss_cls: 3.1914 (3.1919) grad_norm: 3.0124 (3.2105) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 07:44:11 root] (utils.py 283): INFO Epoch: [2] [ 400/2502] eta: 1:23:36 lr: 0.000020 loss_cls: 3.2807 (3.1960) grad_norm: 2.9207 (3.2050) time: 2.3924 data: 0.0003 max mem: 13912 +[2024-12-06 07:44:35 root] (utils.py 283): INFO Epoch: [2] [ 410/2502] eta: 1:23:12 lr: 0.000020 loss_cls: 3.2905 (3.1957) grad_norm: 2.8558 (3.1967) time: 2.3885 data: 0.0002 max mem: 13912 +[2024-12-06 07:44:59 root] (utils.py 283): INFO Epoch: [2] [ 420/2502] eta: 1:22:49 lr: 0.000020 loss_cls: 3.0936 (3.1894) grad_norm: 2.8257 (3.1862) time: 2.3905 data: 0.0002 max mem: 13912 +[2024-12-06 07:45:23 root] (utils.py 283): INFO Epoch: [2] [ 430/2502] eta: 1:22:25 lr: 0.000020 loss_cls: 3.1219 (3.1889) grad_norm: 2.7889 (3.1776) time: 2.3921 data: 0.0003 max mem: 13912 +[2024-12-06 07:45:47 root] (utils.py 283): INFO Epoch: [2] [ 440/2502] eta: 1:22:02 lr: 0.000020 loss_cls: 3.2037 (3.1886) grad_norm: 3.0126 (3.1764) time: 2.3913 data: 0.0003 max mem: 13912 +[2024-12-06 07:46:11 root] (utils.py 283): INFO Epoch: [2] [ 450/2502] eta: 1:21:39 lr: 0.000020 loss_cls: 3.2902 (3.1889) grad_norm: 3.0856 (3.1732) time: 2.4050 data: 0.0003 max mem: 13912 +[2024-12-06 07:46:35 root] (utils.py 283): INFO Epoch: [2] [ 460/2502] eta: 1:21:15 lr: 0.000020 loss_cls: 3.3945 (3.1915) grad_norm: 3.0064 (3.1689) time: 2.4005 data: 0.0003 max mem: 13912 +[2024-12-06 07:46:59 root] (utils.py 283): INFO Epoch: [2] [ 470/2502] eta: 1:20:51 lr: 0.000020 loss_cls: 3.4214 (3.1934) grad_norm: 2.8199 (3.1585) time: 2.3819 data: 0.0003 max mem: 13912 +[2024-12-06 07:47:23 root] (utils.py 283): INFO Epoch: [2] [ 480/2502] eta: 1:20:27 lr: 0.000020 loss_cls: 3.2291 (3.1887) grad_norm: 2.7697 (3.1673) time: 2.3844 data: 0.0002 max mem: 13912 +[2024-12-06 07:47:46 root] (utils.py 283): INFO Epoch: [2] [ 490/2502] eta: 1:20:03 lr: 0.000020 loss_cls: 3.2583 (3.1916) grad_norm: 2.7529 (3.1657) time: 2.3843 data: 0.0003 max mem: 13912 +[2024-12-06 07:48:10 root] (utils.py 283): INFO Epoch: [2] [ 500/2502] eta: 1:19:39 lr: 0.000020 loss_cls: 3.4801 (3.1922) grad_norm: 2.7541 (3.1792) time: 2.3842 data: 0.0003 max mem: 13912 +[2024-12-06 07:48:34 root] (utils.py 283): INFO Epoch: [2] [ 510/2502] eta: 1:19:15 lr: 0.000020 loss_cls: 3.0705 (3.1892) grad_norm: 2.8444 (3.1760) time: 2.3897 data: 0.0002 max mem: 13912 +[2024-12-06 07:48:58 root] (utils.py 283): INFO Epoch: [2] [ 520/2502] eta: 1:18:52 lr: 0.000020 loss_cls: 3.2838 (3.1935) grad_norm: 2.9564 (3.1781) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 07:49:22 root] (utils.py 283): INFO Epoch: [2] [ 530/2502] eta: 1:18:28 lr: 0.000020 loss_cls: 3.1177 (3.1865) grad_norm: 2.9717 (3.1717) time: 2.3941 data: 0.0002 max mem: 13912 +[2024-12-06 07:49:46 root] (utils.py 283): INFO Epoch: [2] [ 540/2502] eta: 1:18:04 lr: 0.000020 loss_cls: 3.1177 (3.1851) grad_norm: 2.8248 (3.1695) time: 2.3938 data: 0.0002 max mem: 13912 +[2024-12-06 07:50:10 root] (utils.py 283): INFO Epoch: [2] [ 550/2502] eta: 1:17:41 lr: 0.000020 loss_cls: 3.2439 (3.1836) grad_norm: 2.6150 (3.1595) time: 2.3955 data: 0.0003 max mem: 13912 +[2024-12-06 07:50:34 root] (utils.py 283): INFO Epoch: [2] [ 560/2502] eta: 1:17:17 lr: 0.000020 loss_cls: 3.2998 (3.1850) grad_norm: 2.6150 (3.1621) time: 2.3953 data: 0.0003 max mem: 13912 +[2024-12-06 07:50:58 root] (utils.py 283): INFO Epoch: [2] [ 570/2502] eta: 1:16:53 lr: 0.000020 loss_cls: 3.2546 (3.1816) grad_norm: 2.7904 (3.1610) time: 2.3940 data: 0.0003 max mem: 13912 +[2024-12-06 07:51:22 root] (utils.py 283): INFO Epoch: [2] [ 580/2502] eta: 1:16:30 lr: 0.000020 loss_cls: 3.2063 (3.1807) grad_norm: 2.9055 (3.1625) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 07:51:46 root] (utils.py 283): INFO Epoch: [2] [ 590/2502] eta: 1:16:06 lr: 0.000020 loss_cls: 3.4354 (3.1863) grad_norm: 2.9438 (3.1606) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 07:52:09 root] (utils.py 283): INFO Epoch: [2] [ 600/2502] eta: 1:15:42 lr: 0.000020 loss_cls: 3.5006 (3.1814) grad_norm: 2.9149 (3.1643) time: 2.3902 data: 0.0003 max mem: 13912 +[2024-12-06 07:52:33 root] (utils.py 283): INFO Epoch: [2] [ 610/2502] eta: 1:15:18 lr: 0.000020 loss_cls: 3.0055 (3.1811) grad_norm: 2.8370 (3.1611) time: 2.3889 data: 0.0003 max mem: 13912 +[2024-12-06 07:52:57 root] (utils.py 283): INFO Epoch: [2] [ 620/2502] eta: 1:14:54 lr: 0.000020 loss_cls: 3.4442 (3.1828) grad_norm: 2.8263 (3.1646) time: 2.3874 data: 0.0003 max mem: 13912 +[2024-12-06 07:53:21 root] (utils.py 283): INFO Epoch: [2] [ 630/2502] eta: 1:14:30 lr: 0.000020 loss_cls: 3.3579 (3.1875) grad_norm: 2.8915 (3.1646) time: 2.3875 data: 0.0003 max mem: 13912 +[2024-12-06 07:53:45 root] (utils.py 283): INFO Epoch: [2] [ 640/2502] eta: 1:14:06 lr: 0.000020 loss_cls: 3.3281 (3.1851) grad_norm: 2.8305 (3.1672) time: 2.3900 data: 0.0003 max mem: 13912 +[2024-12-06 07:54:09 root] (utils.py 283): INFO Epoch: [2] [ 650/2502] eta: 1:13:43 lr: 0.000020 loss_cls: 3.2389 (3.1846) grad_norm: 3.0301 (3.1706) time: 2.3914 data: 0.0003 max mem: 13912 +[2024-12-06 07:54:33 root] (utils.py 283): INFO Epoch: [2] [ 660/2502] eta: 1:13:19 lr: 0.000020 loss_cls: 3.2114 (3.1847) grad_norm: 2.8798 (3.1668) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 07:54:57 root] (utils.py 283): INFO Epoch: [2] [ 670/2502] eta: 1:12:55 lr: 0.000020 loss_cls: 3.1819 (3.1850) grad_norm: 2.8250 (3.1680) time: 2.3825 data: 0.0003 max mem: 13912 +[2024-12-06 07:55:21 root] (utils.py 283): INFO Epoch: [2] [ 680/2502] eta: 1:12:31 lr: 0.000020 loss_cls: 3.2840 (3.1869) grad_norm: 2.8422 (3.1687) time: 2.3870 data: 0.0003 max mem: 13912 +[2024-12-06 07:55:44 root] (utils.py 283): INFO Epoch: [2] [ 690/2502] eta: 1:12:07 lr: 0.000020 loss_cls: 3.3295 (3.1882) grad_norm: 2.8324 (3.1636) time: 2.3841 data: 0.0002 max mem: 13912 +[2024-12-06 07:56:08 root] (utils.py 283): INFO Epoch: [2] [ 700/2502] eta: 1:11:43 lr: 0.000020 loss_cls: 3.3722 (3.1898) grad_norm: 2.8678 (3.1612) time: 2.3924 data: 0.0002 max mem: 13912 +[2024-12-06 07:56:32 root] (utils.py 283): INFO Epoch: [2] [ 710/2502] eta: 1:11:19 lr: 0.000020 loss_cls: 3.3193 (3.1879) grad_norm: 2.9116 (3.1616) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 07:56:56 root] (utils.py 283): INFO Epoch: [2] [ 720/2502] eta: 1:10:55 lr: 0.000020 loss_cls: 3.1474 (3.1858) grad_norm: 2.8808 (3.1611) time: 2.3801 data: 0.0003 max mem: 13912 +[2024-12-06 07:57:20 root] (utils.py 283): INFO Epoch: [2] [ 730/2502] eta: 1:10:31 lr: 0.000020 loss_cls: 3.1474 (3.1842) grad_norm: 2.7203 (3.1537) time: 2.3882 data: 0.0002 max mem: 13912 +[2024-12-06 07:57:44 root] (utils.py 283): INFO Epoch: [2] [ 740/2502] eta: 1:10:07 lr: 0.000020 loss_cls: 3.1252 (3.1828) grad_norm: 2.7203 (3.1518) time: 2.3843 data: 0.0002 max mem: 13912 +[2024-12-06 07:58:08 root] (utils.py 283): INFO Epoch: [2] [ 750/2502] eta: 1:09:43 lr: 0.000020 loss_cls: 3.3405 (3.1848) grad_norm: 2.8749 (3.1543) time: 2.3865 data: 0.0002 max mem: 13912 +[2024-12-06 07:58:31 root] (utils.py 283): INFO Epoch: [2] [ 760/2502] eta: 1:09:19 lr: 0.000020 loss_cls: 3.3715 (3.1874) grad_norm: 3.0500 (3.1577) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 07:58:55 root] (utils.py 283): INFO Epoch: [2] [ 770/2502] eta: 1:08:56 lr: 0.000020 loss_cls: 3.2808 (3.1857) grad_norm: 2.8771 (3.1550) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 07:59:19 root] (utils.py 283): INFO Epoch: [2] [ 780/2502] eta: 1:08:32 lr: 0.000020 loss_cls: 3.2735 (3.1859) grad_norm: 2.8153 (3.1557) time: 2.3947 data: 0.0003 max mem: 13912 +[2024-12-06 07:59:43 root] (utils.py 283): INFO Epoch: [2] [ 790/2502] eta: 1:08:08 lr: 0.000020 loss_cls: 3.2735 (3.1840) grad_norm: 2.9416 (3.1600) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 08:00:07 root] (utils.py 283): INFO Epoch: [2] [ 800/2502] eta: 1:07:44 lr: 0.000020 loss_cls: 3.2010 (3.1843) grad_norm: 2.9010 (3.1550) time: 2.3809 data: 0.0002 max mem: 13912 +[2024-12-06 08:00:31 root] (utils.py 283): INFO Epoch: [2] [ 810/2502] eta: 1:07:20 lr: 0.000020 loss_cls: 3.1196 (3.1837) grad_norm: 2.7425 (3.1565) time: 2.3759 data: 0.0002 max mem: 13912 +[2024-12-06 08:00:55 root] (utils.py 283): INFO Epoch: [2] [ 820/2502] eta: 1:06:56 lr: 0.000020 loss_cls: 3.3313 (3.1856) grad_norm: 2.7778 (3.1533) time: 2.3792 data: 0.0002 max mem: 13912 +[2024-12-06 08:01:19 root] (utils.py 283): INFO Epoch: [2] [ 830/2502] eta: 1:06:32 lr: 0.000020 loss_cls: 3.4201 (3.1866) grad_norm: 2.8576 (3.1538) time: 2.3902 data: 0.0002 max mem: 13912 +[2024-12-06 08:01:42 root] (utils.py 283): INFO Epoch: [2] [ 840/2502] eta: 1:06:08 lr: 0.000020 loss_cls: 3.3858 (3.1865) grad_norm: 2.9136 (3.1541) time: 2.3951 data: 0.0002 max mem: 13912 +[2024-12-06 08:02:06 root] (utils.py 283): INFO Epoch: [2] [ 850/2502] eta: 1:05:45 lr: 0.000020 loss_cls: 3.2423 (3.1850) grad_norm: 2.9876 (3.1529) time: 2.3937 data: 0.0002 max mem: 13912 +[2024-12-06 08:02:30 root] (utils.py 283): INFO Epoch: [2] [ 860/2502] eta: 1:05:21 lr: 0.000020 loss_cls: 3.3843 (3.1892) grad_norm: 2.8421 (3.1508) time: 2.3909 data: 0.0002 max mem: 13912 +[2024-12-06 08:02:54 root] (utils.py 283): INFO Epoch: [2] [ 870/2502] eta: 1:04:57 lr: 0.000020 loss_cls: 3.6335 (3.1941) grad_norm: 2.8783 (3.1514) time: 2.3909 data: 0.0003 max mem: 13912 +[2024-12-06 08:03:18 root] (utils.py 283): INFO Epoch: [2] [ 880/2502] eta: 1:04:33 lr: 0.000020 loss_cls: 3.4440 (3.1956) grad_norm: 3.0920 (3.1517) time: 2.3902 data: 0.0003 max mem: 13912 +[2024-12-06 08:03:42 root] (utils.py 283): INFO Epoch: [2] [ 890/2502] eta: 1:04:09 lr: 0.000020 loss_cls: 3.2854 (3.1940) grad_norm: 2.9421 (3.1570) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 08:04:06 root] (utils.py 283): INFO Epoch: [2] [ 900/2502] eta: 1:03:45 lr: 0.000020 loss_cls: 3.0781 (3.1910) grad_norm: 2.6893 (3.1537) time: 2.3893 data: 0.0002 max mem: 13912 +[2024-12-06 08:04:30 root] (utils.py 283): INFO Epoch: [2] [ 910/2502] eta: 1:03:21 lr: 0.000020 loss_cls: 3.1499 (3.1924) grad_norm: 2.7435 (3.1505) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 08:04:54 root] (utils.py 283): INFO Epoch: [2] [ 920/2502] eta: 1:02:58 lr: 0.000020 loss_cls: 3.3769 (3.1917) grad_norm: 2.7639 (3.1478) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 08:05:18 root] (utils.py 283): INFO Epoch: [2] [ 930/2502] eta: 1:02:34 lr: 0.000020 loss_cls: 3.4791 (3.1929) grad_norm: 2.8363 (3.1471) time: 2.3847 data: 0.0002 max mem: 13912 +[2024-12-06 08:05:41 root] (utils.py 283): INFO Epoch: [2] [ 940/2502] eta: 1:02:10 lr: 0.000020 loss_cls: 3.4591 (3.1939) grad_norm: 2.8830 (3.1461) time: 2.3846 data: 0.0002 max mem: 13912 +[2024-12-06 08:06:05 root] (utils.py 283): INFO Epoch: [2] [ 950/2502] eta: 1:01:46 lr: 0.000020 loss_cls: 3.3625 (3.1937) grad_norm: 2.6826 (3.1447) time: 2.3897 data: 0.0002 max mem: 13912 +[2024-12-06 08:06:29 root] (utils.py 283): INFO Epoch: [2] [ 960/2502] eta: 1:01:22 lr: 0.000020 loss_cls: 3.2169 (3.1916) grad_norm: 2.7477 (3.1416) time: 2.3860 data: 0.0003 max mem: 13912 +[2024-12-06 08:06:53 root] (utils.py 283): INFO Epoch: [2] [ 970/2502] eta: 1:00:58 lr: 0.000020 loss_cls: 3.3292 (3.1932) grad_norm: 2.8053 (3.1395) time: 2.3826 data: 0.0003 max mem: 13912 +[2024-12-06 08:07:17 root] (utils.py 283): INFO Epoch: [2] [ 980/2502] eta: 1:00:34 lr: 0.000020 loss_cls: 3.3292 (3.1949) grad_norm: 2.9934 (3.1440) time: 2.3852 data: 0.0003 max mem: 13912 +[2024-12-06 08:07:41 root] (utils.py 283): INFO Epoch: [2] [ 990/2502] eta: 1:00:10 lr: 0.000020 loss_cls: 3.2261 (3.1951) grad_norm: 2.9934 (3.1401) time: 2.3835 data: 0.0003 max mem: 13912 +[2024-12-06 08:08:05 root] (utils.py 283): INFO Epoch: [2] [1000/2502] eta: 0:59:46 lr: 0.000020 loss_cls: 3.2319 (3.1954) grad_norm: 2.7709 (3.1375) time: 2.3857 data: 0.0002 max mem: 13912 +[2024-12-06 08:08:28 root] (utils.py 283): INFO Epoch: [2] [1010/2502] eta: 0:59:22 lr: 0.000020 loss_cls: 3.4049 (3.1970) grad_norm: 2.8042 (3.1352) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 08:08:52 root] (utils.py 283): INFO Epoch: [2] [1020/2502] eta: 0:58:59 lr: 0.000020 loss_cls: 3.4184 (3.1957) grad_norm: 2.6673 (3.1335) time: 2.3930 data: 0.0003 max mem: 13912 +[2024-12-06 08:09:16 root] (utils.py 283): INFO Epoch: [2] [1030/2502] eta: 0:58:35 lr: 0.000020 loss_cls: 2.8985 (3.1928) grad_norm: 2.6673 (3.1312) time: 2.3976 data: 0.0002 max mem: 13912 +[2024-12-06 08:09:40 root] (utils.py 283): INFO Epoch: [2] [1040/2502] eta: 0:58:11 lr: 0.000020 loss_cls: 3.1071 (3.1930) grad_norm: 2.7443 (3.1286) time: 2.3933 data: 0.0002 max mem: 13912 +[2024-12-06 08:10:04 root] (utils.py 283): INFO Epoch: [2] [1050/2502] eta: 0:57:47 lr: 0.000020 loss_cls: 3.2857 (3.1924) grad_norm: 2.7654 (3.1262) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 08:10:28 root] (utils.py 283): INFO Epoch: [2] [1060/2502] eta: 0:57:23 lr: 0.000020 loss_cls: 3.2868 (3.1930) grad_norm: 2.8158 (3.1246) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 08:10:52 root] (utils.py 283): INFO Epoch: [2] [1070/2502] eta: 0:56:59 lr: 0.000020 loss_cls: 3.3778 (3.1947) grad_norm: 2.7569 (3.1209) time: 2.3868 data: 0.0002 max mem: 13912 +[2024-12-06 08:11:16 root] (utils.py 283): INFO Epoch: [2] [1080/2502] eta: 0:56:35 lr: 0.000020 loss_cls: 3.3316 (3.1945) grad_norm: 2.8192 (3.1212) time: 2.3876 data: 0.0002 max mem: 13912 +[2024-12-06 08:11:40 root] (utils.py 283): INFO Epoch: [2] [1090/2502] eta: 0:56:11 lr: 0.000020 loss_cls: 3.2595 (3.1953) grad_norm: 2.9156 (3.1213) time: 2.3864 data: 0.0002 max mem: 13912 +[2024-12-06 08:12:04 root] (utils.py 283): INFO Epoch: [2] [1100/2502] eta: 0:55:48 lr: 0.000020 loss_cls: 3.2492 (3.1949) grad_norm: 2.8686 (3.1220) time: 2.3900 data: 0.0002 max mem: 13912 +[2024-12-06 08:12:27 root] (utils.py 283): INFO Epoch: [2] [1110/2502] eta: 0:55:24 lr: 0.000020 loss_cls: 3.1201 (3.1924) grad_norm: 2.8045 (3.1228) time: 2.3910 data: 0.0002 max mem: 13912 +[2024-12-06 08:12:51 root] (utils.py 283): INFO Epoch: [2] [1120/2502] eta: 0:55:00 lr: 0.000020 loss_cls: 2.9870 (3.1921) grad_norm: 2.8700 (3.1227) time: 2.3756 data: 0.0002 max mem: 13912 +[2024-12-06 08:13:15 root] (utils.py 283): INFO Epoch: [2] [1130/2502] eta: 0:54:36 lr: 0.000020 loss_cls: 3.0246 (3.1916) grad_norm: 2.9567 (3.1275) time: 2.3784 data: 0.0002 max mem: 13912 +[2024-12-06 08:13:39 root] (utils.py 283): INFO Epoch: [2] [1140/2502] eta: 0:54:12 lr: 0.000020 loss_cls: 2.9606 (3.1888) grad_norm: 3.0695 (3.1281) time: 2.3930 data: 0.0003 max mem: 13912 +[2024-12-06 08:14:03 root] (utils.py 283): INFO Epoch: [2] [1150/2502] eta: 0:53:48 lr: 0.000020 loss_cls: 3.0767 (3.1896) grad_norm: 3.1368 (3.1268) time: 2.3947 data: 0.0003 max mem: 13912 +[2024-12-06 08:14:27 root] (utils.py 283): INFO Epoch: [2] [1160/2502] eta: 0:53:24 lr: 0.000020 loss_cls: 3.3674 (3.1898) grad_norm: 3.0351 (3.1273) time: 2.3887 data: 0.0002 max mem: 13912 +[2024-12-06 08:14:51 root] (utils.py 283): INFO Epoch: [2] [1170/2502] eta: 0:53:00 lr: 0.000020 loss_cls: 3.3739 (3.1915) grad_norm: 2.8235 (3.1265) time: 2.3842 data: 0.0002 max mem: 13912 +[2024-12-06 08:15:14 root] (utils.py 283): INFO Epoch: [2] [1180/2502] eta: 0:52:36 lr: 0.000020 loss_cls: 3.3739 (3.1913) grad_norm: 2.7157 (3.1242) time: 2.3845 data: 0.0002 max mem: 13912 +[2024-12-06 08:15:38 root] (utils.py 283): INFO Epoch: [2] [1190/2502] eta: 0:52:12 lr: 0.000020 loss_cls: 3.2337 (3.1912) grad_norm: 2.6996 (3.1292) time: 2.3733 data: 0.0002 max mem: 13912 +[2024-12-06 08:16:02 root] (utils.py 283): INFO Epoch: [2] [1200/2502] eta: 0:51:48 lr: 0.000020 loss_cls: 3.2982 (3.1928) grad_norm: 2.6971 (3.1331) time: 2.3580 data: 0.0002 max mem: 13912 +[2024-12-06 08:16:25 root] (utils.py 283): INFO Epoch: [2] [1210/2502] eta: 0:51:24 lr: 0.000020 loss_cls: 3.0556 (3.1892) grad_norm: 2.9235 (3.1344) time: 2.3684 data: 0.0002 max mem: 13912 +[2024-12-06 08:16:49 root] (utils.py 283): INFO Epoch: [2] [1220/2502] eta: 0:51:00 lr: 0.000020 loss_cls: 3.0556 (3.1897) grad_norm: 2.9235 (3.1330) time: 2.3816 data: 0.0002 max mem: 13912 +[2024-12-06 08:17:13 root] (utils.py 283): INFO Epoch: [2] [1230/2502] eta: 0:50:36 lr: 0.000020 loss_cls: 3.2847 (3.1888) grad_norm: 2.6748 (3.1348) time: 2.3748 data: 0.0002 max mem: 13912 +[2024-12-06 08:17:36 root] (utils.py 283): INFO Epoch: [2] [1240/2502] eta: 0:50:12 lr: 0.000020 loss_cls: 3.3191 (3.1907) grad_norm: 2.9204 (3.1370) time: 2.3569 data: 0.0002 max mem: 13912 +[2024-12-06 08:18:00 root] (utils.py 283): INFO Epoch: [2] [1250/2502] eta: 0:49:48 lr: 0.000020 loss_cls: 3.4693 (3.1933) grad_norm: 3.1161 (3.1564) time: 2.3604 data: 0.0003 max mem: 13912 +[2024-12-06 08:18:24 root] (utils.py 283): INFO Epoch: [2] [1260/2502] eta: 0:49:24 lr: 0.000020 loss_cls: 3.3982 (3.1946) grad_norm: 3.1161 (3.1556) time: 2.3861 data: 0.0002 max mem: 13912 +[2024-12-06 08:18:48 root] (utils.py 283): INFO Epoch: [2] [1270/2502] eta: 0:49:00 lr: 0.000020 loss_cls: 3.3085 (3.1949) grad_norm: 2.9069 (3.1552) time: 2.3918 data: 0.0003 max mem: 13912 +[2024-12-06 08:19:12 root] (utils.py 283): INFO Epoch: [2] [1280/2502] eta: 0:48:36 lr: 0.000020 loss_cls: 3.2945 (3.1948) grad_norm: 2.9781 (3.1557) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 08:19:36 root] (utils.py 283): INFO Epoch: [2] [1290/2502] eta: 0:48:12 lr: 0.000020 loss_cls: 3.3727 (3.1969) grad_norm: 3.2342 (3.1594) time: 2.3899 data: 0.0002 max mem: 13912 +[2024-12-06 08:20:00 root] (utils.py 283): INFO Epoch: [2] [1300/2502] eta: 0:47:49 lr: 0.000020 loss_cls: 3.5138 (3.1990) grad_norm: 3.1593 (3.1592) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 08:20:24 root] (utils.py 283): INFO Epoch: [2] [1310/2502] eta: 0:47:25 lr: 0.000020 loss_cls: 3.5517 (3.2005) grad_norm: 2.7653 (3.1566) time: 2.3881 data: 0.0002 max mem: 13912 +[2024-12-06 08:20:47 root] (utils.py 283): INFO Epoch: [2] [1320/2502] eta: 0:47:01 lr: 0.000020 loss_cls: 3.4395 (3.2020) grad_norm: 2.7613 (3.1547) time: 2.3856 data: 0.0002 max mem: 13912 +[2024-12-06 08:21:11 root] (utils.py 283): INFO Epoch: [2] [1330/2502] eta: 0:46:37 lr: 0.000020 loss_cls: 3.3570 (3.2004) grad_norm: 2.5995 (3.1527) time: 2.3800 data: 0.0002 max mem: 13912 +[2024-12-06 08:21:35 root] (utils.py 283): INFO Epoch: [2] [1340/2502] eta: 0:46:13 lr: 0.000020 loss_cls: 3.2555 (3.2010) grad_norm: 2.5995 (3.1509) time: 2.3784 data: 0.0002 max mem: 13912 +[2024-12-06 08:21:59 root] (utils.py 283): INFO Epoch: [2] [1350/2502] eta: 0:45:49 lr: 0.000020 loss_cls: 3.4227 (3.2002) grad_norm: 2.8765 (3.1494) time: 2.3813 data: 0.0002 max mem: 13912 +[2024-12-06 08:22:23 root] (utils.py 283): INFO Epoch: [2] [1360/2502] eta: 0:45:25 lr: 0.000020 loss_cls: 3.4227 (3.2006) grad_norm: 2.9892 (3.1488) time: 2.3871 data: 0.0002 max mem: 13912 +[2024-12-06 08:22:47 root] (utils.py 283): INFO Epoch: [2] [1370/2502] eta: 0:45:01 lr: 0.000020 loss_cls: 3.4841 (3.2018) grad_norm: 2.8886 (3.1469) time: 2.3894 data: 0.0002 max mem: 13912 +[2024-12-06 08:23:10 root] (utils.py 283): INFO Epoch: [2] [1380/2502] eta: 0:44:37 lr: 0.000020 loss_cls: 3.3565 (3.2019) grad_norm: 2.7520 (3.1455) time: 2.3870 data: 0.0002 max mem: 13912 +[2024-12-06 08:23:34 root] (utils.py 283): INFO Epoch: [2] [1390/2502] eta: 0:44:14 lr: 0.000020 loss_cls: 3.3043 (3.2024) grad_norm: 2.8508 (3.1504) time: 2.3881 data: 0.0002 max mem: 13912 +[2024-12-06 08:23:58 root] (utils.py 283): INFO Epoch: [2] [1400/2502] eta: 0:43:50 lr: 0.000020 loss_cls: 3.3043 (3.2025) grad_norm: 3.2619 (3.1566) time: 2.3887 data: 0.0002 max mem: 13912 +[2024-12-06 08:24:22 root] (utils.py 283): INFO Epoch: [2] [1410/2502] eta: 0:43:26 lr: 0.000020 loss_cls: 3.2782 (3.2025) grad_norm: 2.8367 (3.1538) time: 2.3882 data: 0.0002 max mem: 13912 +[2024-12-06 08:24:46 root] (utils.py 283): INFO Epoch: [2] [1420/2502] eta: 0:43:02 lr: 0.000020 loss_cls: 3.3811 (3.2026) grad_norm: 2.6876 (3.1527) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 08:25:10 root] (utils.py 283): INFO Epoch: [2] [1430/2502] eta: 0:42:38 lr: 0.000020 loss_cls: 3.0560 (3.2007) grad_norm: 2.7131 (3.1500) time: 2.3917 data: 0.0003 max mem: 13912 +[2024-12-06 08:25:34 root] (utils.py 283): INFO Epoch: [2] [1440/2502] eta: 0:42:14 lr: 0.000020 loss_cls: 3.2097 (3.2011) grad_norm: 2.8190 (3.1515) time: 2.3858 data: 0.0003 max mem: 13912 +[2024-12-06 08:25:58 root] (utils.py 283): INFO Epoch: [2] [1450/2502] eta: 0:41:51 lr: 0.000020 loss_cls: 3.3752 (3.2004) grad_norm: 2.9310 (3.1503) time: 2.3871 data: 0.0003 max mem: 13912 +[2024-12-06 08:26:21 root] (utils.py 283): INFO Epoch: [2] [1460/2502] eta: 0:41:26 lr: 0.000020 loss_cls: 3.1878 (3.1995) grad_norm: 2.6840 (3.1521) time: 2.3768 data: 0.0003 max mem: 13912 +[2024-12-06 08:26:45 root] (utils.py 283): INFO Epoch: [2] [1470/2502] eta: 0:41:03 lr: 0.000020 loss_cls: 2.9128 (3.1973) grad_norm: 2.7410 (3.1513) time: 2.3758 data: 0.0003 max mem: 13912 +[2024-12-06 08:27:09 root] (utils.py 283): INFO Epoch: [2] [1480/2502] eta: 0:40:39 lr: 0.000020 loss_cls: 2.9128 (3.1964) grad_norm: 2.8591 (3.1526) time: 2.3863 data: 0.0003 max mem: 13912 +[2024-12-06 08:27:33 root] (utils.py 283): INFO Epoch: [2] [1490/2502] eta: 0:40:15 lr: 0.000020 loss_cls: 3.3083 (3.1972) grad_norm: 2.9175 (3.1527) time: 2.3854 data: 0.0003 max mem: 13912 +[2024-12-06 08:27:57 root] (utils.py 283): INFO Epoch: [2] [1500/2502] eta: 0:39:51 lr: 0.000020 loss_cls: 3.3472 (3.1961) grad_norm: 2.8245 (3.1507) time: 2.3878 data: 0.0002 max mem: 13912 +[2024-12-06 08:28:21 root] (utils.py 283): INFO Epoch: [2] [1510/2502] eta: 0:39:27 lr: 0.000020 loss_cls: 3.3310 (3.1968) grad_norm: 2.9610 (3.1521) time: 2.3917 data: 0.0002 max mem: 13912 +[2024-12-06 08:28:45 root] (utils.py 283): INFO Epoch: [2] [1520/2502] eta: 0:39:03 lr: 0.000020 loss_cls: 3.1975 (3.1958) grad_norm: 2.9893 (3.1502) time: 2.3944 data: 0.0002 max mem: 13912 +[2024-12-06 08:29:09 root] (utils.py 283): INFO Epoch: [2] [1530/2502] eta: 0:38:40 lr: 0.000020 loss_cls: 3.1576 (3.1958) grad_norm: 2.8818 (3.1486) time: 2.3938 data: 0.0002 max mem: 13912 +[2024-12-06 08:29:33 root] (utils.py 283): INFO Epoch: [2] [1540/2502] eta: 0:38:16 lr: 0.000020 loss_cls: 3.3883 (3.1966) grad_norm: 2.7250 (3.1462) time: 2.3987 data: 0.0002 max mem: 13912 +[2024-12-06 08:29:57 root] (utils.py 283): INFO Epoch: [2] [1550/2502] eta: 0:37:52 lr: 0.000020 loss_cls: 3.3463 (3.1968) grad_norm: 2.7250 (3.1442) time: 2.3954 data: 0.0002 max mem: 13912 +[2024-12-06 08:30:20 root] (utils.py 283): INFO Epoch: [2] [1560/2502] eta: 0:37:28 lr: 0.000020 loss_cls: 3.1233 (3.1966) grad_norm: 2.6490 (3.1410) time: 2.3867 data: 0.0002 max mem: 13912 +[2024-12-06 08:30:44 root] (utils.py 283): INFO Epoch: [2] [1570/2502] eta: 0:37:04 lr: 0.000020 loss_cls: 3.2870 (3.1976) grad_norm: 2.7539 (3.1455) time: 2.3817 data: 0.0003 max mem: 13912 +[2024-12-06 08:31:08 root] (utils.py 283): INFO Epoch: [2] [1580/2502] eta: 0:36:40 lr: 0.000020 loss_cls: 3.4281 (3.1971) grad_norm: 2.8181 (3.1444) time: 2.3847 data: 0.0003 max mem: 13912 +[2024-12-06 08:31:32 root] (utils.py 283): INFO Epoch: [2] [1590/2502] eta: 0:36:16 lr: 0.000020 loss_cls: 3.3346 (3.1975) grad_norm: 2.6859 (3.1407) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 08:31:56 root] (utils.py 283): INFO Epoch: [2] [1600/2502] eta: 0:35:53 lr: 0.000020 loss_cls: 3.3154 (3.1971) grad_norm: 2.6329 (3.1403) time: 2.3879 data: 0.0002 max mem: 13912 +[2024-12-06 08:32:20 root] (utils.py 283): INFO Epoch: [2] [1610/2502] eta: 0:35:29 lr: 0.000020 loss_cls: 3.3321 (3.1967) grad_norm: 2.7181 (3.1375) time: 2.3886 data: 0.0002 max mem: 13912 +[2024-12-06 08:32:44 root] (utils.py 283): INFO Epoch: [2] [1620/2502] eta: 0:35:05 lr: 0.000020 loss_cls: 3.3631 (3.1978) grad_norm: 2.7198 (3.1383) time: 2.3888 data: 0.0002 max mem: 13912 +[2024-12-06 08:33:07 root] (utils.py 283): INFO Epoch: [2] [1630/2502] eta: 0:34:41 lr: 0.000020 loss_cls: 3.4054 (3.1969) grad_norm: 2.8117 (3.1362) time: 2.3853 data: 0.0002 max mem: 13912 +[2024-12-06 08:33:31 root] (utils.py 283): INFO Epoch: [2] [1640/2502] eta: 0:34:17 lr: 0.000020 loss_cls: 3.3636 (3.1976) grad_norm: 2.9706 (3.1414) time: 2.3711 data: 0.0002 max mem: 13912 +[2024-12-06 08:33:55 root] (utils.py 283): INFO Epoch: [2] [1650/2502] eta: 0:33:53 lr: 0.000020 loss_cls: 3.1734 (3.1967) grad_norm: 2.9051 (3.1411) time: 2.3712 data: 0.0002 max mem: 13912 +[2024-12-06 08:34:19 root] (utils.py 283): INFO Epoch: [2] [1660/2502] eta: 0:33:29 lr: 0.000020 loss_cls: 3.1504 (3.1963) grad_norm: 2.7274 (3.1381) time: 2.3859 data: 0.0002 max mem: 13912 +[2024-12-06 08:34:43 root] (utils.py 283): INFO Epoch: [2] [1670/2502] eta: 0:33:05 lr: 0.000020 loss_cls: 3.1350 (3.1953) grad_norm: 2.7274 (3.1385) time: 2.3865 data: 0.0002 max mem: 13912 +[2024-12-06 08:35:07 root] (utils.py 283): INFO Epoch: [2] [1680/2502] eta: 0:32:41 lr: 0.000020 loss_cls: 2.9728 (3.1943) grad_norm: 2.6615 (3.1356) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 08:35:31 root] (utils.py 283): INFO Epoch: [2] [1690/2502] eta: 0:32:18 lr: 0.000020 loss_cls: 2.9728 (3.1931) grad_norm: 2.5990 (3.1348) time: 2.3947 data: 0.0003 max mem: 13912 +[2024-12-06 08:35:54 root] (utils.py 283): INFO Epoch: [2] [1700/2502] eta: 0:31:54 lr: 0.000020 loss_cls: 3.2635 (3.1941) grad_norm: 2.6253 (3.1373) time: 2.3944 data: 0.0003 max mem: 13912 +[2024-12-06 08:36:18 root] (utils.py 283): INFO Epoch: [2] [1710/2502] eta: 0:31:30 lr: 0.000020 loss_cls: 3.2635 (3.1944) grad_norm: 2.6722 (3.1367) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 08:36:42 root] (utils.py 283): INFO Epoch: [2] [1720/2502] eta: 0:31:06 lr: 0.000020 loss_cls: 3.3282 (3.1960) grad_norm: 2.6600 (3.1357) time: 2.3674 data: 0.0003 max mem: 13912 +[2024-12-06 08:37:06 root] (utils.py 283): INFO Epoch: [2] [1730/2502] eta: 0:30:42 lr: 0.000020 loss_cls: 3.3282 (3.1955) grad_norm: 2.6746 (3.1335) time: 2.3685 data: 0.0002 max mem: 13912 +[2024-12-06 08:37:37 root] (utils.py 283): INFO Epoch: [2] [1740/2502] eta: 0:30:22 lr: 0.000020 loss_cls: 3.0309 (3.1940) grad_norm: 2.7864 (3.1329) time: 2.7772 data: 0.0003 max mem: 13912 +[2024-12-06 08:38:04 root] (utils.py 283): INFO Epoch: [2] [1750/2502] eta: 0:29:59 lr: 0.000020 loss_cls: 2.8577 (3.1926) grad_norm: 2.9975 (3.1366) time: 2.9228 data: 0.0003 max mem: 13912 +[2024-12-06 08:38:28 root] (utils.py 283): INFO Epoch: [2] [1760/2502] eta: 0:29:35 lr: 0.000020 loss_cls: 2.8533 (3.1915) grad_norm: 2.7627 (3.1357) time: 2.5361 data: 0.0003 max mem: 13912 +[2024-12-06 08:38:52 root] (utils.py 283): INFO Epoch: [2] [1770/2502] eta: 0:29:11 lr: 0.000020 loss_cls: 3.2449 (3.1919) grad_norm: 2.7996 (3.1387) time: 2.3963 data: 0.0003 max mem: 13912 +[2024-12-06 08:39:16 root] (utils.py 283): INFO Epoch: [2] [1780/2502] eta: 0:28:47 lr: 0.000020 loss_cls: 3.3056 (3.1918) grad_norm: 2.7885 (3.1360) time: 2.3943 data: 0.0003 max mem: 13912 +[2024-12-06 08:39:40 root] (utils.py 283): INFO Epoch: [2] [1790/2502] eta: 0:28:23 lr: 0.000020 loss_cls: 3.3056 (3.1930) grad_norm: 2.6298 (3.1332) time: 2.3850 data: 0.0003 max mem: 13912 +[2024-12-06 08:40:04 root] (utils.py 283): INFO Epoch: [2] [1800/2502] eta: 0:27:59 lr: 0.000020 loss_cls: 3.3946 (3.1925) grad_norm: 2.6721 (3.1316) time: 2.3827 data: 0.0003 max mem: 13912 +[2024-12-06 08:40:28 root] (utils.py 283): INFO Epoch: [2] [1810/2502] eta: 0:27:35 lr: 0.000020 loss_cls: 3.2910 (3.1921) grad_norm: 2.7187 (3.1321) time: 2.3840 data: 0.0002 max mem: 13912 +[2024-12-06 08:40:51 root] (utils.py 283): INFO Epoch: [2] [1820/2502] eta: 0:27:11 lr: 0.000020 loss_cls: 3.2910 (3.1924) grad_norm: 2.9892 (3.1417) time: 2.3893 data: 0.0002 max mem: 13912 +[2024-12-06 08:41:15 root] (utils.py 283): INFO Epoch: [2] [1830/2502] eta: 0:26:47 lr: 0.000020 loss_cls: 3.2354 (3.1918) grad_norm: 2.9522 (3.1395) time: 2.3938 data: 0.0002 max mem: 13912 +[2024-12-06 08:41:39 root] (utils.py 283): INFO Epoch: [2] [1840/2502] eta: 0:26:23 lr: 0.000020 loss_cls: 3.2965 (3.1919) grad_norm: 2.6827 (3.1388) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 08:42:03 root] (utils.py 283): INFO Epoch: [2] [1850/2502] eta: 0:26:00 lr: 0.000020 loss_cls: 2.9110 (3.1904) grad_norm: 2.8436 (3.1369) time: 2.3890 data: 0.0003 max mem: 13912 +[2024-12-06 08:42:27 root] (utils.py 283): INFO Epoch: [2] [1860/2502] eta: 0:25:36 lr: 0.000020 loss_cls: 2.9110 (3.1901) grad_norm: 2.8152 (3.1354) time: 2.3818 data: 0.0002 max mem: 13912 +[2024-12-06 08:42:51 root] (utils.py 283): INFO Epoch: [2] [1870/2502] eta: 0:25:12 lr: 0.000020 loss_cls: 3.2256 (3.1905) grad_norm: 2.8524 (3.1371) time: 2.3754 data: 0.0002 max mem: 13912 +[2024-12-06 08:43:15 root] (utils.py 283): INFO Epoch: [2] [1880/2502] eta: 0:24:48 lr: 0.000020 loss_cls: 3.3043 (3.1893) grad_norm: 2.7792 (3.1376) time: 2.3791 data: 0.0003 max mem: 13912 +[2024-12-06 08:43:38 root] (utils.py 283): INFO Epoch: [2] [1890/2502] eta: 0:24:24 lr: 0.000020 loss_cls: 3.1976 (3.1892) grad_norm: 2.7185 (3.1359) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 08:44:02 root] (utils.py 283): INFO Epoch: [2] [1900/2502] eta: 0:24:00 lr: 0.000020 loss_cls: 3.1976 (3.1885) grad_norm: 2.7612 (3.1366) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 08:44:26 root] (utils.py 283): INFO Epoch: [2] [1910/2502] eta: 0:23:36 lr: 0.000020 loss_cls: 3.1850 (3.1878) grad_norm: 2.8371 (3.1355) time: 2.3848 data: 0.0003 max mem: 13912 +[2024-12-06 08:44:50 root] (utils.py 283): INFO Epoch: [2] [1920/2502] eta: 0:23:12 lr: 0.000020 loss_cls: 3.0653 (3.1874) grad_norm: 2.7821 (3.1383) time: 2.3842 data: 0.0003 max mem: 13912 +[2024-12-06 08:45:14 root] (utils.py 283): INFO Epoch: [2] [1930/2502] eta: 0:22:48 lr: 0.000020 loss_cls: 3.1180 (3.1876) grad_norm: 2.6308 (3.1417) time: 2.3847 data: 0.0002 max mem: 13912 +[2024-12-06 08:45:38 root] (utils.py 283): INFO Epoch: [2] [1940/2502] eta: 0:22:24 lr: 0.000020 loss_cls: 3.1180 (3.1869) grad_norm: 2.9208 (3.1415) time: 2.3863 data: 0.0003 max mem: 13912 +[2024-12-06 08:46:02 root] (utils.py 283): INFO Epoch: [2] [1950/2502] eta: 0:22:00 lr: 0.000020 loss_cls: 3.0915 (3.1856) grad_norm: 2.7957 (3.1394) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 08:46:25 root] (utils.py 283): INFO Epoch: [2] [1960/2502] eta: 0:21:36 lr: 0.000020 loss_cls: 3.3426 (3.1864) grad_norm: 2.7957 (3.1390) time: 2.3849 data: 0.0003 max mem: 13912 +[2024-12-06 08:46:49 root] (utils.py 283): INFO Epoch: [2] [1970/2502] eta: 0:21:12 lr: 0.000020 loss_cls: 3.1653 (3.1856) grad_norm: 2.9760 (3.1417) time: 2.3637 data: 0.0002 max mem: 13912 +[2024-12-06 08:47:13 root] (utils.py 283): INFO Epoch: [2] [1980/2502] eta: 0:20:48 lr: 0.000020 loss_cls: 3.1415 (3.1858) grad_norm: 2.8328 (3.1394) time: 2.3711 data: 0.0002 max mem: 13912 +[2024-12-06 08:47:37 root] (utils.py 283): INFO Epoch: [2] [1990/2502] eta: 0:20:24 lr: 0.000020 loss_cls: 3.0769 (3.1851) grad_norm: 2.7115 (3.1383) time: 2.3907 data: 0.0002 max mem: 13912 +[2024-12-06 08:48:01 root] (utils.py 283): INFO Epoch: [2] [2000/2502] eta: 0:20:00 lr: 0.000020 loss_cls: 2.9766 (3.1832) grad_norm: 2.6824 (3.1366) time: 2.3873 data: 0.0002 max mem: 13912 +[2024-12-06 08:48:24 root] (utils.py 283): INFO Epoch: [2] [2010/2502] eta: 0:19:36 lr: 0.000020 loss_cls: 2.9766 (3.1833) grad_norm: 2.5853 (3.1367) time: 2.3852 data: 0.0002 max mem: 13912 +[2024-12-06 08:48:48 root] (utils.py 283): INFO Epoch: [2] [2020/2502] eta: 0:19:12 lr: 0.000020 loss_cls: 3.3144 (3.1830) grad_norm: 2.6905 (3.1367) time: 2.3891 data: 0.0002 max mem: 13912 +[2024-12-06 08:49:12 root] (utils.py 283): INFO Epoch: [2] [2030/2502] eta: 0:18:48 lr: 0.000020 loss_cls: 3.3144 (3.1833) grad_norm: 2.6905 (3.1354) time: 2.3928 data: 0.0003 max mem: 13912 +[2024-12-06 08:49:36 root] (utils.py 283): INFO Epoch: [2] [2040/2502] eta: 0:18:25 lr: 0.000020 loss_cls: 3.2264 (3.1817) grad_norm: 2.6886 (3.1338) time: 2.3865 data: 0.0002 max mem: 13912 +[2024-12-06 08:50:00 root] (utils.py 283): INFO Epoch: [2] [2050/2502] eta: 0:18:01 lr: 0.000020 loss_cls: 3.2264 (3.1813) grad_norm: 2.6886 (3.1313) time: 2.3866 data: 0.0002 max mem: 13912 +[2024-12-06 08:50:24 root] (utils.py 283): INFO Epoch: [2] [2060/2502] eta: 0:17:37 lr: 0.000020 loss_cls: 3.3366 (3.1821) grad_norm: 2.5135 (3.1335) time: 2.3902 data: 0.0002 max mem: 13912 +[2024-12-06 08:50:48 root] (utils.py 283): INFO Epoch: [2] [2070/2502] eta: 0:17:13 lr: 0.000020 loss_cls: 3.2936 (3.1813) grad_norm: 2.7384 (3.1326) time: 2.3883 data: 0.0002 max mem: 13912 +[2024-12-06 08:51:11 root] (utils.py 283): INFO Epoch: [2] [2080/2502] eta: 0:16:49 lr: 0.000020 loss_cls: 3.0395 (3.1801) grad_norm: 2.8095 (3.1337) time: 2.3733 data: 0.0003 max mem: 13912 +[2024-12-06 08:51:35 root] (utils.py 283): INFO Epoch: [2] [2090/2502] eta: 0:16:25 lr: 0.000020 loss_cls: 3.1051 (3.1805) grad_norm: 2.9411 (3.1328) time: 2.3713 data: 0.0003 max mem: 13912 +[2024-12-06 08:51:59 root] (utils.py 283): INFO Epoch: [2] [2100/2502] eta: 0:16:01 lr: 0.000020 loss_cls: 3.1128 (3.1804) grad_norm: 2.7278 (3.1301) time: 2.3853 data: 0.0002 max mem: 13912 +[2024-12-06 08:52:23 root] (utils.py 283): INFO Epoch: [2] [2110/2502] eta: 0:15:37 lr: 0.000020 loss_cls: 2.9493 (3.1792) grad_norm: 2.6903 (3.1349) time: 2.3905 data: 0.0002 max mem: 13912 +[2024-12-06 08:52:47 root] (utils.py 283): INFO Epoch: [2] [2120/2502] eta: 0:15:13 lr: 0.000020 loss_cls: 2.9190 (3.1787) grad_norm: 2.6865 (3.1338) time: 2.3910 data: 0.0002 max mem: 13912 +[2024-12-06 08:53:11 root] (utils.py 283): INFO Epoch: [2] [2130/2502] eta: 0:14:49 lr: 0.000020 loss_cls: 3.1490 (3.1793) grad_norm: 2.7073 (3.1323) time: 2.3891 data: 0.0002 max mem: 13912 +[2024-12-06 08:53:35 root] (utils.py 283): INFO Epoch: [2] [2140/2502] eta: 0:14:25 lr: 0.000020 loss_cls: 3.2970 (3.1791) grad_norm: 2.7073 (3.1308) time: 2.3884 data: 0.0002 max mem: 13912 +[2024-12-06 08:53:59 root] (utils.py 283): INFO Epoch: [2] [2150/2502] eta: 0:14:01 lr: 0.000020 loss_cls: 3.5216 (3.1811) grad_norm: 2.9086 (3.1312) time: 2.3864 data: 0.0002 max mem: 13912 +[2024-12-06 08:54:22 root] (utils.py 283): INFO Epoch: [2] [2160/2502] eta: 0:13:37 lr: 0.000020 loss_cls: 3.5694 (3.1825) grad_norm: 2.9449 (3.1341) time: 2.3850 data: 0.0002 max mem: 13912 +[2024-12-06 08:54:46 root] (utils.py 283): INFO Epoch: [2] [2170/2502] eta: 0:13:13 lr: 0.000020 loss_cls: 3.4343 (3.1827) grad_norm: 2.9148 (3.1344) time: 2.3852 data: 0.0002 max mem: 13912 +[2024-12-06 08:55:10 root] (utils.py 283): INFO Epoch: [2] [2180/2502] eta: 0:12:50 lr: 0.000020 loss_cls: 3.2168 (3.1815) grad_norm: 2.6690 (3.1339) time: 2.3847 data: 0.0003 max mem: 13912 +[2024-12-06 08:55:34 root] (utils.py 283): INFO Epoch: [2] [2190/2502] eta: 0:12:26 lr: 0.000020 loss_cls: 3.1662 (3.1812) grad_norm: 2.6505 (3.1336) time: 2.3707 data: 0.0003 max mem: 13912 +[2024-12-06 08:55:58 root] (utils.py 283): INFO Epoch: [2] [2200/2502] eta: 0:12:02 lr: 0.000020 loss_cls: 3.2541 (3.1818) grad_norm: 2.8254 (3.1339) time: 2.3704 data: 0.0002 max mem: 13912 +[2024-12-06 08:56:21 root] (utils.py 283): INFO Epoch: [2] [2210/2502] eta: 0:11:38 lr: 0.000020 loss_cls: 3.1804 (3.1811) grad_norm: 2.6965 (3.1324) time: 2.3832 data: 0.0002 max mem: 13912 +[2024-12-06 08:56:45 root] (utils.py 283): INFO Epoch: [2] [2220/2502] eta: 0:11:14 lr: 0.000020 loss_cls: 3.1555 (3.1816) grad_norm: 2.9241 (3.1346) time: 2.3847 data: 0.0002 max mem: 13912 +[2024-12-06 08:57:09 root] (utils.py 283): INFO Epoch: [2] [2230/2502] eta: 0:10:50 lr: 0.000020 loss_cls: 3.1702 (3.1818) grad_norm: 3.3359 (3.1365) time: 2.3826 data: 0.0002 max mem: 13912 +[2024-12-06 08:57:33 root] (utils.py 283): INFO Epoch: [2] [2240/2502] eta: 0:10:26 lr: 0.000020 loss_cls: 3.2482 (3.1825) grad_norm: 3.1005 (3.1380) time: 2.3801 data: 0.0002 max mem: 13912 +[2024-12-06 08:57:57 root] (utils.py 283): INFO Epoch: [2] [2250/2502] eta: 0:10:02 lr: 0.000020 loss_cls: 3.3749 (3.1819) grad_norm: 2.8456 (3.1380) time: 2.3805 data: 0.0003 max mem: 13912 +[2024-12-06 08:58:20 root] (utils.py 283): INFO Epoch: [2] [2260/2502] eta: 0:09:38 lr: 0.000020 loss_cls: 3.3861 (3.1832) grad_norm: 2.8726 (3.1376) time: 2.3801 data: 0.0003 max mem: 13912 +[2024-12-06 08:58:44 root] (utils.py 283): INFO Epoch: [2] [2270/2502] eta: 0:09:14 lr: 0.000020 loss_cls: 3.4524 (3.1839) grad_norm: 2.9772 (3.1392) time: 2.3805 data: 0.0003 max mem: 13912 +[2024-12-06 08:59:08 root] (utils.py 283): INFO Epoch: [2] [2280/2502] eta: 0:08:50 lr: 0.000020 loss_cls: 3.0583 (3.1825) grad_norm: 2.8674 (3.1377) time: 2.3823 data: 0.0002 max mem: 13912 +[2024-12-06 08:59:32 root] (utils.py 283): INFO Epoch: [2] [2290/2502] eta: 0:08:26 lr: 0.000020 loss_cls: 2.9043 (3.1821) grad_norm: 2.6424 (3.1373) time: 2.3870 data: 0.0002 max mem: 13912 +[2024-12-06 08:59:56 root] (utils.py 283): INFO Epoch: [2] [2300/2502] eta: 0:08:02 lr: 0.000020 loss_cls: 3.2652 (3.1829) grad_norm: 2.7917 (3.1430) time: 2.3824 data: 0.0002 max mem: 13912 +[2024-12-06 09:00:20 root] (utils.py 283): INFO Epoch: [2] [2310/2502] eta: 0:07:39 lr: 0.000020 loss_cls: 3.2949 (3.1831) grad_norm: 2.7917 (3.1424) time: 2.3832 data: 0.0002 max mem: 13912 +[2024-12-06 09:00:44 root] (utils.py 283): INFO Epoch: [2] [2320/2502] eta: 0:07:15 lr: 0.000020 loss_cls: 3.2072 (3.1833) grad_norm: 2.7965 (3.1439) time: 2.3906 data: 0.0003 max mem: 13912 +[2024-12-06 09:01:07 root] (utils.py 283): INFO Epoch: [2] [2330/2502] eta: 0:06:51 lr: 0.000020 loss_cls: 3.1110 (3.1822) grad_norm: 2.7771 (3.1417) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 09:01:31 root] (utils.py 283): INFO Epoch: [2] [2340/2502] eta: 0:06:27 lr: 0.000020 loss_cls: 3.1370 (3.1832) grad_norm: 2.6416 (3.1418) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 09:01:55 root] (utils.py 283): INFO Epoch: [2] [2350/2502] eta: 0:06:03 lr: 0.000020 loss_cls: 3.3361 (3.1834) grad_norm: 2.8583 (3.1421) time: 2.3716 data: 0.0003 max mem: 13912 +[2024-12-06 09:02:19 root] (utils.py 283): INFO Epoch: [2] [2360/2502] eta: 0:05:39 lr: 0.000020 loss_cls: 3.1928 (3.1835) grad_norm: 2.6734 (3.1401) time: 2.3709 data: 0.0003 max mem: 13912 +[2024-12-06 09:02:43 root] (utils.py 283): INFO Epoch: [2] [2370/2502] eta: 0:05:15 lr: 0.000020 loss_cls: 3.1928 (3.1831) grad_norm: 2.6621 (3.1393) time: 2.3881 data: 0.0002 max mem: 13912 +[2024-12-06 09:03:07 root] (utils.py 283): INFO Epoch: [2] [2380/2502] eta: 0:04:51 lr: 0.000020 loss_cls: 3.2178 (3.1833) grad_norm: 2.8121 (3.1384) time: 2.3941 data: 0.0002 max mem: 13912 +[2024-12-06 09:03:31 root] (utils.py 283): INFO Epoch: [2] [2390/2502] eta: 0:04:27 lr: 0.000020 loss_cls: 3.4812 (3.1840) grad_norm: 2.8724 (3.1396) time: 2.3924 data: 0.0002 max mem: 13912 +[2024-12-06 09:03:54 root] (utils.py 283): INFO Epoch: [2] [2400/2502] eta: 0:04:03 lr: 0.000020 loss_cls: 3.1806 (3.1828) grad_norm: 2.8147 (3.1393) time: 2.3902 data: 0.0002 max mem: 13912 +[2024-12-06 09:04:18 root] (utils.py 283): INFO Epoch: [2] [2410/2502] eta: 0:03:39 lr: 0.000020 loss_cls: 3.1806 (3.1827) grad_norm: 2.7942 (3.1405) time: 2.3889 data: 0.0002 max mem: 13912 +[2024-12-06 09:04:42 root] (utils.py 283): INFO Epoch: [2] [2420/2502] eta: 0:03:16 lr: 0.000020 loss_cls: 3.3399 (3.1836) grad_norm: 2.8048 (3.1401) time: 2.3892 data: 0.0002 max mem: 13912 +[2024-12-06 09:05:06 root] (utils.py 283): INFO Epoch: [2] [2430/2502] eta: 0:02:52 lr: 0.000020 loss_cls: 3.3852 (3.1836) grad_norm: 3.0332 (3.1400) time: 2.3909 data: 0.0003 max mem: 13912 +[2024-12-06 09:05:30 root] (utils.py 283): INFO Epoch: [2] [2440/2502] eta: 0:02:28 lr: 0.000020 loss_cls: 3.0653 (3.1823) grad_norm: 2.9190 (3.1403) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 09:05:54 root] (utils.py 283): INFO Epoch: [2] [2450/2502] eta: 0:02:04 lr: 0.000020 loss_cls: 3.0892 (3.1821) grad_norm: 2.9096 (3.1400) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 09:06:18 root] (utils.py 283): INFO Epoch: [2] [2460/2502] eta: 0:01:40 lr: 0.000020 loss_cls: 3.2664 (3.1827) grad_norm: 2.9096 (3.1403) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 09:06:42 root] (utils.py 283): INFO Epoch: [2] [2470/2502] eta: 0:01:16 lr: 0.000020 loss_cls: 3.4901 (3.1832) grad_norm: 2.7241 (3.1388) time: 2.3927 data: 0.0003 max mem: 13912 +[2024-12-06 09:07:06 root] (utils.py 283): INFO Epoch: [2] [2480/2502] eta: 0:00:52 lr: 0.000020 loss_cls: 3.3034 (3.1834) grad_norm: 2.6694 (3.1592) time: 2.3928 data: 0.0003 max mem: 13912 +[2024-12-06 09:07:30 root] (utils.py 283): INFO Epoch: [2] [2490/2502] eta: 0:00:28 lr: 0.000020 loss_cls: 3.1430 (3.1828) grad_norm: 2.6694 (3.1592) time: 2.4012 data: 0.0234 max mem: 13912 +[2024-12-06 09:07:54 root] (utils.py 283): INFO Epoch: [2] [2500/2502] eta: 0:00:04 lr: 0.000020 loss_cls: 3.1481 (3.1824) grad_norm: 2.8821 (3.1585) time: 2.4009 data: 0.0234 max mem: 13912 +[2024-12-06 09:07:56 root] (utils.py 283): INFO Epoch: [2] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 3.1481 (3.1823) grad_norm: 2.9509 (3.1591) time: 2.4004 data: 0.0234 max mem: 13912 +[2024-12-06 09:07:56 root] (utils.py 297): INFO Epoch: [2] Total time: 1:39:41 (2.3909 s / it) +[2024-12-06 09:07:56 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 3.1481 (3.1783) grad_norm: 2.9509 (3.1591) +[2024-12-06 09:07:57 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:29 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4382 (0.4382) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.2975 data: 0.0003 max mem: 13912 +[2024-12-06 09:08:00 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:28 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6671 (0.6844) acc1: 84.3750 (85.9375) acc3: 94.5312 (94.8864) acc5: 97.6562 (97.0170) time: 0.3240 data: 0.0004 max mem: 13912 +[2024-12-06 09:08:04 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:25 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7198 (0.7190) acc1: 84.3750 (84.9330) acc3: 94.5312 (94.6429) acc5: 96.8750 (96.8006) time: 0.3261 data: 0.0004 max mem: 13912 +[2024-12-06 09:08:07 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7232 (0.7412) acc1: 84.3750 (84.1734) acc3: 94.5312 (94.4808) acc5: 96.8750 (96.6986) time: 0.3264 data: 0.0005 max mem: 13912 +[2024-12-06 09:08:10 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:18 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7475 (0.7418) acc1: 84.3750 (84.0701) acc3: 94.5312 (94.4931) acc5: 96.8750 (96.6654) time: 0.3143 data: 0.0005 max mem: 13912 +[2024-12-06 09:08:13 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7887 (0.8250) acc1: 77.3438 (82.1232) acc3: 92.1875 (93.2904) acc5: 94.5312 (95.7108) time: 0.3134 data: 0.0005 max mem: 13912 +[2024-12-06 09:08:17 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:12 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0472 (0.8565) acc1: 75.0000 (81.6598) acc3: 88.2812 (92.6614) acc5: 90.6250 (95.1204) time: 0.3255 data: 0.0005 max mem: 13912 +[2024-12-06 09:08:20 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0959 (0.8878) acc1: 78.1250 (80.7879) acc3: 89.8438 (92.2975) acc5: 92.9688 (94.8724) time: 0.3178 data: 0.0004 max mem: 13912 +[2024-12-06 09:08:23 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:05 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0959 (0.9175) acc1: 74.2188 (79.8804) acc3: 89.8438 (91.8403) acc5: 93.7500 (94.5795) time: 0.3133 data: 0.0006 max mem: 13912 +[2024-12-06 09:08:26 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:02 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1212 (0.9431) acc1: 71.8750 (79.1896) acc3: 88.2812 (91.4234) acc5: 91.4062 (94.2909) time: 0.3202 data: 0.0006 max mem: 13912 +[2024-12-06 09:08:28 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.9920 (0.9387) acc1: 75.7812 (79.2320) acc3: 89.8438 (91.4960) acc5: 94.5312 (94.3680) time: 0.3085 data: 0.0005 max mem: 13912 +[2024-12-06 09:08:28 root] (utils.py 297): INFO Test: Total time: 0:00:31 (0.3180 s / it) +[2024-12-06 09:08:28 root] (engine.py 264): INFO * Acc@1 79.250 Acc@3 91.698 Acc@5 94.482 loss 0.936 flops 3.584 layer_flops 3.536 +[2024-12-06 09:08:28 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.3% +[2024-12-06 09:08:29 root] (main.py 551): INFO Max accuracy: 79.25% +[2024-12-06 09:08:31 root] (utils.py 283): INFO Epoch: [3] [ 0/2502] eta: 1:39:28 lr: 0.000019 loss_cls: 2.4089 (2.4089) grad_norm: 2.8769 (2.8769) time: 2.3854 data: 0.0004 max mem: 13912 +[2024-12-06 09:08:55 root] (utils.py 283): INFO Epoch: [3] [ 10/2502] eta: 1:39:03 lr: 0.000019 loss_cls: 3.0178 (2.9735) grad_norm: 2.7336 (2.8218) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 09:09:19 root] (utils.py 283): INFO Epoch: [3] [ 20/2502] eta: 1:38:46 lr: 0.000019 loss_cls: 3.1533 (3.0508) grad_norm: 2.7336 (2.8311) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 09:09:43 root] (utils.py 283): INFO Epoch: [3] [ 30/2502] eta: 1:38:19 lr: 0.000019 loss_cls: 3.2528 (3.0380) grad_norm: 2.7672 (2.8677) time: 2.3875 data: 0.0003 max mem: 13912 +[2024-12-06 09:10:07 root] (utils.py 283): INFO Epoch: [3] [ 40/2502] eta: 1:37:58 lr: 0.000019 loss_cls: 3.2874 (3.1183) grad_norm: 2.8039 (2.8940) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 09:10:30 root] (utils.py 283): INFO Epoch: [3] [ 50/2502] eta: 1:37:27 lr: 0.000019 loss_cls: 3.1703 (3.0774) grad_norm: 3.0159 (3.0247) time: 2.3820 data: 0.0002 max mem: 13912 +[2024-12-06 09:10:54 root] (utils.py 283): INFO Epoch: [3] [ 60/2502] eta: 1:36:58 lr: 0.000019 loss_cls: 2.9942 (3.0332) grad_norm: 2.7783 (2.9620) time: 2.3730 data: 0.0003 max mem: 13912 +[2024-12-06 09:11:18 root] (utils.py 283): INFO Epoch: [3] [ 70/2502] eta: 1:36:44 lr: 0.000019 loss_cls: 3.0979 (3.0538) grad_norm: 2.6114 (2.9690) time: 2.3922 data: 0.0002 max mem: 13912 +[2024-12-06 09:11:42 root] (utils.py 283): INFO Epoch: [3] [ 80/2502] eta: 1:36:21 lr: 0.000019 loss_cls: 3.3077 (3.0866) grad_norm: 2.7050 (2.9710) time: 2.3995 data: 0.0002 max mem: 13912 +[2024-12-06 09:12:06 root] (utils.py 283): INFO Epoch: [3] [ 90/2502] eta: 1:35:52 lr: 0.000019 loss_cls: 3.3289 (3.0926) grad_norm: 2.6771 (2.9569) time: 2.3785 data: 0.0002 max mem: 13912 +[2024-12-06 09:12:30 root] (utils.py 283): INFO Epoch: [3] [ 100/2502] eta: 1:35:29 lr: 0.000019 loss_cls: 3.4348 (3.1098) grad_norm: 2.5960 (2.9285) time: 2.3798 data: 0.0002 max mem: 13912 +[2024-12-06 09:12:53 root] (utils.py 283): INFO Epoch: [3] [ 110/2502] eta: 1:35:07 lr: 0.000019 loss_cls: 3.4009 (3.0995) grad_norm: 2.6691 (2.9475) time: 2.3904 data: 0.0002 max mem: 13912 +[2024-12-06 09:13:17 root] (utils.py 283): INFO Epoch: [3] [ 120/2502] eta: 1:34:44 lr: 0.000019 loss_cls: 3.3210 (3.1149) grad_norm: 2.8441 (2.9418) time: 2.3909 data: 0.0003 max mem: 13912 +[2024-12-06 09:13:41 root] (utils.py 283): INFO Epoch: [3] [ 130/2502] eta: 1:34:22 lr: 0.000019 loss_cls: 3.3666 (3.1277) grad_norm: 2.8192 (2.9446) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 09:14:05 root] (utils.py 283): INFO Epoch: [3] [ 140/2502] eta: 1:33:58 lr: 0.000019 loss_cls: 3.2831 (3.1269) grad_norm: 2.6725 (2.9335) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 09:14:29 root] (utils.py 283): INFO Epoch: [3] [ 150/2502] eta: 1:33:35 lr: 0.000019 loss_cls: 3.2274 (3.1311) grad_norm: 2.6434 (2.9588) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 09:14:53 root] (utils.py 283): INFO Epoch: [3] [ 160/2502] eta: 1:33:12 lr: 0.000019 loss_cls: 3.3162 (3.1475) grad_norm: 2.8594 (2.9580) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 09:15:17 root] (utils.py 283): INFO Epoch: [3] [ 170/2502] eta: 1:32:48 lr: 0.000019 loss_cls: 3.3195 (3.1394) grad_norm: 2.7874 (2.9506) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 09:15:41 root] (utils.py 283): INFO Epoch: [3] [ 180/2502] eta: 1:32:24 lr: 0.000019 loss_cls: 3.2105 (3.1434) grad_norm: 2.7514 (2.9313) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 09:16:05 root] (utils.py 283): INFO Epoch: [3] [ 190/2502] eta: 1:32:00 lr: 0.000019 loss_cls: 3.2520 (3.1506) grad_norm: 2.7256 (2.9415) time: 2.3869 data: 0.0003 max mem: 13912 +[2024-12-06 09:16:29 root] (utils.py 283): INFO Epoch: [3] [ 200/2502] eta: 1:31:36 lr: 0.000019 loss_cls: 3.2661 (3.1585) grad_norm: 2.8470 (2.9590) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 09:16:52 root] (utils.py 283): INFO Epoch: [3] [ 210/2502] eta: 1:31:11 lr: 0.000019 loss_cls: 3.2550 (3.1533) grad_norm: 2.8462 (2.9499) time: 2.3832 data: 0.0003 max mem: 13912 +[2024-12-06 09:17:16 root] (utils.py 283): INFO Epoch: [3] [ 220/2502] eta: 1:30:48 lr: 0.000019 loss_cls: 2.9708 (3.1300) grad_norm: 2.7086 (2.9500) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 09:17:40 root] (utils.py 283): INFO Epoch: [3] [ 230/2502] eta: 1:30:23 lr: 0.000019 loss_cls: 3.0505 (3.1333) grad_norm: 2.8973 (3.1179) time: 2.3836 data: 0.0003 max mem: 13912 +[2024-12-06 09:18:04 root] (utils.py 283): INFO Epoch: [3] [ 240/2502] eta: 1:29:57 lr: 0.000019 loss_cls: 3.1938 (3.1373) grad_norm: 3.0480 (3.1290) time: 2.3698 data: 0.0003 max mem: 13912 +[2024-12-06 09:18:28 root] (utils.py 283): INFO Epoch: [3] [ 250/2502] eta: 1:29:35 lr: 0.000019 loss_cls: 3.1316 (3.1366) grad_norm: 3.0914 (3.1244) time: 2.3834 data: 0.0003 max mem: 13912 +[2024-12-06 09:18:52 root] (utils.py 283): INFO Epoch: [3] [ 260/2502] eta: 1:29:11 lr: 0.000019 loss_cls: 3.2194 (3.1352) grad_norm: 2.8547 (3.1105) time: 2.3950 data: 0.0003 max mem: 13912 +[2024-12-06 09:19:16 root] (utils.py 283): INFO Epoch: [3] [ 270/2502] eta: 1:28:47 lr: 0.000019 loss_cls: 3.2194 (3.1370) grad_norm: 2.8547 (3.1273) time: 2.3896 data: 0.0003 max mem: 13912 +[2024-12-06 09:19:39 root] (utils.py 283): INFO Epoch: [3] [ 280/2502] eta: 1:28:23 lr: 0.000019 loss_cls: 3.0316 (3.1373) grad_norm: 2.8862 (3.1193) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 09:20:03 root] (utils.py 283): INFO Epoch: [3] [ 290/2502] eta: 1:28:00 lr: 0.000019 loss_cls: 3.0316 (3.1407) grad_norm: 2.7179 (3.1221) time: 2.3884 data: 0.0003 max mem: 13912 +[2024-12-06 09:20:27 root] (utils.py 283): INFO Epoch: [3] [ 300/2502] eta: 1:27:36 lr: 0.000019 loss_cls: 3.4175 (3.1431) grad_norm: 2.7179 (3.1113) time: 2.3931 data: 0.0003 max mem: 13912 +[2024-12-06 09:20:51 root] (utils.py 283): INFO Epoch: [3] [ 310/2502] eta: 1:27:12 lr: 0.000019 loss_cls: 3.3870 (3.1446) grad_norm: 2.9148 (3.1102) time: 2.3878 data: 0.0003 max mem: 13912 +[2024-12-06 09:21:15 root] (utils.py 283): INFO Epoch: [3] [ 320/2502] eta: 1:26:48 lr: 0.000019 loss_cls: 3.1812 (3.1401) grad_norm: 2.7866 (3.0981) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 09:21:39 root] (utils.py 283): INFO Epoch: [3] [ 330/2502] eta: 1:26:24 lr: 0.000019 loss_cls: 3.2117 (3.1461) grad_norm: 2.6901 (3.0915) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 09:22:03 root] (utils.py 283): INFO Epoch: [3] [ 340/2502] eta: 1:26:00 lr: 0.000019 loss_cls: 3.4448 (3.1457) grad_norm: 2.8317 (3.1070) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 09:22:26 root] (utils.py 283): INFO Epoch: [3] [ 350/2502] eta: 1:25:36 lr: 0.000019 loss_cls: 3.1356 (3.1403) grad_norm: 2.8987 (3.1005) time: 2.3853 data: 0.0003 max mem: 13912 +[2024-12-06 09:22:50 root] (utils.py 283): INFO Epoch: [3] [ 360/2502] eta: 1:25:11 lr: 0.000019 loss_cls: 3.2979 (3.1474) grad_norm: 2.8987 (3.1100) time: 2.3736 data: 0.0003 max mem: 13912 +[2024-12-06 09:23:14 root] (utils.py 283): INFO Epoch: [3] [ 370/2502] eta: 1:24:47 lr: 0.000019 loss_cls: 3.3380 (3.1534) grad_norm: 2.9685 (3.2471) time: 2.3741 data: 0.0003 max mem: 13912 +[2024-12-06 09:23:38 root] (utils.py 283): INFO Epoch: [3] [ 380/2502] eta: 1:24:24 lr: 0.000019 loss_cls: 3.4610 (3.1543) grad_norm: 2.8965 (3.2420) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 09:24:02 root] (utils.py 283): INFO Epoch: [3] [ 390/2502] eta: 1:24:00 lr: 0.000019 loss_cls: 3.3662 (3.1575) grad_norm: 2.8301 (3.2301) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 09:24:26 root] (utils.py 283): INFO Epoch: [3] [ 400/2502] eta: 1:23:36 lr: 0.000019 loss_cls: 3.3512 (3.1648) grad_norm: 2.8851 (3.2303) time: 2.3927 data: 0.0003 max mem: 13912 +[2024-12-06 09:24:50 root] (utils.py 283): INFO Epoch: [3] [ 410/2502] eta: 1:23:13 lr: 0.000019 loss_cls: 3.1329 (3.1605) grad_norm: 2.8851 (3.2195) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 09:25:14 root] (utils.py 283): INFO Epoch: [3] [ 420/2502] eta: 1:22:49 lr: 0.000019 loss_cls: 3.0761 (3.1610) grad_norm: 2.5469 (3.2054) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 09:25:37 root] (utils.py 283): INFO Epoch: [3] [ 430/2502] eta: 1:22:25 lr: 0.000019 loss_cls: 3.2463 (3.1592) grad_norm: 2.6416 (3.2005) time: 2.3864 data: 0.0003 max mem: 13912 +[2024-12-06 09:26:01 root] (utils.py 283): INFO Epoch: [3] [ 440/2502] eta: 1:22:01 lr: 0.000019 loss_cls: 3.3777 (3.1627) grad_norm: 2.6830 (3.1929) time: 2.3873 data: 0.0003 max mem: 13912 +[2024-12-06 09:26:25 root] (utils.py 283): INFO Epoch: [3] [ 450/2502] eta: 1:21:38 lr: 0.000019 loss_cls: 3.1313 (3.1589) grad_norm: 2.5704 (3.1807) time: 2.3907 data: 0.0003 max mem: 13912 +[2024-12-06 09:26:49 root] (utils.py 283): INFO Epoch: [3] [ 460/2502] eta: 1:21:14 lr: 0.000019 loss_cls: 3.1117 (3.1558) grad_norm: 2.6573 (3.1708) time: 2.3921 data: 0.0003 max mem: 13912 +[2024-12-06 09:27:13 root] (utils.py 283): INFO Epoch: [3] [ 470/2502] eta: 1:20:50 lr: 0.000019 loss_cls: 3.2513 (3.1561) grad_norm: 2.7057 (3.1607) time: 2.3923 data: 0.0003 max mem: 13912 +[2024-12-06 09:27:37 root] (utils.py 283): INFO Epoch: [3] [ 480/2502] eta: 1:20:26 lr: 0.000019 loss_cls: 3.1983 (3.1562) grad_norm: 2.5475 (3.1511) time: 2.3869 data: 0.0003 max mem: 13912 +[2024-12-06 09:28:01 root] (utils.py 283): INFO Epoch: [3] [ 490/2502] eta: 1:20:03 lr: 0.000019 loss_cls: 3.1517 (3.1539) grad_norm: 2.5935 (3.1423) time: 2.3861 data: 0.0003 max mem: 13912 +[2024-12-06 09:28:25 root] (utils.py 283): INFO Epoch: [3] [ 500/2502] eta: 1:19:38 lr: 0.000019 loss_cls: 3.1945 (3.1529) grad_norm: 2.8240 (3.1437) time: 2.3856 data: 0.0003 max mem: 13912 +[2024-12-06 09:28:48 root] (utils.py 283): INFO Epoch: [3] [ 510/2502] eta: 1:19:13 lr: 0.000019 loss_cls: 3.1714 (3.1523) grad_norm: 2.8145 (3.1331) time: 2.3676 data: 0.0003 max mem: 13912 +[2024-12-06 09:29:12 root] (utils.py 283): INFO Epoch: [3] [ 520/2502] eta: 1:18:50 lr: 0.000019 loss_cls: 2.9476 (3.1488) grad_norm: 2.6027 (3.1290) time: 2.3749 data: 0.0003 max mem: 13912 +[2024-12-06 09:29:36 root] (utils.py 283): INFO Epoch: [3] [ 530/2502] eta: 1:18:26 lr: 0.000019 loss_cls: 3.2894 (3.1527) grad_norm: 2.9077 (3.1255) time: 2.3921 data: 0.0003 max mem: 13912 +[2024-12-06 09:30:00 root] (utils.py 283): INFO Epoch: [3] [ 540/2502] eta: 1:18:02 lr: 0.000019 loss_cls: 3.2894 (3.1511) grad_norm: 2.6982 (3.1157) time: 2.3906 data: 0.0003 max mem: 13912 +[2024-12-06 09:30:24 root] (utils.py 283): INFO Epoch: [3] [ 550/2502] eta: 1:17:38 lr: 0.000019 loss_cls: 2.9257 (3.1440) grad_norm: 2.5490 (3.1087) time: 2.3813 data: 0.0003 max mem: 13912 +[2024-12-06 09:30:47 root] (utils.py 283): INFO Epoch: [3] [ 560/2502] eta: 1:17:13 lr: 0.000019 loss_cls: 3.2847 (3.1451) grad_norm: 2.4947 (3.0973) time: 2.3684 data: 0.0003 max mem: 13912 +[2024-12-06 09:31:11 root] (utils.py 283): INFO Epoch: [3] [ 570/2502] eta: 1:16:49 lr: 0.000019 loss_cls: 3.4367 (3.1486) grad_norm: 2.5656 (3.0933) time: 2.3697 data: 0.0003 max mem: 13912 +[2024-12-06 09:31:35 root] (utils.py 283): INFO Epoch: [3] [ 580/2502] eta: 1:16:25 lr: 0.000019 loss_cls: 3.3534 (3.1480) grad_norm: 2.6994 (3.0930) time: 2.3829 data: 0.0003 max mem: 13912 +[2024-12-06 09:31:59 root] (utils.py 283): INFO Epoch: [3] [ 590/2502] eta: 1:16:02 lr: 0.000019 loss_cls: 3.3794 (3.1554) grad_norm: 2.9429 (3.0935) time: 2.3904 data: 0.0003 max mem: 13912 +[2024-12-06 09:32:23 root] (utils.py 283): INFO Epoch: [3] [ 600/2502] eta: 1:15:38 lr: 0.000019 loss_cls: 3.3191 (3.1512) grad_norm: 2.9882 (3.0911) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 09:32:47 root] (utils.py 283): INFO Epoch: [3] [ 610/2502] eta: 1:15:14 lr: 0.000019 loss_cls: 2.9691 (3.1479) grad_norm: 2.6485 (3.0833) time: 2.3867 data: 0.0003 max mem: 13912 +[2024-12-06 09:33:10 root] (utils.py 283): INFO Epoch: [3] [ 620/2502] eta: 1:14:49 lr: 0.000019 loss_cls: 3.2312 (3.1511) grad_norm: 2.5964 (3.0763) time: 2.3680 data: 0.0003 max mem: 13912 +[2024-12-06 09:33:34 root] (utils.py 283): INFO Epoch: [3] [ 630/2502] eta: 1:14:25 lr: 0.000019 loss_cls: 3.2401 (3.1488) grad_norm: 2.6854 (3.0762) time: 2.3615 data: 0.0003 max mem: 13912 +[2024-12-06 09:33:58 root] (utils.py 283): INFO Epoch: [3] [ 640/2502] eta: 1:14:01 lr: 0.000019 loss_cls: 3.2120 (3.1510) grad_norm: 2.8674 (3.0753) time: 2.3767 data: 0.0003 max mem: 13912 +[2024-12-06 09:34:22 root] (utils.py 283): INFO Epoch: [3] [ 650/2502] eta: 1:13:37 lr: 0.000019 loss_cls: 3.4254 (3.1527) grad_norm: 2.7818 (3.0726) time: 2.3859 data: 0.0003 max mem: 13912 +[2024-12-06 09:34:45 root] (utils.py 283): INFO Epoch: [3] [ 660/2502] eta: 1:13:13 lr: 0.000019 loss_cls: 3.2081 (3.1505) grad_norm: 2.7512 (3.0696) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 09:35:09 root] (utils.py 283): INFO Epoch: [3] [ 670/2502] eta: 1:12:50 lr: 0.000019 loss_cls: 3.2081 (3.1514) grad_norm: 2.7792 (3.0702) time: 2.3984 data: 0.0003 max mem: 13912 +[2024-12-06 09:35:33 root] (utils.py 283): INFO Epoch: [3] [ 680/2502] eta: 1:12:26 lr: 0.000019 loss_cls: 3.3399 (3.1531) grad_norm: 2.6890 (3.0648) time: 2.3950 data: 0.0003 max mem: 13912 +[2024-12-06 09:35:57 root] (utils.py 283): INFO Epoch: [3] [ 690/2502] eta: 1:12:03 lr: 0.000019 loss_cls: 3.2070 (3.1489) grad_norm: 2.6165 (3.0623) time: 2.3893 data: 0.0003 max mem: 13912 +[2024-12-06 09:36:21 root] (utils.py 283): INFO Epoch: [3] [ 700/2502] eta: 1:11:39 lr: 0.000019 loss_cls: 3.0414 (3.1510) grad_norm: 2.5997 (3.0576) time: 2.3848 data: 0.0003 max mem: 13912 +[2024-12-06 09:36:45 root] (utils.py 283): INFO Epoch: [3] [ 710/2502] eta: 1:11:15 lr: 0.000019 loss_cls: 3.3229 (3.1501) grad_norm: 2.6998 (3.0563) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 09:37:09 root] (utils.py 283): INFO Epoch: [3] [ 720/2502] eta: 1:10:51 lr: 0.000019 loss_cls: 2.9436 (3.1462) grad_norm: 2.7431 (3.0534) time: 2.3933 data: 0.0003 max mem: 13912 +[2024-12-06 09:37:33 root] (utils.py 283): INFO Epoch: [3] [ 730/2502] eta: 1:10:27 lr: 0.000019 loss_cls: 2.7493 (3.1443) grad_norm: 2.6689 (3.0573) time: 2.3893 data: 0.0003 max mem: 13912 +[2024-12-06 09:37:57 root] (utils.py 283): INFO Epoch: [3] [ 740/2502] eta: 1:10:03 lr: 0.000019 loss_cls: 3.2880 (3.1445) grad_norm: 2.6930 (3.0543) time: 2.3831 data: 0.0003 max mem: 13912 +[2024-12-06 09:38:20 root] (utils.py 283): INFO Epoch: [3] [ 750/2502] eta: 1:09:38 lr: 0.000019 loss_cls: 3.3495 (3.1456) grad_norm: 2.8648 (3.0561) time: 2.3609 data: 0.0003 max mem: 13912 +[2024-12-06 09:38:44 root] (utils.py 283): INFO Epoch: [3] [ 760/2502] eta: 1:09:14 lr: 0.000019 loss_cls: 3.3794 (3.1489) grad_norm: 2.8491 (3.0556) time: 2.3587 data: 0.0002 max mem: 13912 +[2024-12-06 09:39:08 root] (utils.py 283): INFO Epoch: [3] [ 770/2502] eta: 1:08:50 lr: 0.000019 loss_cls: 3.4123 (3.1504) grad_norm: 2.5915 (3.0552) time: 2.3742 data: 0.0002 max mem: 13912 +[2024-12-06 09:39:31 root] (utils.py 283): INFO Epoch: [3] [ 780/2502] eta: 1:08:26 lr: 0.000019 loss_cls: 3.3280 (3.1505) grad_norm: 2.7336 (3.0547) time: 2.3615 data: 0.0002 max mem: 13912 +[2024-12-06 09:39:55 root] (utils.py 283): INFO Epoch: [3] [ 790/2502] eta: 1:08:02 lr: 0.000019 loss_cls: 3.2056 (3.1511) grad_norm: 2.7477 (3.0507) time: 2.3671 data: 0.0002 max mem: 13912 +[2024-12-06 09:40:19 root] (utils.py 283): INFO Epoch: [3] [ 800/2502] eta: 1:07:38 lr: 0.000019 loss_cls: 3.0812 (3.1500) grad_norm: 2.7349 (3.0542) time: 2.3819 data: 0.0002 max mem: 13912 +[2024-12-06 09:40:42 root] (utils.py 283): INFO Epoch: [3] [ 810/2502] eta: 1:07:13 lr: 0.000019 loss_cls: 2.9709 (3.1488) grad_norm: 2.7590 (3.0514) time: 2.3697 data: 0.0002 max mem: 13912 +[2024-12-06 09:41:06 root] (utils.py 283): INFO Epoch: [3] [ 820/2502] eta: 1:06:49 lr: 0.000019 loss_cls: 3.2116 (3.1492) grad_norm: 2.7590 (3.0518) time: 2.3638 data: 0.0003 max mem: 13912 +[2024-12-06 09:41:30 root] (utils.py 283): INFO Epoch: [3] [ 830/2502] eta: 1:06:26 lr: 0.000019 loss_cls: 3.3508 (3.1508) grad_norm: 2.6665 (3.0475) time: 2.3786 data: 0.0003 max mem: 13912 +[2024-12-06 09:41:54 root] (utils.py 283): INFO Epoch: [3] [ 840/2502] eta: 1:06:02 lr: 0.000019 loss_cls: 3.1326 (3.1486) grad_norm: 2.6408 (3.0447) time: 2.3891 data: 0.0002 max mem: 13912 +[2024-12-06 09:42:18 root] (utils.py 283): INFO Epoch: [3] [ 850/2502] eta: 1:05:38 lr: 0.000019 loss_cls: 3.2183 (3.1503) grad_norm: 2.9121 (3.0453) time: 2.3889 data: 0.0003 max mem: 13912 +[2024-12-06 09:42:41 root] (utils.py 283): INFO Epoch: [3] [ 860/2502] eta: 1:05:14 lr: 0.000019 loss_cls: 3.3454 (3.1505) grad_norm: 2.7966 (3.0417) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 09:43:05 root] (utils.py 283): INFO Epoch: [3] [ 870/2502] eta: 1:04:51 lr: 0.000019 loss_cls: 3.3444 (3.1529) grad_norm: 2.7496 (3.0442) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 09:43:29 root] (utils.py 283): INFO Epoch: [3] [ 880/2502] eta: 1:04:27 lr: 0.000019 loss_cls: 3.3774 (3.1552) grad_norm: 2.6872 (3.0400) time: 2.3927 data: 0.0003 max mem: 13912 +[2024-12-06 09:43:53 root] (utils.py 283): INFO Epoch: [3] [ 890/2502] eta: 1:04:03 lr: 0.000019 loss_cls: 3.3490 (3.1576) grad_norm: 2.5509 (3.0359) time: 2.3917 data: 0.0003 max mem: 13912 +[2024-12-06 09:44:17 root] (utils.py 283): INFO Epoch: [3] [ 900/2502] eta: 1:03:39 lr: 0.000019 loss_cls: 3.1329 (3.1572) grad_norm: 2.5509 (3.0321) time: 2.3867 data: 0.0003 max mem: 13912 +[2024-12-06 09:44:41 root] (utils.py 283): INFO Epoch: [3] [ 910/2502] eta: 1:03:16 lr: 0.000019 loss_cls: 3.0420 (3.1545) grad_norm: 2.4569 (3.0397) time: 2.3857 data: 0.0003 max mem: 13912 +[2024-12-06 09:45:05 root] (utils.py 283): INFO Epoch: [3] [ 920/2502] eta: 1:02:52 lr: 0.000019 loss_cls: 3.2690 (3.1547) grad_norm: 2.4860 (3.0424) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 09:45:29 root] (utils.py 283): INFO Epoch: [3] [ 930/2502] eta: 1:02:28 lr: 0.000019 loss_cls: 3.3722 (3.1553) grad_norm: 2.8350 (3.0425) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 09:45:53 root] (utils.py 283): INFO Epoch: [3] [ 940/2502] eta: 1:02:05 lr: 0.000019 loss_cls: 3.2559 (3.1576) grad_norm: 2.8678 (3.0449) time: 2.3967 data: 0.0003 max mem: 13912 +[2024-12-06 09:46:17 root] (utils.py 283): INFO Epoch: [3] [ 950/2502] eta: 1:01:41 lr: 0.000019 loss_cls: 3.3081 (3.1595) grad_norm: 2.9802 (3.0479) time: 2.3942 data: 0.0003 max mem: 13912 +[2024-12-06 09:46:41 root] (utils.py 283): INFO Epoch: [3] [ 960/2502] eta: 1:01:17 lr: 0.000019 loss_cls: 3.3244 (3.1607) grad_norm: 2.6744 (3.0454) time: 2.3836 data: 0.0003 max mem: 13912 +[2024-12-06 09:47:04 root] (utils.py 283): INFO Epoch: [3] [ 970/2502] eta: 1:00:53 lr: 0.000019 loss_cls: 3.2092 (3.1589) grad_norm: 2.5974 (3.0414) time: 2.3846 data: 0.0003 max mem: 13912 +[2024-12-06 09:47:28 root] (utils.py 283): INFO Epoch: [3] [ 980/2502] eta: 1:00:29 lr: 0.000019 loss_cls: 2.9310 (3.1573) grad_norm: 2.6022 (3.0381) time: 2.3862 data: 0.0003 max mem: 13912 +[2024-12-06 09:47:52 root] (utils.py 283): INFO Epoch: [3] [ 990/2502] eta: 1:00:05 lr: 0.000019 loss_cls: 3.3693 (3.1623) grad_norm: 2.6652 (3.0357) time: 2.3847 data: 0.0003 max mem: 13912 +[2024-12-06 09:48:16 root] (utils.py 283): INFO Epoch: [3] [1000/2502] eta: 0:59:41 lr: 0.000019 loss_cls: 3.6010 (3.1633) grad_norm: 2.6122 (3.0312) time: 2.3848 data: 0.0003 max mem: 13912 +[2024-12-06 09:48:40 root] (utils.py 283): INFO Epoch: [3] [1010/2502] eta: 0:59:18 lr: 0.000019 loss_cls: 3.3210 (3.1631) grad_norm: 2.6103 (3.0315) time: 2.3976 data: 0.0003 max mem: 13912 +[2024-12-06 09:49:04 root] (utils.py 283): INFO Epoch: [3] [1020/2502] eta: 0:58:54 lr: 0.000019 loss_cls: 3.2513 (3.1637) grad_norm: 2.8475 (3.0318) time: 2.3977 data: 0.0003 max mem: 13912 +[2024-12-06 09:49:27 root] (utils.py 283): INFO Epoch: [3] [1030/2502] eta: 0:58:30 lr: 0.000019 loss_cls: 3.3224 (3.1655) grad_norm: 2.8255 (3.0308) time: 2.3688 data: 0.0003 max mem: 13912 +[2024-12-06 09:49:51 root] (utils.py 283): INFO Epoch: [3] [1040/2502] eta: 0:58:06 lr: 0.000019 loss_cls: 3.3751 (3.1669) grad_norm: 2.7502 (3.0606) time: 2.3720 data: 0.0003 max mem: 13912 +[2024-12-06 09:50:15 root] (utils.py 283): INFO Epoch: [3] [1050/2502] eta: 0:57:42 lr: 0.000019 loss_cls: 3.4801 (3.1693) grad_norm: 2.8959 (3.0612) time: 2.3888 data: 0.0003 max mem: 13912 +[2024-12-06 09:50:39 root] (utils.py 283): INFO Epoch: [3] [1060/2502] eta: 0:57:19 lr: 0.000019 loss_cls: 3.3748 (3.1701) grad_norm: 2.9484 (3.0639) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 09:51:03 root] (utils.py 283): INFO Epoch: [3] [1070/2502] eta: 0:56:55 lr: 0.000019 loss_cls: 3.2428 (3.1695) grad_norm: 2.7842 (3.0682) time: 2.3939 data: 0.0003 max mem: 13912 +[2024-12-06 09:51:27 root] (utils.py 283): INFO Epoch: [3] [1080/2502] eta: 0:56:31 lr: 0.000019 loss_cls: 3.2428 (3.1689) grad_norm: 2.6807 (3.0672) time: 2.3915 data: 0.0003 max mem: 13912 +[2024-12-06 09:51:51 root] (utils.py 283): INFO Epoch: [3] [1090/2502] eta: 0:56:07 lr: 0.000019 loss_cls: 3.4812 (3.1707) grad_norm: 2.6540 (3.0659) time: 2.3900 data: 0.0003 max mem: 13912 +[2024-12-06 09:52:15 root] (utils.py 283): INFO Epoch: [3] [1100/2502] eta: 0:55:43 lr: 0.000019 loss_cls: 3.4558 (3.1724) grad_norm: 2.7155 (3.0646) time: 2.3890 data: 0.0003 max mem: 13912 +[2024-12-06 09:52:39 root] (utils.py 283): INFO Epoch: [3] [1110/2502] eta: 0:55:20 lr: 0.000019 loss_cls: 3.3181 (3.1726) grad_norm: 2.7755 (3.0628) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 09:53:03 root] (utils.py 283): INFO Epoch: [3] [1120/2502] eta: 0:54:56 lr: 0.000019 loss_cls: 3.3146 (3.1737) grad_norm: 2.8681 (3.0748) time: 2.3900 data: 0.0002 max mem: 13912 +[2024-12-06 09:53:26 root] (utils.py 283): INFO Epoch: [3] [1130/2502] eta: 0:54:32 lr: 0.000019 loss_cls: 3.2482 (3.1722) grad_norm: 2.8681 (3.0763) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 09:53:50 root] (utils.py 283): INFO Epoch: [3] [1140/2502] eta: 0:54:08 lr: 0.000019 loss_cls: 3.3099 (3.1738) grad_norm: 2.7896 (3.0768) time: 2.3819 data: 0.0003 max mem: 13912 +[2024-12-06 09:54:14 root] (utils.py 283): INFO Epoch: [3] [1150/2502] eta: 0:53:44 lr: 0.000019 loss_cls: 3.3099 (3.1726) grad_norm: 2.9312 (3.0777) time: 2.3756 data: 0.0003 max mem: 13912 +[2024-12-06 09:54:38 root] (utils.py 283): INFO Epoch: [3] [1160/2502] eta: 0:53:20 lr: 0.000019 loss_cls: 3.1442 (3.1728) grad_norm: 3.1399 (3.0770) time: 2.3757 data: 0.0002 max mem: 13912 +[2024-12-06 09:55:02 root] (utils.py 283): INFO Epoch: [3] [1170/2502] eta: 0:52:56 lr: 0.000019 loss_cls: 3.1590 (3.1711) grad_norm: 2.8575 (3.0734) time: 2.3804 data: 0.0003 max mem: 13912 +[2024-12-06 09:55:26 root] (utils.py 283): INFO Epoch: [3] [1180/2502] eta: 0:52:33 lr: 0.000019 loss_cls: 3.1313 (3.1686) grad_norm: 2.7105 (3.0727) time: 2.3893 data: 0.0003 max mem: 13912 +[2024-12-06 09:55:49 root] (utils.py 283): INFO Epoch: [3] [1190/2502] eta: 0:52:09 lr: 0.000019 loss_cls: 2.9530 (3.1678) grad_norm: 3.0171 (3.0741) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 09:56:13 root] (utils.py 283): INFO Epoch: [3] [1200/2502] eta: 0:51:45 lr: 0.000019 loss_cls: 3.2424 (3.1685) grad_norm: 2.8590 (3.0712) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 09:56:37 root] (utils.py 283): INFO Epoch: [3] [1210/2502] eta: 0:51:21 lr: 0.000019 loss_cls: 3.2228 (3.1685) grad_norm: 2.6707 (3.0684) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 09:57:01 root] (utils.py 283): INFO Epoch: [3] [1220/2502] eta: 0:50:57 lr: 0.000019 loss_cls: 3.1129 (3.1682) grad_norm: 2.7345 (3.0789) time: 2.3896 data: 0.0003 max mem: 13912 +[2024-12-06 09:57:25 root] (utils.py 283): INFO Epoch: [3] [1230/2502] eta: 0:50:34 lr: 0.000019 loss_cls: 3.3284 (3.1687) grad_norm: 2.8417 (3.0774) time: 2.3914 data: 0.0003 max mem: 13912 +[2024-12-06 09:57:49 root] (utils.py 283): INFO Epoch: [3] [1240/2502] eta: 0:50:10 lr: 0.000019 loss_cls: 3.3416 (3.1698) grad_norm: 2.8417 (3.0757) time: 2.3904 data: 0.0003 max mem: 13912 +[2024-12-06 09:58:13 root] (utils.py 283): INFO Epoch: [3] [1250/2502] eta: 0:49:46 lr: 0.000019 loss_cls: 3.3926 (3.1715) grad_norm: 2.6068 (3.0748) time: 2.3886 data: 0.0003 max mem: 13912 +[2024-12-06 09:58:37 root] (utils.py 283): INFO Epoch: [3] [1260/2502] eta: 0:49:22 lr: 0.000019 loss_cls: 3.4024 (3.1718) grad_norm: 2.6575 (3.0735) time: 2.3786 data: 0.0003 max mem: 13912 +[2024-12-06 09:59:00 root] (utils.py 283): INFO Epoch: [3] [1270/2502] eta: 0:48:58 lr: 0.000019 loss_cls: 3.2403 (3.1715) grad_norm: 2.6296 (3.0722) time: 2.3787 data: 0.0003 max mem: 13912 +[2024-12-06 09:59:24 root] (utils.py 283): INFO Epoch: [3] [1280/2502] eta: 0:48:34 lr: 0.000019 loss_cls: 3.2423 (3.1718) grad_norm: 2.6296 (3.0720) time: 2.3812 data: 0.0003 max mem: 13912 +[2024-12-06 09:59:48 root] (utils.py 283): INFO Epoch: [3] [1290/2502] eta: 0:48:10 lr: 0.000019 loss_cls: 3.3690 (3.1723) grad_norm: 2.6053 (3.0703) time: 2.3822 data: 0.0003 max mem: 13912 +[2024-12-06 10:00:12 root] (utils.py 283): INFO Epoch: [3] [1300/2502] eta: 0:47:46 lr: 0.000019 loss_cls: 3.2238 (3.1711) grad_norm: 2.7216 (3.0690) time: 2.3868 data: 0.0003 max mem: 13912 +[2024-12-06 10:00:36 root] (utils.py 283): INFO Epoch: [3] [1310/2502] eta: 0:47:23 lr: 0.000019 loss_cls: 3.0807 (3.1719) grad_norm: 3.0223 (3.0705) time: 2.3870 data: 0.0003 max mem: 13912 +[2024-12-06 10:01:00 root] (utils.py 283): INFO Epoch: [3] [1320/2502] eta: 0:46:59 lr: 0.000019 loss_cls: 3.3201 (3.1728) grad_norm: 2.7493 (3.0725) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 10:01:24 root] (utils.py 283): INFO Epoch: [3] [1330/2502] eta: 0:46:35 lr: 0.000019 loss_cls: 3.3009 (3.1732) grad_norm: 2.7493 (3.0767) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 10:01:47 root] (utils.py 283): INFO Epoch: [3] [1340/2502] eta: 0:46:11 lr: 0.000019 loss_cls: 3.2519 (3.1746) grad_norm: 2.8184 (3.0741) time: 2.3871 data: 0.0003 max mem: 13912 +[2024-12-06 10:02:11 root] (utils.py 283): INFO Epoch: [3] [1350/2502] eta: 0:45:47 lr: 0.000019 loss_cls: 3.1831 (3.1744) grad_norm: 2.7784 (3.0728) time: 2.3900 data: 0.0003 max mem: 13912 +[2024-12-06 10:02:35 root] (utils.py 283): INFO Epoch: [3] [1360/2502] eta: 0:45:24 lr: 0.000019 loss_cls: 3.0614 (3.1739) grad_norm: 2.7784 (3.0710) time: 2.3927 data: 0.0003 max mem: 13912 +[2024-12-06 10:02:59 root] (utils.py 283): INFO Epoch: [3] [1370/2502] eta: 0:45:00 lr: 0.000019 loss_cls: 3.3609 (3.1740) grad_norm: 2.8223 (3.0697) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 10:03:23 root] (utils.py 283): INFO Epoch: [3] [1380/2502] eta: 0:44:36 lr: 0.000019 loss_cls: 3.2937 (3.1739) grad_norm: 2.8323 (3.0719) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 10:03:47 root] (utils.py 283): INFO Epoch: [3] [1390/2502] eta: 0:44:12 lr: 0.000019 loss_cls: 3.2190 (3.1730) grad_norm: 2.8323 (3.0726) time: 2.3904 data: 0.0003 max mem: 13912 +[2024-12-06 10:04:11 root] (utils.py 283): INFO Epoch: [3] [1400/2502] eta: 0:43:48 lr: 0.000019 loss_cls: 3.0301 (3.1717) grad_norm: 2.7498 (3.0694) time: 2.3827 data: 0.0003 max mem: 13912 +[2024-12-06 10:04:35 root] (utils.py 283): INFO Epoch: [3] [1410/2502] eta: 0:43:24 lr: 0.000019 loss_cls: 3.2840 (3.1729) grad_norm: 2.6855 (3.0702) time: 2.3763 data: 0.0003 max mem: 13912 +[2024-12-06 10:04:58 root] (utils.py 283): INFO Epoch: [3] [1420/2502] eta: 0:43:00 lr: 0.000019 loss_cls: 3.3199 (3.1740) grad_norm: 2.6855 (3.0677) time: 2.3780 data: 0.0003 max mem: 13912 +[2024-12-06 10:05:22 root] (utils.py 283): INFO Epoch: [3] [1430/2502] eta: 0:42:37 lr: 0.000019 loss_cls: 3.3163 (3.1758) grad_norm: 2.6806 (3.0665) time: 2.3871 data: 0.0003 max mem: 13912 +[2024-12-06 10:05:46 root] (utils.py 283): INFO Epoch: [3] [1440/2502] eta: 0:42:13 lr: 0.000019 loss_cls: 3.1993 (3.1749) grad_norm: 2.7219 (3.0652) time: 2.3853 data: 0.0003 max mem: 13912 +[2024-12-06 10:06:10 root] (utils.py 283): INFO Epoch: [3] [1450/2502] eta: 0:41:49 lr: 0.000019 loss_cls: 3.0358 (3.1744) grad_norm: 2.7548 (3.0644) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 10:06:34 root] (utils.py 283): INFO Epoch: [3] [1460/2502] eta: 0:41:25 lr: 0.000019 loss_cls: 3.5379 (3.1778) grad_norm: 2.6647 (3.0615) time: 2.3956 data: 0.0003 max mem: 13912 +[2024-12-06 10:06:58 root] (utils.py 283): INFO Epoch: [3] [1470/2502] eta: 0:41:01 lr: 0.000019 loss_cls: 3.5325 (3.1772) grad_norm: 2.6647 (3.0599) time: 2.3896 data: 0.0003 max mem: 13912 +[2024-12-06 10:07:22 root] (utils.py 283): INFO Epoch: [3] [1480/2502] eta: 0:40:37 lr: 0.000019 loss_cls: 3.1976 (3.1773) grad_norm: 2.6758 (3.0591) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 10:07:46 root] (utils.py 283): INFO Epoch: [3] [1490/2502] eta: 0:40:14 lr: 0.000019 loss_cls: 3.1432 (3.1768) grad_norm: 2.7067 (3.0582) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 10:08:10 root] (utils.py 283): INFO Epoch: [3] [1500/2502] eta: 0:39:50 lr: 0.000019 loss_cls: 3.0219 (3.1741) grad_norm: 2.6536 (3.0566) time: 2.3920 data: 0.0003 max mem: 13912 +[2024-12-06 10:08:33 root] (utils.py 283): INFO Epoch: [3] [1510/2502] eta: 0:39:26 lr: 0.000019 loss_cls: 3.0016 (3.1734) grad_norm: 2.5958 (3.0536) time: 2.3543 data: 0.0003 max mem: 13912 +[2024-12-06 10:08:57 root] (utils.py 283): INFO Epoch: [3] [1520/2502] eta: 0:39:02 lr: 0.000019 loss_cls: 3.3492 (3.1745) grad_norm: 2.7580 (3.0556) time: 2.3594 data: 0.0003 max mem: 13912 +[2024-12-06 10:09:32 root] (utils.py 283): INFO Epoch: [3] [1530/2502] eta: 0:38:45 lr: 0.000019 loss_cls: 3.3777 (3.1747) grad_norm: 2.9220 (3.0591) time: 2.9414 data: 0.0003 max mem: 13912 +[2024-12-06 10:09:56 root] (utils.py 283): INFO Epoch: [3] [1540/2502] eta: 0:38:21 lr: 0.000019 loss_cls: 3.0925 (3.1727) grad_norm: 2.8883 (3.0595) time: 2.9394 data: 0.0003 max mem: 13912 +[2024-12-06 10:10:19 root] (utils.py 283): INFO Epoch: [3] [1550/2502] eta: 0:37:57 lr: 0.000019 loss_cls: 3.0250 (3.1719) grad_norm: 2.8562 (3.0642) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 10:10:43 root] (utils.py 283): INFO Epoch: [3] [1560/2502] eta: 0:37:33 lr: 0.000019 loss_cls: 3.2621 (3.1713) grad_norm: 2.8562 (3.0660) time: 2.3932 data: 0.0003 max mem: 13912 +[2024-12-06 10:11:07 root] (utils.py 283): INFO Epoch: [3] [1570/2502] eta: 0:37:09 lr: 0.000019 loss_cls: 3.2243 (3.1718) grad_norm: 2.8791 (3.0663) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 10:11:31 root] (utils.py 283): INFO Epoch: [3] [1580/2502] eta: 0:36:45 lr: 0.000019 loss_cls: 3.1867 (3.1717) grad_norm: 2.7778 (3.0649) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 10:11:55 root] (utils.py 283): INFO Epoch: [3] [1590/2502] eta: 0:36:21 lr: 0.000019 loss_cls: 3.0406 (3.1705) grad_norm: 2.7892 (3.0686) time: 2.3918 data: 0.0003 max mem: 13912 +[2024-12-06 10:12:19 root] (utils.py 283): INFO Epoch: [3] [1600/2502] eta: 0:35:57 lr: 0.000019 loss_cls: 3.0406 (3.1708) grad_norm: 3.0317 (3.0695) time: 2.3958 data: 0.0003 max mem: 13912 +[2024-12-06 10:12:43 root] (utils.py 283): INFO Epoch: [3] [1610/2502] eta: 0:35:33 lr: 0.000019 loss_cls: 3.2936 (3.1705) grad_norm: 2.8969 (3.0703) time: 2.3918 data: 0.0003 max mem: 13912 +[2024-12-06 10:13:07 root] (utils.py 283): INFO Epoch: [3] [1620/2502] eta: 0:35:10 lr: 0.000019 loss_cls: 3.2655 (3.1694) grad_norm: 2.8347 (3.0692) time: 2.3923 data: 0.0003 max mem: 13912 +[2024-12-06 10:13:31 root] (utils.py 283): INFO Epoch: [3] [1630/2502] eta: 0:34:46 lr: 0.000019 loss_cls: 3.0071 (3.1693) grad_norm: 2.7153 (3.0680) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 10:13:55 root] (utils.py 283): INFO Epoch: [3] [1640/2502] eta: 0:34:22 lr: 0.000019 loss_cls: 3.2852 (3.1691) grad_norm: 2.6647 (3.0658) time: 2.3841 data: 0.0003 max mem: 13912 +[2024-12-06 10:14:18 root] (utils.py 283): INFO Epoch: [3] [1650/2502] eta: 0:33:58 lr: 0.000019 loss_cls: 3.1397 (3.1683) grad_norm: 2.7169 (3.0645) time: 2.3752 data: 0.0003 max mem: 13912 +[2024-12-06 10:14:42 root] (utils.py 283): INFO Epoch: [3] [1660/2502] eta: 0:33:34 lr: 0.000019 loss_cls: 3.1040 (3.1688) grad_norm: 2.8689 (3.0641) time: 2.3821 data: 0.0003 max mem: 13912 +[2024-12-06 10:15:07 root] (utils.py 283): INFO Epoch: [3] [1670/2502] eta: 0:33:10 lr: 0.000019 loss_cls: 3.1321 (3.1672) grad_norm: 2.8689 (3.0631) time: 2.4144 data: 0.0003 max mem: 13912 +[2024-12-06 10:15:30 root] (utils.py 283): INFO Epoch: [3] [1680/2502] eta: 0:32:46 lr: 0.000019 loss_cls: 2.9430 (3.1663) grad_norm: 2.7107 (3.0631) time: 2.4133 data: 0.0003 max mem: 13912 +[2024-12-06 10:15:54 root] (utils.py 283): INFO Epoch: [3] [1690/2502] eta: 0:32:22 lr: 0.000019 loss_cls: 3.1241 (3.1666) grad_norm: 2.7392 (3.0622) time: 2.3917 data: 0.0003 max mem: 13912 +[2024-12-06 10:16:18 root] (utils.py 283): INFO Epoch: [3] [1700/2502] eta: 0:31:58 lr: 0.000019 loss_cls: 3.2649 (3.1656) grad_norm: 2.7527 (3.0659) time: 2.3864 data: 0.0003 max mem: 13912 +[2024-12-06 10:16:42 root] (utils.py 283): INFO Epoch: [3] [1710/2502] eta: 0:31:34 lr: 0.000019 loss_cls: 3.2880 (3.1671) grad_norm: 2.8604 (3.0648) time: 2.3823 data: 0.0003 max mem: 13912 +[2024-12-06 10:17:06 root] (utils.py 283): INFO Epoch: [3] [1720/2502] eta: 0:31:10 lr: 0.000019 loss_cls: 3.3490 (3.1674) grad_norm: 2.5557 (3.0624) time: 2.3826 data: 0.0003 max mem: 13912 +[2024-12-06 10:17:30 root] (utils.py 283): INFO Epoch: [3] [1730/2502] eta: 0:30:46 lr: 0.000019 loss_cls: 2.9396 (3.1662) grad_norm: 2.7700 (3.0623) time: 2.3839 data: 0.0003 max mem: 13912 +[2024-12-06 10:17:54 root] (utils.py 283): INFO Epoch: [3] [1740/2502] eta: 0:30:22 lr: 0.000019 loss_cls: 2.9396 (3.1654) grad_norm: 2.8167 (3.0657) time: 2.3899 data: 0.0003 max mem: 13912 +[2024-12-06 10:18:18 root] (utils.py 283): INFO Epoch: [3] [1750/2502] eta: 0:29:58 lr: 0.000019 loss_cls: 3.2668 (3.1655) grad_norm: 2.5714 (3.0624) time: 2.3971 data: 0.0003 max mem: 13912 +[2024-12-06 10:18:42 root] (utils.py 283): INFO Epoch: [3] [1760/2502] eta: 0:29:35 lr: 0.000019 loss_cls: 3.4109 (3.1667) grad_norm: 2.5825 (3.0608) time: 2.3968 data: 0.0003 max mem: 13912 +[2024-12-06 10:19:06 root] (utils.py 283): INFO Epoch: [3] [1770/2502] eta: 0:29:11 lr: 0.000019 loss_cls: 3.4292 (3.1669) grad_norm: 2.7284 (3.0602) time: 2.3952 data: 0.0003 max mem: 13912 +[2024-12-06 10:19:29 root] (utils.py 283): INFO Epoch: [3] [1780/2502] eta: 0:28:47 lr: 0.000019 loss_cls: 3.4292 (3.1680) grad_norm: 2.9494 (3.0619) time: 2.3922 data: 0.0003 max mem: 13912 +[2024-12-06 10:19:53 root] (utils.py 283): INFO Epoch: [3] [1790/2502] eta: 0:28:23 lr: 0.000019 loss_cls: 3.3254 (3.1670) grad_norm: 2.9951 (3.0609) time: 2.3913 data: 0.0003 max mem: 13912 +[2024-12-06 10:20:17 root] (utils.py 283): INFO Epoch: [3] [1800/2502] eta: 0:27:59 lr: 0.000019 loss_cls: 3.1655 (3.1676) grad_norm: 2.9951 (3.0653) time: 2.3958 data: 0.0003 max mem: 13912 +[2024-12-06 10:20:41 root] (utils.py 283): INFO Epoch: [3] [1810/2502] eta: 0:27:35 lr: 0.000019 loss_cls: 3.2309 (3.1662) grad_norm: 3.2071 (3.0674) time: 2.3937 data: 0.0003 max mem: 13912 +[2024-12-06 10:21:05 root] (utils.py 283): INFO Epoch: [3] [1820/2502] eta: 0:27:11 lr: 0.000019 loss_cls: 3.0814 (3.1660) grad_norm: 2.6905 (3.0659) time: 2.3909 data: 0.0003 max mem: 13912 +[2024-12-06 10:21:29 root] (utils.py 283): INFO Epoch: [3] [1830/2502] eta: 0:26:47 lr: 0.000019 loss_cls: 3.0814 (3.1649) grad_norm: 2.6045 (3.0693) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 10:21:53 root] (utils.py 283): INFO Epoch: [3] [1840/2502] eta: 0:26:23 lr: 0.000019 loss_cls: 2.9291 (3.1640) grad_norm: 2.8652 (3.0704) time: 2.3844 data: 0.0003 max mem: 13912 +[2024-12-06 10:22:17 root] (utils.py 283): INFO Epoch: [3] [1850/2502] eta: 0:25:59 lr: 0.000019 loss_cls: 3.1385 (3.1641) grad_norm: 2.7409 (3.0680) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 10:22:41 root] (utils.py 283): INFO Epoch: [3] [1860/2502] eta: 0:25:35 lr: 0.000019 loss_cls: 3.3714 (3.1659) grad_norm: 2.7150 (3.0675) time: 2.3882 data: 0.0003 max mem: 13912 +[2024-12-06 10:23:05 root] (utils.py 283): INFO Epoch: [3] [1870/2502] eta: 0:25:11 lr: 0.000019 loss_cls: 3.3284 (3.1645) grad_norm: 2.7150 (3.0660) time: 2.3896 data: 0.0003 max mem: 13912 +[2024-12-06 10:23:29 root] (utils.py 283): INFO Epoch: [3] [1880/2502] eta: 0:24:47 lr: 0.000019 loss_cls: 3.2638 (3.1652) grad_norm: 2.6978 (3.0646) time: 2.3939 data: 0.0003 max mem: 13912 +[2024-12-06 10:23:52 root] (utils.py 283): INFO Epoch: [3] [1890/2502] eta: 0:24:24 lr: 0.000019 loss_cls: 3.4043 (3.1660) grad_norm: 2.7905 (3.0651) time: 2.3917 data: 0.0003 max mem: 13912 +[2024-12-06 10:24:16 root] (utils.py 283): INFO Epoch: [3] [1900/2502] eta: 0:24:00 lr: 0.000019 loss_cls: 3.2638 (3.1655) grad_norm: 2.8030 (3.0653) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 10:24:40 root] (utils.py 283): INFO Epoch: [3] [1910/2502] eta: 0:23:36 lr: 0.000019 loss_cls: 3.3571 (3.1667) grad_norm: 2.8031 (3.0655) time: 2.3817 data: 0.0003 max mem: 13912 +[2024-12-06 10:25:04 root] (utils.py 283): INFO Epoch: [3] [1920/2502] eta: 0:23:12 lr: 0.000019 loss_cls: 3.5118 (3.1685) grad_norm: 2.7558 (3.0651) time: 2.3758 data: 0.0003 max mem: 13912 +[2024-12-06 10:25:28 root] (utils.py 283): INFO Epoch: [3] [1930/2502] eta: 0:22:48 lr: 0.000019 loss_cls: 3.4192 (3.1686) grad_norm: 2.7021 (3.0645) time: 2.3789 data: 0.0003 max mem: 13912 +[2024-12-06 10:25:52 root] (utils.py 283): INFO Epoch: [3] [1940/2502] eta: 0:22:24 lr: 0.000019 loss_cls: 3.2789 (3.1696) grad_norm: 2.6119 (3.0623) time: 2.3882 data: 0.0003 max mem: 13912 +[2024-12-06 10:26:15 root] (utils.py 283): INFO Epoch: [3] [1950/2502] eta: 0:22:00 lr: 0.000019 loss_cls: 3.2734 (3.1688) grad_norm: 2.5541 (3.0605) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 10:26:39 root] (utils.py 283): INFO Epoch: [3] [1960/2502] eta: 0:21:36 lr: 0.000019 loss_cls: 3.2803 (3.1701) grad_norm: 2.6156 (3.0588) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 10:27:03 root] (utils.py 283): INFO Epoch: [3] [1970/2502] eta: 0:21:12 lr: 0.000019 loss_cls: 3.2803 (3.1699) grad_norm: 2.7505 (3.0572) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 10:27:27 root] (utils.py 283): INFO Epoch: [3] [1980/2502] eta: 0:20:48 lr: 0.000019 loss_cls: 3.1126 (3.1707) grad_norm: 2.8219 (3.0573) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 10:27:51 root] (utils.py 283): INFO Epoch: [3] [1990/2502] eta: 0:20:24 lr: 0.000019 loss_cls: 3.3612 (3.1711) grad_norm: 2.8219 (3.0560) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 10:28:15 root] (utils.py 283): INFO Epoch: [3] [2000/2502] eta: 0:20:00 lr: 0.000019 loss_cls: 2.6950 (3.1683) grad_norm: 2.8088 (3.0563) time: 2.3933 data: 0.0003 max mem: 13912 +[2024-12-06 10:28:39 root] (utils.py 283): INFO Epoch: [3] [2010/2502] eta: 0:19:36 lr: 0.000019 loss_cls: 2.6893 (3.1667) grad_norm: 2.7005 (3.0564) time: 2.3907 data: 0.0003 max mem: 13912 +[2024-12-06 10:29:03 root] (utils.py 283): INFO Epoch: [3] [2020/2502] eta: 0:19:12 lr: 0.000019 loss_cls: 3.0162 (3.1669) grad_norm: 2.5724 (3.0701) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 10:29:27 root] (utils.py 283): INFO Epoch: [3] [2030/2502] eta: 0:18:48 lr: 0.000019 loss_cls: 3.2793 (3.1679) grad_norm: 2.6422 (3.0688) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 10:29:51 root] (utils.py 283): INFO Epoch: [3] [2040/2502] eta: 0:18:25 lr: 0.000019 loss_cls: 3.1420 (3.1678) grad_norm: 2.7838 (3.0677) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 10:30:15 root] (utils.py 283): INFO Epoch: [3] [2050/2502] eta: 0:18:01 lr: 0.000019 loss_cls: 3.1584 (3.1677) grad_norm: 2.8667 (3.0681) time: 2.3934 data: 0.0003 max mem: 13912 +[2024-12-06 10:30:39 root] (utils.py 283): INFO Epoch: [3] [2060/2502] eta: 0:17:37 lr: 0.000019 loss_cls: 3.3093 (3.1679) grad_norm: 2.8268 (3.0669) time: 2.3935 data: 0.0003 max mem: 13912 +[2024-12-06 10:31:02 root] (utils.py 283): INFO Epoch: [3] [2070/2502] eta: 0:17:13 lr: 0.000019 loss_cls: 3.1493 (3.1680) grad_norm: 2.6621 (3.0657) time: 2.3924 data: 0.0003 max mem: 13912 +[2024-12-06 10:31:26 root] (utils.py 283): INFO Epoch: [3] [2080/2502] eta: 0:16:49 lr: 0.000019 loss_cls: 3.0234 (3.1673) grad_norm: 2.6646 (3.0656) time: 2.3924 data: 0.0003 max mem: 13912 +[2024-12-06 10:31:50 root] (utils.py 283): INFO Epoch: [3] [2090/2502] eta: 0:16:25 lr: 0.000019 loss_cls: 3.0234 (3.1667) grad_norm: 2.6930 (3.0651) time: 2.3930 data: 0.0003 max mem: 13912 +[2024-12-06 10:32:14 root] (utils.py 283): INFO Epoch: [3] [2100/2502] eta: 0:16:01 lr: 0.000019 loss_cls: 3.0906 (3.1669) grad_norm: 2.7550 (3.0645) time: 2.3941 data: 0.0003 max mem: 13912 +[2024-12-06 10:32:38 root] (utils.py 283): INFO Epoch: [3] [2110/2502] eta: 0:15:37 lr: 0.000019 loss_cls: 3.0906 (3.1657) grad_norm: 2.7720 (3.0639) time: 2.3901 data: 0.0002 max mem: 13912 +[2024-12-06 10:33:01 root] (utils.py 283): INFO Epoch: [3] [2120/2502] eta: 0:15:13 lr: 0.000019 loss_cls: 3.2736 (3.1670) grad_norm: 3.0641 (3.0657) time: 2.3609 data: 0.0003 max mem: 13912 +[2024-12-06 10:33:25 root] (utils.py 283): INFO Epoch: [3] [2130/2502] eta: 0:14:49 lr: 0.000019 loss_cls: 3.2736 (3.1662) grad_norm: 2.8979 (3.0671) time: 2.3606 data: 0.0003 max mem: 13912 +[2024-12-06 10:33:49 root] (utils.py 283): INFO Epoch: [3] [2140/2502] eta: 0:14:25 lr: 0.000019 loss_cls: 3.0251 (3.1651) grad_norm: 2.5507 (3.0649) time: 2.3841 data: 0.0003 max mem: 13912 +[2024-12-06 10:34:13 root] (utils.py 283): INFO Epoch: [3] [2150/2502] eta: 0:14:01 lr: 0.000019 loss_cls: 3.1665 (3.1658) grad_norm: 2.5372 (3.0653) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 10:34:37 root] (utils.py 283): INFO Epoch: [3] [2160/2502] eta: 0:13:37 lr: 0.000019 loss_cls: 3.3007 (3.1665) grad_norm: 2.6590 (3.0645) time: 2.3897 data: 0.0003 max mem: 13912 +[2024-12-06 10:35:01 root] (utils.py 283): INFO Epoch: [3] [2170/2502] eta: 0:13:13 lr: 0.000019 loss_cls: 3.3910 (3.1677) grad_norm: 2.8566 (3.0636) time: 2.3888 data: 0.0003 max mem: 13912 +[2024-12-06 10:35:24 root] (utils.py 283): INFO Epoch: [3] [2180/2502] eta: 0:12:50 lr: 0.000019 loss_cls: 3.3910 (3.1685) grad_norm: 2.9015 (3.0642) time: 2.3700 data: 0.0003 max mem: 13912 +[2024-12-06 10:35:48 root] (utils.py 283): INFO Epoch: [3] [2190/2502] eta: 0:12:26 lr: 0.000019 loss_cls: 3.4205 (3.1691) grad_norm: 2.5770 (3.0621) time: 2.3681 data: 0.0003 max mem: 13912 +[2024-12-06 10:36:12 root] (utils.py 283): INFO Epoch: [3] [2200/2502] eta: 0:12:02 lr: 0.000019 loss_cls: 3.4205 (3.1700) grad_norm: 2.6235 (3.0624) time: 2.3840 data: 0.0003 max mem: 13912 +[2024-12-06 10:36:36 root] (utils.py 283): INFO Epoch: [3] [2210/2502] eta: 0:11:38 lr: 0.000019 loss_cls: 3.3489 (3.1704) grad_norm: 2.7546 (3.0606) time: 2.3870 data: 0.0003 max mem: 13912 +[2024-12-06 10:37:00 root] (utils.py 283): INFO Epoch: [3] [2220/2502] eta: 0:11:14 lr: 0.000019 loss_cls: 3.2892 (3.1701) grad_norm: 2.6849 (3.0681) time: 2.3845 data: 0.0003 max mem: 13912 +[2024-12-06 10:37:24 root] (utils.py 283): INFO Epoch: [3] [2230/2502] eta: 0:10:50 lr: 0.000019 loss_cls: 3.1459 (3.1695) grad_norm: 2.7959 (3.0670) time: 2.3827 data: 0.0003 max mem: 13912 +[2024-12-06 10:37:47 root] (utils.py 283): INFO Epoch: [3] [2240/2502] eta: 0:10:26 lr: 0.000019 loss_cls: 3.0074 (3.1697) grad_norm: 2.8322 (3.0693) time: 2.3849 data: 0.0003 max mem: 13912 +[2024-12-06 10:38:11 root] (utils.py 283): INFO Epoch: [3] [2250/2502] eta: 0:10:02 lr: 0.000019 loss_cls: 3.1158 (3.1690) grad_norm: 2.9136 (3.0687) time: 2.3760 data: 0.0003 max mem: 13912 +[2024-12-06 10:38:35 root] (utils.py 283): INFO Epoch: [3] [2260/2502] eta: 0:09:38 lr: 0.000019 loss_cls: 3.1360 (3.1685) grad_norm: 2.7886 (3.0681) time: 2.3736 data: 0.0003 max mem: 13912 +[2024-12-06 10:38:59 root] (utils.py 283): INFO Epoch: [3] [2270/2502] eta: 0:09:14 lr: 0.000019 loss_cls: 3.1450 (3.1681) grad_norm: 2.7820 (3.0677) time: 2.3831 data: 0.0003 max mem: 13912 +[2024-12-06 10:39:23 root] (utils.py 283): INFO Epoch: [3] [2280/2502] eta: 0:08:50 lr: 0.000019 loss_cls: 3.1450 (3.1683) grad_norm: 2.9279 (3.0688) time: 2.3889 data: 0.0003 max mem: 13912 +[2024-12-06 10:39:47 root] (utils.py 283): INFO Epoch: [3] [2290/2502] eta: 0:08:26 lr: 0.000019 loss_cls: 3.2200 (3.1687) grad_norm: 2.7694 (3.0684) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 10:40:11 root] (utils.py 283): INFO Epoch: [3] [2300/2502] eta: 0:08:02 lr: 0.000019 loss_cls: 3.2645 (3.1693) grad_norm: 2.7694 (3.0676) time: 2.3906 data: 0.0003 max mem: 13912 +[2024-12-06 10:40:34 root] (utils.py 283): INFO Epoch: [3] [2310/2502] eta: 0:07:39 lr: 0.000019 loss_cls: 3.2645 (3.1685) grad_norm: 2.6351 (3.0661) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 10:40:58 root] (utils.py 283): INFO Epoch: [3] [2320/2502] eta: 0:07:15 lr: 0.000019 loss_cls: 3.1895 (3.1688) grad_norm: 2.6176 (3.0654) time: 2.3923 data: 0.0003 max mem: 13912 +[2024-12-06 10:41:22 root] (utils.py 283): INFO Epoch: [3] [2330/2502] eta: 0:06:51 lr: 0.000019 loss_cls: 3.2136 (3.1687) grad_norm: 2.8014 (3.0683) time: 2.3937 data: 0.0003 max mem: 13912 +[2024-12-06 10:41:46 root] (utils.py 283): INFO Epoch: [3] [2340/2502] eta: 0:06:27 lr: 0.000019 loss_cls: 3.1373 (3.1682) grad_norm: 2.8684 (3.0682) time: 2.3923 data: 0.0003 max mem: 13912 +[2024-12-06 10:42:10 root] (utils.py 283): INFO Epoch: [3] [2350/2502] eta: 0:06:03 lr: 0.000019 loss_cls: 2.8608 (3.1671) grad_norm: 2.7455 (3.0670) time: 2.3939 data: 0.0003 max mem: 13912 +[2024-12-06 10:42:34 root] (utils.py 283): INFO Epoch: [3] [2360/2502] eta: 0:05:39 lr: 0.000019 loss_cls: 3.1454 (3.1676) grad_norm: 2.8381 (3.0672) time: 2.3965 data: 0.0003 max mem: 13912 +[2024-12-06 10:42:58 root] (utils.py 283): INFO Epoch: [3] [2370/2502] eta: 0:05:15 lr: 0.000019 loss_cls: 3.2428 (3.1673) grad_norm: 2.8321 (3.0653) time: 2.3930 data: 0.0003 max mem: 13912 +[2024-12-06 10:43:22 root] (utils.py 283): INFO Epoch: [3] [2380/2502] eta: 0:04:51 lr: 0.000019 loss_cls: 3.2490 (3.1675) grad_norm: 2.6199 (3.0653) time: 2.3915 data: 0.0003 max mem: 13912 +[2024-12-06 10:43:46 root] (utils.py 283): INFO Epoch: [3] [2390/2502] eta: 0:04:27 lr: 0.000019 loss_cls: 3.2487 (3.1675) grad_norm: 2.6199 (3.0643) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 10:44:10 root] (utils.py 283): INFO Epoch: [3] [2400/2502] eta: 0:04:03 lr: 0.000019 loss_cls: 3.1304 (3.1672) grad_norm: 2.6054 (3.0625) time: 2.3848 data: 0.0003 max mem: 13912 +[2024-12-06 10:44:34 root] (utils.py 283): INFO Epoch: [3] [2410/2502] eta: 0:03:39 lr: 0.000019 loss_cls: 3.3078 (3.1684) grad_norm: 2.6054 (3.0626) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 10:44:57 root] (utils.py 283): INFO Epoch: [3] [2420/2502] eta: 0:03:16 lr: 0.000019 loss_cls: 3.4280 (3.1680) grad_norm: 2.7182 (3.0613) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 10:45:21 root] (utils.py 283): INFO Epoch: [3] [2430/2502] eta: 0:02:52 lr: 0.000019 loss_cls: 3.4111 (3.1681) grad_norm: 2.6612 (3.0631) time: 2.3893 data: 0.0003 max mem: 13912 +[2024-12-06 10:45:45 root] (utils.py 283): INFO Epoch: [3] [2440/2502] eta: 0:02:28 lr: 0.000019 loss_cls: 3.3227 (3.1687) grad_norm: 2.7197 (3.0622) time: 2.3817 data: 0.0003 max mem: 13912 +[2024-12-06 10:46:09 root] (utils.py 283): INFO Epoch: [3] [2450/2502] eta: 0:02:04 lr: 0.000019 loss_cls: 3.1392 (3.1681) grad_norm: 2.7591 (3.0625) time: 2.3714 data: 0.0003 max mem: 13912 +[2024-12-06 10:46:33 root] (utils.py 283): INFO Epoch: [3] [2460/2502] eta: 0:01:40 lr: 0.000019 loss_cls: 3.0328 (3.1678) grad_norm: 2.7875 (3.0620) time: 2.3838 data: 0.0003 max mem: 13912 +[2024-12-06 10:46:57 root] (utils.py 283): INFO Epoch: [3] [2470/2502] eta: 0:01:16 lr: 0.000019 loss_cls: 3.1377 (3.1680) grad_norm: 2.7315 (3.0611) time: 2.4012 data: 0.0003 max mem: 13912 +[2024-12-06 10:47:21 root] (utils.py 283): INFO Epoch: [3] [2480/2502] eta: 0:00:52 lr: 0.000019 loss_cls: 3.2027 (3.1673) grad_norm: 2.7315 (3.0616) time: 2.3943 data: 0.0003 max mem: 13912 +[2024-12-06 10:47:45 root] (utils.py 283): INFO Epoch: [3] [2490/2502] eta: 0:00:28 lr: 0.000019 loss_cls: 3.1345 (3.1667) grad_norm: 2.5652 (3.0597) time: 2.4185 data: 0.0238 max mem: 13912 +[2024-12-06 10:48:09 root] (utils.py 283): INFO Epoch: [3] [2500/2502] eta: 0:00:04 lr: 0.000019 loss_cls: 3.2352 (3.1674) grad_norm: 2.5055 (3.0596) time: 2.4234 data: 0.0238 max mem: 13912 +[2024-12-06 10:48:11 root] (utils.py 283): INFO Epoch: [3] [2501/2502] eta: 0:00:02 lr: 0.000019 loss_cls: 3.1345 (3.1673) grad_norm: 2.5177 (3.0596) time: 2.4138 data: 0.0238 max mem: 13912 +[2024-12-06 10:48:11 root] (utils.py 297): INFO Epoch: [3] Total time: 1:39:42 (2.3912 s / it) +[2024-12-06 10:48:11 root] (engine.py 179): INFO Averaged stats:lr: 0.000019 loss_cls: 3.1345 (3.1619) grad_norm: 2.5177 (3.0596) +[2024-12-06 10:48:13 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:40 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4706 (0.4706) acc1: 88.2812 (88.2812) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.4167 data: 0.0003 max mem: 13912 +[2024-12-06 10:48:16 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:27 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6542 (0.6666) acc1: 87.5000 (85.2273) acc3: 96.8750 (95.5256) acc5: 96.8750 (96.9460) time: 0.3104 data: 0.0004 max mem: 13912 +[2024-12-06 10:48:19 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:24 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7063 (0.7087) acc1: 85.1562 (84.7842) acc3: 94.5312 (94.7917) acc5: 96.8750 (96.5402) time: 0.3125 data: 0.0004 max mem: 13912 +[2024-12-06 10:48:22 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:21 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7191 (0.7283) acc1: 83.5938 (83.9718) acc3: 94.5312 (94.6069) acc5: 96.8750 (96.6230) time: 0.3257 data: 0.0004 max mem: 13912 +[2024-12-06 10:48:25 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:18 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7330 (0.7324) acc1: 83.5938 (83.7271) acc3: 94.5312 (94.6456) acc5: 96.8750 (96.6845) time: 0.3144 data: 0.0004 max mem: 13912 +[2024-12-06 10:48:29 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8084 (0.8177) acc1: 78.9062 (81.8321) acc3: 92.1875 (93.4130) acc5: 95.3125 (95.6036) time: 0.3137 data: 0.0004 max mem: 13912 +[2024-12-06 10:48:32 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:12 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1161 (0.8490) acc1: 75.0000 (81.4293) acc3: 88.2812 (92.8023) acc5: 91.4062 (95.0948) time: 0.3253 data: 0.0004 max mem: 13912 +[2024-12-06 10:48:35 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0693 (0.8795) acc1: 78.9062 (80.6778) acc3: 89.8438 (92.5176) acc5: 92.9688 (94.9494) time: 0.3266 data: 0.0004 max mem: 13912 +[2024-12-06 10:48:38 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:05 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0693 (0.9080) acc1: 75.7812 (80.0829) acc3: 89.8438 (92.0814) acc5: 92.9688 (94.5988) time: 0.3131 data: 0.0006 max mem: 13912 +[2024-12-06 10:48:41 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:02 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1406 (0.9340) acc1: 73.4375 (79.3098) acc3: 89.0625 (91.7497) acc5: 92.1875 (94.3338) time: 0.3113 data: 0.0007 max mem: 13912 +[2024-12-06 10:48:44 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.9973 (0.9273) acc1: 76.5625 (79.3760) acc3: 89.8438 (91.8320) acc5: 93.7500 (94.4480) time: 0.3082 data: 0.0006 max mem: 13912 +[2024-12-06 10:48:44 root] (utils.py 297): INFO Test: Total time: 0:00:30 (0.3163 s / it) +[2024-12-06 10:48:44 root] (engine.py 264): INFO * Acc@1 79.232 Acc@3 91.840 Acc@5 94.582 loss 0.924 flops 3.584 layer_flops 3.536 +[2024-12-06 10:48:44 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.2% +[2024-12-06 10:48:44 root] (main.py 551): INFO Max accuracy: 79.25% +[2024-12-06 10:48:46 root] (utils.py 283): INFO Epoch: [4] [ 0/2502] eta: 1:46:00 lr: 0.000018 loss_cls: 3.1357 (3.1357) grad_norm: 2.4756 (2.4756) time: 2.5421 data: 0.0002 max mem: 13912 +[2024-12-06 10:49:10 root] (utils.py 283): INFO Epoch: [4] [ 10/2502] eta: 1:38:43 lr: 0.000018 loss_cls: 3.1937 (3.2690) grad_norm: 2.7493 (3.3142) time: 2.3768 data: 0.0003 max mem: 13912 +[2024-12-06 10:49:34 root] (utils.py 283): INFO Epoch: [4] [ 20/2502] eta: 1:38:38 lr: 0.000018 loss_cls: 3.3991 (3.3464) grad_norm: 2.9091 (3.2763) time: 2.3768 data: 0.0003 max mem: 13912 +[2024-12-06 10:49:58 root] (utils.py 283): INFO Epoch: [4] [ 30/2502] eta: 1:38:24 lr: 0.000018 loss_cls: 3.4530 (3.3573) grad_norm: 2.9545 (3.2343) time: 2.3951 data: 0.0003 max mem: 13912 +[2024-12-06 10:50:21 root] (utils.py 283): INFO Epoch: [4] [ 40/2502] eta: 1:38:03 lr: 0.000018 loss_cls: 3.1687 (3.2301) grad_norm: 2.8695 (3.1588) time: 2.3948 data: 0.0003 max mem: 13912 +[2024-12-06 10:50:45 root] (utils.py 283): INFO Epoch: [4] [ 50/2502] eta: 1:37:42 lr: 0.000018 loss_cls: 3.2454 (3.2714) grad_norm: 2.5947 (3.1289) time: 2.3942 data: 0.0003 max mem: 13912 +[2024-12-06 10:51:09 root] (utils.py 283): INFO Epoch: [4] [ 60/2502] eta: 1:37:18 lr: 0.000018 loss_cls: 3.2581 (3.2530) grad_norm: 2.6264 (3.0540) time: 2.3930 data: 0.0003 max mem: 13912 +[2024-12-06 10:51:33 root] (utils.py 283): INFO Epoch: [4] [ 70/2502] eta: 1:36:53 lr: 0.000018 loss_cls: 3.1817 (3.2605) grad_norm: 2.6924 (3.0584) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 10:51:57 root] (utils.py 283): INFO Epoch: [4] [ 80/2502] eta: 1:36:30 lr: 0.000018 loss_cls: 3.2729 (3.2471) grad_norm: 2.6924 (3.0650) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 10:52:21 root] (utils.py 283): INFO Epoch: [4] [ 90/2502] eta: 1:36:06 lr: 0.000018 loss_cls: 3.2729 (3.2603) grad_norm: 2.7857 (3.0680) time: 2.3927 data: 0.0003 max mem: 13912 +[2024-12-06 10:52:45 root] (utils.py 283): INFO Epoch: [4] [ 100/2502] eta: 1:35:42 lr: 0.000018 loss_cls: 3.1777 (3.2311) grad_norm: 2.7624 (3.0585) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 10:53:09 root] (utils.py 283): INFO Epoch: [4] [ 110/2502] eta: 1:35:18 lr: 0.000018 loss_cls: 3.0872 (3.2423) grad_norm: 2.7557 (3.0466) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 10:53:33 root] (utils.py 283): INFO Epoch: [4] [ 120/2502] eta: 1:34:55 lr: 0.000018 loss_cls: 3.2986 (3.2380) grad_norm: 2.8082 (3.0337) time: 2.3923 data: 0.0003 max mem: 13912 +[2024-12-06 10:53:57 root] (utils.py 283): INFO Epoch: [4] [ 130/2502] eta: 1:34:31 lr: 0.000018 loss_cls: 3.3303 (3.2366) grad_norm: 2.6415 (3.0012) time: 2.3919 data: 0.0003 max mem: 13912 +[2024-12-06 10:54:21 root] (utils.py 283): INFO Epoch: [4] [ 140/2502] eta: 1:34:06 lr: 0.000018 loss_cls: 3.0584 (3.2168) grad_norm: 2.6415 (2.9925) time: 2.3878 data: 0.0003 max mem: 13912 +[2024-12-06 10:54:44 root] (utils.py 283): INFO Epoch: [4] [ 150/2502] eta: 1:33:42 lr: 0.000018 loss_cls: 3.0249 (3.2048) grad_norm: 2.7859 (2.9773) time: 2.3873 data: 0.0003 max mem: 13912 +[2024-12-06 10:55:08 root] (utils.py 283): INFO Epoch: [4] [ 160/2502] eta: 1:33:18 lr: 0.000018 loss_cls: 3.2595 (3.2110) grad_norm: 2.7687 (3.0034) time: 2.3909 data: 0.0003 max mem: 13912 +[2024-12-06 10:55:32 root] (utils.py 283): INFO Epoch: [4] [ 170/2502] eta: 1:32:54 lr: 0.000018 loss_cls: 3.4486 (3.2211) grad_norm: 2.8485 (3.0332) time: 2.3889 data: 0.0003 max mem: 13912 +[2024-12-06 10:55:56 root] (utils.py 283): INFO Epoch: [4] [ 180/2502] eta: 1:32:30 lr: 0.000018 loss_cls: 3.3788 (3.2124) grad_norm: 2.7999 (3.0199) time: 2.3874 data: 0.0003 max mem: 13912 +[2024-12-06 10:56:20 root] (utils.py 283): INFO Epoch: [4] [ 190/2502] eta: 1:32:05 lr: 0.000018 loss_cls: 3.3788 (3.2211) grad_norm: 2.6555 (3.0082) time: 2.3856 data: 0.0003 max mem: 13912 +[2024-12-06 10:56:44 root] (utils.py 283): INFO Epoch: [4] [ 200/2502] eta: 1:31:41 lr: 0.000018 loss_cls: 3.3907 (3.2242) grad_norm: 2.5944 (2.9967) time: 2.3879 data: 0.0003 max mem: 13912 +[2024-12-06 10:57:08 root] (utils.py 283): INFO Epoch: [4] [ 210/2502] eta: 1:31:18 lr: 0.000018 loss_cls: 3.2970 (3.2175) grad_norm: 2.5944 (2.9811) time: 2.3966 data: 0.0003 max mem: 13912 +[2024-12-06 10:57:32 root] (utils.py 283): INFO Epoch: [4] [ 220/2502] eta: 1:30:54 lr: 0.000018 loss_cls: 3.0133 (3.2046) grad_norm: 2.7669 (2.9859) time: 2.3946 data: 0.0003 max mem: 13912 +[2024-12-06 10:57:56 root] (utils.py 283): INFO Epoch: [4] [ 230/2502] eta: 1:30:30 lr: 0.000018 loss_cls: 3.0133 (3.2057) grad_norm: 2.8884 (2.9787) time: 2.3863 data: 0.0003 max mem: 13912 +[2024-12-06 10:58:20 root] (utils.py 283): INFO Epoch: [4] [ 240/2502] eta: 1:30:06 lr: 0.000018 loss_cls: 3.4567 (3.2132) grad_norm: 2.8976 (2.9991) time: 2.3869 data: 0.0003 max mem: 13912 +[2024-12-06 10:58:43 root] (utils.py 283): INFO Epoch: [4] [ 250/2502] eta: 1:29:42 lr: 0.000018 loss_cls: 3.4567 (3.2210) grad_norm: 2.9612 (2.9960) time: 2.3906 data: 0.0003 max mem: 13912 +[2024-12-06 10:59:07 root] (utils.py 283): INFO Epoch: [4] [ 260/2502] eta: 1:29:17 lr: 0.000018 loss_cls: 3.3235 (3.2229) grad_norm: 2.7287 (2.9834) time: 2.3835 data: 0.0002 max mem: 13912 +[2024-12-06 10:59:31 root] (utils.py 283): INFO Epoch: [4] [ 270/2502] eta: 1:28:54 lr: 0.000018 loss_cls: 3.1795 (3.2139) grad_norm: 2.7440 (2.9853) time: 2.3892 data: 0.0002 max mem: 13912 +[2024-12-06 10:59:55 root] (utils.py 283): INFO Epoch: [4] [ 280/2502] eta: 1:28:31 lr: 0.000018 loss_cls: 3.1348 (3.2177) grad_norm: 2.8169 (2.9844) time: 2.4002 data: 0.0003 max mem: 13912 +[2024-12-06 11:00:19 root] (utils.py 283): INFO Epoch: [4] [ 290/2502] eta: 1:28:06 lr: 0.000018 loss_cls: 3.2610 (3.2156) grad_norm: 2.7695 (2.9847) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 11:00:43 root] (utils.py 283): INFO Epoch: [4] [ 300/2502] eta: 1:27:42 lr: 0.000018 loss_cls: 3.2940 (3.2198) grad_norm: 2.6721 (3.0055) time: 2.3821 data: 0.0003 max mem: 13912 +[2024-12-06 11:01:07 root] (utils.py 283): INFO Epoch: [4] [ 310/2502] eta: 1:27:19 lr: 0.000018 loss_cls: 3.0437 (3.2091) grad_norm: 2.6658 (2.9989) time: 2.3975 data: 0.0003 max mem: 13912 +[2024-12-06 11:01:31 root] (utils.py 283): INFO Epoch: [4] [ 320/2502] eta: 1:26:55 lr: 0.000018 loss_cls: 2.9974 (3.2082) grad_norm: 2.7124 (3.0162) time: 2.3978 data: 0.0003 max mem: 13912 +[2024-12-06 11:01:55 root] (utils.py 283): INFO Epoch: [4] [ 330/2502] eta: 1:26:32 lr: 0.000018 loss_cls: 3.2232 (3.2100) grad_norm: 2.7124 (3.0112) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 11:02:19 root] (utils.py 283): INFO Epoch: [4] [ 340/2502] eta: 1:26:07 lr: 0.000018 loss_cls: 3.2399 (3.2110) grad_norm: 2.6711 (3.0035) time: 2.3846 data: 0.0003 max mem: 13912 +[2024-12-06 11:02:43 root] (utils.py 283): INFO Epoch: [4] [ 350/2502] eta: 1:25:44 lr: 0.000018 loss_cls: 3.2848 (3.2129) grad_norm: 2.6785 (3.0067) time: 2.3875 data: 0.0003 max mem: 13912 +[2024-12-06 11:03:06 root] (utils.py 283): INFO Epoch: [4] [ 360/2502] eta: 1:25:19 lr: 0.000018 loss_cls: 3.3464 (3.2163) grad_norm: 2.9171 (3.0289) time: 2.3884 data: 0.0003 max mem: 13912 +[2024-12-06 11:03:30 root] (utils.py 283): INFO Epoch: [4] [ 370/2502] eta: 1:24:55 lr: 0.000018 loss_cls: 3.1900 (3.2050) grad_norm: 2.9171 (3.0632) time: 2.3828 data: 0.0003 max mem: 13912 +[2024-12-06 11:03:54 root] (utils.py 283): INFO Epoch: [4] [ 380/2502] eta: 1:24:31 lr: 0.000018 loss_cls: 2.8051 (3.1995) grad_norm: 2.8185 (3.0660) time: 2.3920 data: 0.0003 max mem: 13912 +[2024-12-06 11:04:18 root] (utils.py 283): INFO Epoch: [4] [ 390/2502] eta: 1:24:07 lr: 0.000018 loss_cls: 3.2931 (3.1984) grad_norm: 2.9723 (3.0880) time: 2.3932 data: 0.0003 max mem: 13912 +[2024-12-06 11:04:42 root] (utils.py 283): INFO Epoch: [4] [ 400/2502] eta: 1:23:43 lr: 0.000018 loss_cls: 3.0845 (3.1929) grad_norm: 2.9881 (3.0851) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 11:05:06 root] (utils.py 283): INFO Epoch: [4] [ 410/2502] eta: 1:23:20 lr: 0.000018 loss_cls: 3.0845 (3.1931) grad_norm: 2.9454 (3.0855) time: 2.3937 data: 0.0003 max mem: 13912 +[2024-12-06 11:05:30 root] (utils.py 283): INFO Epoch: [4] [ 420/2502] eta: 1:22:55 lr: 0.000018 loss_cls: 2.8948 (3.1878) grad_norm: 2.7425 (3.0998) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 11:05:54 root] (utils.py 283): INFO Epoch: [4] [ 430/2502] eta: 1:22:31 lr: 0.000018 loss_cls: 2.9820 (3.1842) grad_norm: 2.9238 (3.1037) time: 2.3790 data: 0.0003 max mem: 13912 +[2024-12-06 11:06:17 root] (utils.py 283): INFO Epoch: [4] [ 440/2502] eta: 1:22:07 lr: 0.000018 loss_cls: 3.2140 (3.1869) grad_norm: 2.9238 (3.0975) time: 2.3834 data: 0.0003 max mem: 13912 +[2024-12-06 11:06:41 root] (utils.py 283): INFO Epoch: [4] [ 450/2502] eta: 1:21:43 lr: 0.000018 loss_cls: 3.1633 (3.1853) grad_norm: 2.8364 (3.0909) time: 2.3873 data: 0.0003 max mem: 13912 +[2024-12-06 11:07:05 root] (utils.py 283): INFO Epoch: [4] [ 460/2502] eta: 1:21:19 lr: 0.000018 loss_cls: 3.1633 (3.1844) grad_norm: 2.5917 (3.0873) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 11:07:29 root] (utils.py 283): INFO Epoch: [4] [ 470/2502] eta: 1:20:55 lr: 0.000018 loss_cls: 3.2811 (3.1882) grad_norm: 2.6371 (3.0824) time: 2.3902 data: 0.0003 max mem: 13912 +[2024-12-06 11:07:53 root] (utils.py 283): INFO Epoch: [4] [ 480/2502] eta: 1:20:31 lr: 0.000018 loss_cls: 3.2096 (3.1843) grad_norm: 2.6152 (3.1046) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 11:08:17 root] (utils.py 283): INFO Epoch: [4] [ 490/2502] eta: 1:20:08 lr: 0.000018 loss_cls: 3.2135 (3.1842) grad_norm: 2.6152 (3.0981) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 11:08:41 root] (utils.py 283): INFO Epoch: [4] [ 500/2502] eta: 1:19:43 lr: 0.000018 loss_cls: 3.3517 (3.1880) grad_norm: 2.7851 (3.0951) time: 2.3904 data: 0.0003 max mem: 13912 +[2024-12-06 11:09:04 root] (utils.py 283): INFO Epoch: [4] [ 510/2502] eta: 1:19:19 lr: 0.000018 loss_cls: 3.3224 (3.1870) grad_norm: 2.8070 (3.0910) time: 2.3755 data: 0.0003 max mem: 13912 +[2024-12-06 11:09:28 root] (utils.py 283): INFO Epoch: [4] [ 520/2502] eta: 1:18:55 lr: 0.000018 loss_cls: 3.2362 (3.1867) grad_norm: 2.7258 (3.0879) time: 2.3755 data: 0.0003 max mem: 13912 +[2024-12-06 11:09:52 root] (utils.py 283): INFO Epoch: [4] [ 530/2502] eta: 1:18:31 lr: 0.000018 loss_cls: 2.9070 (3.1809) grad_norm: 2.8760 (3.1118) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 11:10:16 root] (utils.py 283): INFO Epoch: [4] [ 540/2502] eta: 1:18:07 lr: 0.000018 loss_cls: 3.0528 (3.1832) grad_norm: 2.9326 (3.1082) time: 2.3919 data: 0.0003 max mem: 13912 +[2024-12-06 11:10:40 root] (utils.py 283): INFO Epoch: [4] [ 550/2502] eta: 1:17:43 lr: 0.000018 loss_cls: 3.3944 (3.1826) grad_norm: 2.9529 (3.1074) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 11:11:04 root] (utils.py 283): INFO Epoch: [4] [ 560/2502] eta: 1:17:19 lr: 0.000018 loss_cls: 2.8173 (3.1748) grad_norm: 2.8227 (3.1038) time: 2.3867 data: 0.0003 max mem: 13912 +[2024-12-06 11:11:28 root] (utils.py 283): INFO Epoch: [4] [ 570/2502] eta: 1:16:55 lr: 0.000018 loss_cls: 2.8574 (3.1758) grad_norm: 2.6471 (3.1074) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 11:11:52 root] (utils.py 283): INFO Epoch: [4] [ 580/2502] eta: 1:16:31 lr: 0.000018 loss_cls: 3.2045 (3.1759) grad_norm: 2.7375 (3.1073) time: 2.3899 data: 0.0003 max mem: 13912 +[2024-12-06 11:12:16 root] (utils.py 283): INFO Epoch: [4] [ 590/2502] eta: 1:16:08 lr: 0.000018 loss_cls: 3.2045 (3.1724) grad_norm: 2.7941 (3.1051) time: 2.3938 data: 0.0003 max mem: 13912 +[2024-12-06 11:12:39 root] (utils.py 283): INFO Epoch: [4] [ 600/2502] eta: 1:15:44 lr: 0.000018 loss_cls: 3.2396 (3.1715) grad_norm: 2.8006 (3.1029) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 11:13:03 root] (utils.py 283): INFO Epoch: [4] [ 610/2502] eta: 1:15:20 lr: 0.000018 loss_cls: 3.2522 (3.1702) grad_norm: 2.8421 (3.1069) time: 2.3909 data: 0.0003 max mem: 13912 +[2024-12-06 11:13:27 root] (utils.py 283): INFO Epoch: [4] [ 620/2502] eta: 1:14:56 lr: 0.000018 loss_cls: 3.2267 (3.1685) grad_norm: 2.9869 (3.1063) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 11:13:51 root] (utils.py 283): INFO Epoch: [4] [ 630/2502] eta: 1:14:32 lr: 0.000018 loss_cls: 3.2932 (3.1727) grad_norm: 2.9869 (3.1134) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 11:14:15 root] (utils.py 283): INFO Epoch: [4] [ 640/2502] eta: 1:14:08 lr: 0.000018 loss_cls: 3.3405 (3.1697) grad_norm: 2.9116 (3.1096) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 11:14:39 root] (utils.py 283): INFO Epoch: [4] [ 650/2502] eta: 1:13:44 lr: 0.000018 loss_cls: 2.9194 (3.1663) grad_norm: 2.9116 (3.1098) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 11:15:03 root] (utils.py 283): INFO Epoch: [4] [ 660/2502] eta: 1:13:20 lr: 0.000018 loss_cls: 3.1710 (3.1664) grad_norm: 2.8212 (3.1032) time: 2.3884 data: 0.0003 max mem: 13912 +[2024-12-06 11:15:27 root] (utils.py 283): INFO Epoch: [4] [ 670/2502] eta: 1:12:57 lr: 0.000018 loss_cls: 3.3131 (3.1680) grad_norm: 2.6905 (3.0979) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 11:15:51 root] (utils.py 283): INFO Epoch: [4] [ 680/2502] eta: 1:12:33 lr: 0.000018 loss_cls: 3.3581 (3.1713) grad_norm: 2.7402 (3.0985) time: 2.3928 data: 0.0003 max mem: 13912 +[2024-12-06 11:16:15 root] (utils.py 283): INFO Epoch: [4] [ 690/2502] eta: 1:12:09 lr: 0.000018 loss_cls: 3.3114 (3.1719) grad_norm: 2.8179 (3.0956) time: 2.3886 data: 0.0003 max mem: 13912 +[2024-12-06 11:16:38 root] (utils.py 283): INFO Epoch: [4] [ 700/2502] eta: 1:11:45 lr: 0.000018 loss_cls: 3.2934 (3.1734) grad_norm: 2.7866 (3.1044) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 11:17:02 root] (utils.py 283): INFO Epoch: [4] [ 710/2502] eta: 1:11:21 lr: 0.000018 loss_cls: 3.4374 (3.1728) grad_norm: 2.7183 (3.1041) time: 2.3921 data: 0.0003 max mem: 13912 +[2024-12-06 11:17:26 root] (utils.py 283): INFO Epoch: [4] [ 720/2502] eta: 1:10:57 lr: 0.000018 loss_cls: 3.2902 (3.1714) grad_norm: 2.7183 (3.1041) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 11:17:50 root] (utils.py 283): INFO Epoch: [4] [ 730/2502] eta: 1:10:33 lr: 0.000018 loss_cls: 3.3444 (3.1735) grad_norm: 2.8611 (3.1014) time: 2.3749 data: 0.0003 max mem: 13912 +[2024-12-06 11:18:14 root] (utils.py 283): INFO Epoch: [4] [ 740/2502] eta: 1:10:09 lr: 0.000018 loss_cls: 3.4904 (3.1744) grad_norm: 2.8969 (3.1010) time: 2.3757 data: 0.0003 max mem: 13912 +[2024-12-06 11:18:38 root] (utils.py 283): INFO Epoch: [4] [ 750/2502] eta: 1:09:45 lr: 0.000018 loss_cls: 3.3020 (3.1754) grad_norm: 2.8597 (3.1013) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 11:19:02 root] (utils.py 283): INFO Epoch: [4] [ 760/2502] eta: 1:09:21 lr: 0.000018 loss_cls: 3.1221 (3.1739) grad_norm: 2.7778 (3.1003) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 11:19:25 root] (utils.py 283): INFO Epoch: [4] [ 770/2502] eta: 1:08:57 lr: 0.000018 loss_cls: 3.0582 (3.1721) grad_norm: 2.8563 (3.1000) time: 2.3863 data: 0.0003 max mem: 13912 +[2024-12-06 11:19:49 root] (utils.py 283): INFO Epoch: [4] [ 780/2502] eta: 1:08:33 lr: 0.000018 loss_cls: 3.3627 (3.1742) grad_norm: 2.9244 (3.0985) time: 2.3839 data: 0.0003 max mem: 13912 +[2024-12-06 11:20:13 root] (utils.py 283): INFO Epoch: [4] [ 790/2502] eta: 1:08:09 lr: 0.000018 loss_cls: 3.3627 (3.1708) grad_norm: 2.7119 (3.0951) time: 2.3826 data: 0.0003 max mem: 13912 +[2024-12-06 11:20:37 root] (utils.py 283): INFO Epoch: [4] [ 800/2502] eta: 1:07:45 lr: 0.000018 loss_cls: 3.1489 (3.1713) grad_norm: 2.6111 (3.0960) time: 2.3857 data: 0.0003 max mem: 13912 +[2024-12-06 11:21:01 root] (utils.py 283): INFO Epoch: [4] [ 810/2502] eta: 1:07:21 lr: 0.000018 loss_cls: 3.3078 (3.1709) grad_norm: 2.6679 (3.0952) time: 2.3803 data: 0.0003 max mem: 13912 +[2024-12-06 11:21:25 root] (utils.py 283): INFO Epoch: [4] [ 820/2502] eta: 1:06:57 lr: 0.000018 loss_cls: 3.1554 (3.1678) grad_norm: 2.6122 (3.0901) time: 2.3771 data: 0.0003 max mem: 13912 +[2024-12-06 11:21:48 root] (utils.py 283): INFO Epoch: [4] [ 830/2502] eta: 1:06:33 lr: 0.000018 loss_cls: 3.3075 (3.1705) grad_norm: 2.6573 (3.0942) time: 2.3778 data: 0.0003 max mem: 13912 +[2024-12-06 11:22:12 root] (utils.py 283): INFO Epoch: [4] [ 840/2502] eta: 1:06:09 lr: 0.000018 loss_cls: 3.4662 (3.1716) grad_norm: 3.7854 (3.1472) time: 2.3810 data: 0.0003 max mem: 13912 +[2024-12-06 11:22:36 root] (utils.py 283): INFO Epoch: [4] [ 850/2502] eta: 1:05:45 lr: 0.000018 loss_cls: 3.4662 (3.1738) grad_norm: 3.7854 (3.1822) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 11:23:00 root] (utils.py 283): INFO Epoch: [4] [ 860/2502] eta: 1:05:21 lr: 0.000018 loss_cls: 3.4325 (3.1758) grad_norm: 3.2957 (3.1847) time: 2.3867 data: 0.0003 max mem: 13912 +[2024-12-06 11:23:24 root] (utils.py 283): INFO Epoch: [4] [ 870/2502] eta: 1:04:57 lr: 0.000018 loss_cls: 3.4436 (3.1760) grad_norm: 2.9157 (3.1833) time: 2.3921 data: 0.0003 max mem: 13912 +[2024-12-06 11:23:48 root] (utils.py 283): INFO Epoch: [4] [ 880/2502] eta: 1:04:33 lr: 0.000018 loss_cls: 3.3572 (3.1760) grad_norm: 2.6702 (3.1771) time: 2.3970 data: 0.0003 max mem: 13912 +[2024-12-06 11:24:12 root] (utils.py 283): INFO Epoch: [4] [ 890/2502] eta: 1:04:10 lr: 0.000018 loss_cls: 3.2352 (3.1754) grad_norm: 2.6680 (3.1725) time: 2.3964 data: 0.0003 max mem: 13912 +[2024-12-06 11:24:36 root] (utils.py 283): INFO Epoch: [4] [ 900/2502] eta: 1:03:46 lr: 0.000018 loss_cls: 3.2220 (3.1761) grad_norm: 2.7101 (3.1694) time: 2.3933 data: 0.0003 max mem: 13912 +[2024-12-06 11:25:00 root] (utils.py 283): INFO Epoch: [4] [ 910/2502] eta: 1:03:22 lr: 0.000018 loss_cls: 3.2401 (3.1736) grad_norm: 2.7711 (3.1732) time: 2.3875 data: 0.0003 max mem: 13912 +[2024-12-06 11:25:23 root] (utils.py 283): INFO Epoch: [4] [ 920/2502] eta: 1:02:58 lr: 0.000018 loss_cls: 2.8173 (3.1723) grad_norm: 2.9876 (3.1752) time: 2.3825 data: 0.0003 max mem: 13912 +[2024-12-06 11:25:47 root] (utils.py 283): INFO Epoch: [4] [ 930/2502] eta: 1:02:34 lr: 0.000018 loss_cls: 2.8850 (3.1709) grad_norm: 3.0104 (3.1736) time: 2.3906 data: 0.0003 max mem: 13912 +[2024-12-06 11:26:11 root] (utils.py 283): INFO Epoch: [4] [ 940/2502] eta: 1:02:11 lr: 0.000018 loss_cls: 3.1689 (3.1720) grad_norm: 2.8465 (3.1736) time: 2.4011 data: 0.0003 max mem: 13912 +[2024-12-06 11:26:35 root] (utils.py 283): INFO Epoch: [4] [ 950/2502] eta: 1:01:47 lr: 0.000018 loss_cls: 3.1809 (3.1717) grad_norm: 2.6272 (3.1680) time: 2.4013 data: 0.0003 max mem: 13912 +[2024-12-06 11:26:59 root] (utils.py 283): INFO Epoch: [4] [ 960/2502] eta: 1:01:23 lr: 0.000018 loss_cls: 3.3696 (3.1745) grad_norm: 2.7455 (3.1675) time: 2.4013 data: 0.0003 max mem: 13912 +[2024-12-06 11:27:23 root] (utils.py 283): INFO Epoch: [4] [ 970/2502] eta: 1:00:59 lr: 0.000018 loss_cls: 3.4178 (3.1748) grad_norm: 2.6869 (3.1622) time: 2.3976 data: 0.0003 max mem: 13912 +[2024-12-06 11:27:47 root] (utils.py 283): INFO Epoch: [4] [ 980/2502] eta: 1:00:35 lr: 0.000018 loss_cls: 3.1792 (3.1734) grad_norm: 2.6323 (3.1582) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 11:28:11 root] (utils.py 283): INFO Epoch: [4] [ 990/2502] eta: 1:00:12 lr: 0.000018 loss_cls: 3.2505 (3.1730) grad_norm: 2.8372 (3.1830) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 11:28:35 root] (utils.py 283): INFO Epoch: [4] [1000/2502] eta: 0:59:48 lr: 0.000018 loss_cls: 3.3944 (3.1754) grad_norm: 2.8992 (3.1793) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 11:28:59 root] (utils.py 283): INFO Epoch: [4] [1010/2502] eta: 0:59:24 lr: 0.000018 loss_cls: 3.4210 (3.1770) grad_norm: 2.6416 (3.1741) time: 2.3895 data: 0.0003 max mem: 13912 +[2024-12-06 11:29:23 root] (utils.py 283): INFO Epoch: [4] [1020/2502] eta: 0:59:00 lr: 0.000018 loss_cls: 3.2789 (3.1774) grad_norm: 2.5511 (3.1705) time: 2.3941 data: 0.0003 max mem: 13912 +[2024-12-06 11:29:47 root] (utils.py 283): INFO Epoch: [4] [1030/2502] eta: 0:58:36 lr: 0.000018 loss_cls: 3.1491 (3.1776) grad_norm: 2.6650 (3.1695) time: 2.3951 data: 0.0003 max mem: 13912 +[2024-12-06 11:30:11 root] (utils.py 283): INFO Epoch: [4] [1040/2502] eta: 0:58:12 lr: 0.000018 loss_cls: 3.3794 (3.1777) grad_norm: 2.9990 (3.1678) time: 2.3938 data: 0.0003 max mem: 13912 +[2024-12-06 11:30:35 root] (utils.py 283): INFO Epoch: [4] [1050/2502] eta: 0:57:48 lr: 0.000018 loss_cls: 3.0702 (3.1753) grad_norm: 2.8153 (3.1647) time: 2.3915 data: 0.0003 max mem: 13912 +[2024-12-06 11:30:58 root] (utils.py 283): INFO Epoch: [4] [1060/2502] eta: 0:57:25 lr: 0.000018 loss_cls: 3.2824 (3.1767) grad_norm: 2.7626 (3.1627) time: 2.3884 data: 0.0003 max mem: 13912 +[2024-12-06 11:31:22 root] (utils.py 283): INFO Epoch: [4] [1070/2502] eta: 0:57:01 lr: 0.000018 loss_cls: 3.3339 (3.1761) grad_norm: 2.6375 (3.1604) time: 2.3900 data: 0.0003 max mem: 13912 +[2024-12-06 11:31:46 root] (utils.py 283): INFO Epoch: [4] [1080/2502] eta: 0:56:37 lr: 0.000018 loss_cls: 3.2027 (3.1753) grad_norm: 2.6403 (3.1762) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 11:32:10 root] (utils.py 283): INFO Epoch: [4] [1090/2502] eta: 0:56:13 lr: 0.000018 loss_cls: 3.2411 (3.1768) grad_norm: 2.8419 (3.1818) time: 2.3933 data: 0.0003 max mem: 13912 +[2024-12-06 11:32:34 root] (utils.py 283): INFO Epoch: [4] [1100/2502] eta: 0:55:49 lr: 0.000018 loss_cls: 3.1971 (3.1741) grad_norm: 2.8794 (3.1807) time: 2.3919 data: 0.0003 max mem: 13912 +[2024-12-06 11:32:58 root] (utils.py 283): INFO Epoch: [4] [1110/2502] eta: 0:55:25 lr: 0.000018 loss_cls: 3.0965 (3.1747) grad_norm: 2.9135 (3.1775) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 11:33:22 root] (utils.py 283): INFO Epoch: [4] [1120/2502] eta: 0:55:01 lr: 0.000018 loss_cls: 3.3675 (3.1758) grad_norm: 2.8492 (3.1812) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 11:33:46 root] (utils.py 283): INFO Epoch: [4] [1130/2502] eta: 0:54:37 lr: 0.000018 loss_cls: 3.3983 (3.1755) grad_norm: 2.9243 (3.1801) time: 2.3868 data: 0.0003 max mem: 13912 +[2024-12-06 11:34:10 root] (utils.py 283): INFO Epoch: [4] [1140/2502] eta: 0:54:13 lr: 0.000018 loss_cls: 3.3581 (3.1768) grad_norm: 2.7821 (3.1788) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 11:34:33 root] (utils.py 283): INFO Epoch: [4] [1150/2502] eta: 0:53:49 lr: 0.000018 loss_cls: 3.3719 (3.1771) grad_norm: 2.7365 (3.1953) time: 2.3801 data: 0.0003 max mem: 13912 +[2024-12-06 11:34:57 root] (utils.py 283): INFO Epoch: [4] [1160/2502] eta: 0:53:25 lr: 0.000018 loss_cls: 3.3399 (3.1764) grad_norm: 2.8011 (3.1938) time: 2.3768 data: 0.0003 max mem: 13912 +[2024-12-06 11:35:21 root] (utils.py 283): INFO Epoch: [4] [1170/2502] eta: 0:53:02 lr: 0.000018 loss_cls: 3.1758 (3.1751) grad_norm: 2.8011 (3.1948) time: 2.3938 data: 0.0003 max mem: 13912 +[2024-12-06 11:35:45 root] (utils.py 283): INFO Epoch: [4] [1180/2502] eta: 0:52:38 lr: 0.000018 loss_cls: 3.0388 (3.1733) grad_norm: 2.8619 (3.1950) time: 2.3972 data: 0.0003 max mem: 13912 +[2024-12-06 11:36:09 root] (utils.py 283): INFO Epoch: [4] [1190/2502] eta: 0:52:14 lr: 0.000018 loss_cls: 3.3468 (3.1727) grad_norm: 2.7151 (3.1906) time: 2.3920 data: 0.0003 max mem: 13912 +[2024-12-06 11:36:33 root] (utils.py 283): INFO Epoch: [4] [1200/2502] eta: 0:51:50 lr: 0.000018 loss_cls: 3.3214 (3.1715) grad_norm: 2.6764 (3.1864) time: 2.3936 data: 0.0003 max mem: 13912 +[2024-12-06 11:36:57 root] (utils.py 283): INFO Epoch: [4] [1210/2502] eta: 0:51:26 lr: 0.000018 loss_cls: 3.2276 (3.1726) grad_norm: 2.7156 (3.1855) time: 2.3915 data: 0.0003 max mem: 13912 +[2024-12-06 11:37:21 root] (utils.py 283): INFO Epoch: [4] [1220/2502] eta: 0:51:02 lr: 0.000018 loss_cls: 3.1843 (3.1709) grad_norm: 2.8311 (3.1881) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 11:37:45 root] (utils.py 283): INFO Epoch: [4] [1230/2502] eta: 0:50:38 lr: 0.000018 loss_cls: 2.9130 (3.1713) grad_norm: 2.7919 (3.1845) time: 2.3882 data: 0.0003 max mem: 13912 +[2024-12-06 11:38:08 root] (utils.py 283): INFO Epoch: [4] [1240/2502] eta: 0:50:14 lr: 0.000018 loss_cls: 3.3173 (3.1695) grad_norm: 2.5289 (3.1794) time: 2.3851 data: 0.0003 max mem: 13912 +[2024-12-06 11:38:32 root] (utils.py 283): INFO Epoch: [4] [1250/2502] eta: 0:49:51 lr: 0.000018 loss_cls: 3.3400 (3.1703) grad_norm: 2.4885 (3.1754) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 11:38:56 root] (utils.py 283): INFO Epoch: [4] [1260/2502] eta: 0:49:27 lr: 0.000018 loss_cls: 3.3392 (3.1697) grad_norm: 2.5862 (3.1718) time: 2.3937 data: 0.0003 max mem: 13912 +[2024-12-06 11:39:20 root] (utils.py 283): INFO Epoch: [4] [1270/2502] eta: 0:49:03 lr: 0.000018 loss_cls: 3.0832 (3.1686) grad_norm: 2.5862 (3.1697) time: 2.3924 data: 0.0003 max mem: 13912 +[2024-12-06 11:39:44 root] (utils.py 283): INFO Epoch: [4] [1280/2502] eta: 0:48:39 lr: 0.000018 loss_cls: 3.2189 (3.1690) grad_norm: 2.6225 (3.1678) time: 2.3902 data: 0.0003 max mem: 13912 +[2024-12-06 11:40:07 root] (utils.py 283): INFO Epoch: [4] [1290/2502] eta: 0:48:14 lr: 0.000018 loss_cls: 3.3988 (3.1704) grad_norm: 2.6646 (3.1719) time: 2.3482 data: 0.0003 max mem: 13912 +[2024-12-06 11:40:31 root] (utils.py 283): INFO Epoch: [4] [1300/2502] eta: 0:47:50 lr: 0.000018 loss_cls: 3.3970 (3.1712) grad_norm: 2.9467 (3.1703) time: 2.3475 data: 0.0003 max mem: 13912 +[2024-12-06 11:41:06 root] (utils.py 283): INFO Epoch: [4] [1310/2502] eta: 0:47:36 lr: 0.000018 loss_cls: 3.2341 (3.1701) grad_norm: 2.7959 (3.1677) time: 2.9322 data: 0.0003 max mem: 13912 +[2024-12-06 11:41:29 root] (utils.py 283): INFO Epoch: [4] [1320/2502] eta: 0:47:12 lr: 0.000018 loss_cls: 3.1114 (3.1707) grad_norm: 2.6223 (3.1672) time: 2.9170 data: 0.0003 max mem: 13912 +[2024-12-06 11:41:53 root] (utils.py 283): INFO Epoch: [4] [1330/2502] eta: 0:46:48 lr: 0.000018 loss_cls: 3.2461 (3.1705) grad_norm: 2.8331 (3.1658) time: 2.3709 data: 0.0003 max mem: 13912 +[2024-12-06 11:42:17 root] (utils.py 283): INFO Epoch: [4] [1340/2502] eta: 0:46:24 lr: 0.000018 loss_cls: 3.3280 (3.1724) grad_norm: 2.8791 (3.1654) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 11:42:41 root] (utils.py 283): INFO Epoch: [4] [1350/2502] eta: 0:46:00 lr: 0.000018 loss_cls: 3.3786 (3.1734) grad_norm: 2.5740 (3.1631) time: 2.3888 data: 0.0003 max mem: 13912 +[2024-12-06 11:43:05 root] (utils.py 283): INFO Epoch: [4] [1360/2502] eta: 0:45:36 lr: 0.000018 loss_cls: 3.3066 (3.1736) grad_norm: 2.6329 (3.1609) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 11:43:29 root] (utils.py 283): INFO Epoch: [4] [1370/2502] eta: 0:45:12 lr: 0.000018 loss_cls: 3.3558 (3.1739) grad_norm: 2.8194 (3.1592) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 11:43:53 root] (utils.py 283): INFO Epoch: [4] [1380/2502] eta: 0:44:48 lr: 0.000018 loss_cls: 3.2071 (3.1724) grad_norm: 2.7225 (3.1577) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 11:44:17 root] (utils.py 283): INFO Epoch: [4] [1390/2502] eta: 0:44:24 lr: 0.000018 loss_cls: 3.1363 (3.1720) grad_norm: 2.6305 (3.1539) time: 2.3907 data: 0.0003 max mem: 13912 +[2024-12-06 11:44:41 root] (utils.py 283): INFO Epoch: [4] [1400/2502] eta: 0:44:00 lr: 0.000018 loss_cls: 3.1363 (3.1702) grad_norm: 2.5672 (3.1535) time: 2.3837 data: 0.0003 max mem: 13912 +[2024-12-06 11:45:04 root] (utils.py 283): INFO Epoch: [4] [1410/2502] eta: 0:43:36 lr: 0.000018 loss_cls: 3.2943 (3.1723) grad_norm: 2.6650 (3.1541) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 11:45:28 root] (utils.py 283): INFO Epoch: [4] [1420/2502] eta: 0:43:12 lr: 0.000018 loss_cls: 3.4230 (3.1715) grad_norm: 2.8542 (3.1518) time: 2.3880 data: 0.0003 max mem: 13912 +[2024-12-06 11:45:52 root] (utils.py 283): INFO Epoch: [4] [1430/2502] eta: 0:42:48 lr: 0.000018 loss_cls: 3.1918 (3.1720) grad_norm: 2.9287 (3.1626) time: 2.3724 data: 0.0003 max mem: 13912 +[2024-12-06 11:46:16 root] (utils.py 283): INFO Epoch: [4] [1440/2502] eta: 0:42:24 lr: 0.000018 loss_cls: 3.2127 (3.1719) grad_norm: 2.7453 (3.1595) time: 2.3737 data: 0.0003 max mem: 13912 +[2024-12-06 11:46:40 root] (utils.py 283): INFO Epoch: [4] [1450/2502] eta: 0:42:00 lr: 0.000018 loss_cls: 3.3027 (3.1723) grad_norm: 2.7350 (3.1604) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 11:47:04 root] (utils.py 283): INFO Epoch: [4] [1460/2502] eta: 0:41:36 lr: 0.000018 loss_cls: 3.3575 (3.1736) grad_norm: 2.7721 (3.1582) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 11:47:27 root] (utils.py 283): INFO Epoch: [4] [1470/2502] eta: 0:41:12 lr: 0.000018 loss_cls: 3.3301 (3.1727) grad_norm: 2.7021 (3.1580) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 11:47:51 root] (utils.py 283): INFO Epoch: [4] [1480/2502] eta: 0:40:48 lr: 0.000018 loss_cls: 3.3802 (3.1750) grad_norm: 2.7179 (3.1568) time: 2.3811 data: 0.0003 max mem: 13912 +[2024-12-06 11:48:15 root] (utils.py 283): INFO Epoch: [4] [1490/2502] eta: 0:40:24 lr: 0.000018 loss_cls: 3.5366 (3.1772) grad_norm: 2.7424 (3.1550) time: 2.3820 data: 0.0003 max mem: 13912 +[2024-12-06 11:48:39 root] (utils.py 283): INFO Epoch: [4] [1500/2502] eta: 0:40:00 lr: 0.000018 loss_cls: 3.4425 (3.1789) grad_norm: 2.7233 (3.1540) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 11:49:03 root] (utils.py 283): INFO Epoch: [4] [1510/2502] eta: 0:39:36 lr: 0.000018 loss_cls: 3.4038 (3.1798) grad_norm: 2.6561 (3.1508) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 11:49:27 root] (utils.py 283): INFO Epoch: [4] [1520/2502] eta: 0:39:11 lr: 0.000018 loss_cls: 3.1793 (3.1777) grad_norm: 2.5847 (3.1479) time: 2.3778 data: 0.0003 max mem: 13912 +[2024-12-06 11:49:51 root] (utils.py 283): INFO Epoch: [4] [1530/2502] eta: 0:38:47 lr: 0.000018 loss_cls: 3.1793 (3.1772) grad_norm: 2.8163 (3.1464) time: 2.3821 data: 0.0003 max mem: 13912 +[2024-12-06 11:50:14 root] (utils.py 283): INFO Epoch: [4] [1540/2502] eta: 0:38:24 lr: 0.000018 loss_cls: 3.3354 (3.1780) grad_norm: 2.8764 (3.1492) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 11:50:38 root] (utils.py 283): INFO Epoch: [4] [1550/2502] eta: 0:38:00 lr: 0.000018 loss_cls: 3.3582 (3.1777) grad_norm: 2.8437 (3.1536) time: 2.3906 data: 0.0003 max mem: 13912 +[2024-12-06 11:51:02 root] (utils.py 283): INFO Epoch: [4] [1560/2502] eta: 0:37:35 lr: 0.000018 loss_cls: 3.0759 (3.1770) grad_norm: 2.7824 (3.1525) time: 2.3848 data: 0.0002 max mem: 13912 +[2024-12-06 11:51:26 root] (utils.py 283): INFO Epoch: [4] [1570/2502] eta: 0:37:12 lr: 0.000018 loss_cls: 3.0733 (3.1759) grad_norm: 2.8145 (3.1505) time: 2.3841 data: 0.0003 max mem: 13912 +[2024-12-06 11:51:50 root] (utils.py 283): INFO Epoch: [4] [1580/2502] eta: 0:36:48 lr: 0.000018 loss_cls: 3.1181 (3.1753) grad_norm: 2.6787 (3.1486) time: 2.3913 data: 0.0003 max mem: 13912 +[2024-12-06 11:52:14 root] (utils.py 283): INFO Epoch: [4] [1590/2502] eta: 0:36:24 lr: 0.000018 loss_cls: 3.1508 (3.1749) grad_norm: 2.6863 (3.1475) time: 2.3947 data: 0.0003 max mem: 13912 +[2024-12-06 11:52:38 root] (utils.py 283): INFO Epoch: [4] [1600/2502] eta: 0:36:00 lr: 0.000018 loss_cls: 3.0670 (3.1743) grad_norm: 2.8172 (3.1458) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 11:53:02 root] (utils.py 283): INFO Epoch: [4] [1610/2502] eta: 0:35:36 lr: 0.000018 loss_cls: 3.0589 (3.1742) grad_norm: 2.7631 (3.1443) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 11:53:26 root] (utils.py 283): INFO Epoch: [4] [1620/2502] eta: 0:35:12 lr: 0.000018 loss_cls: 3.0145 (3.1723) grad_norm: 2.7631 (3.1430) time: 2.3859 data: 0.0002 max mem: 13912 +[2024-12-06 11:53:49 root] (utils.py 283): INFO Epoch: [4] [1630/2502] eta: 0:34:48 lr: 0.000018 loss_cls: 3.0168 (3.1721) grad_norm: 2.7506 (3.1422) time: 2.3744 data: 0.0002 max mem: 13912 +[2024-12-06 11:54:13 root] (utils.py 283): INFO Epoch: [4] [1640/2502] eta: 0:34:23 lr: 0.000018 loss_cls: 3.3299 (3.1723) grad_norm: 2.7506 (3.1414) time: 2.3618 data: 0.0002 max mem: 13912 +[2024-12-06 11:54:37 root] (utils.py 283): INFO Epoch: [4] [1650/2502] eta: 0:33:59 lr: 0.000018 loss_cls: 3.4762 (3.1725) grad_norm: 2.7971 (3.1394) time: 2.3716 data: 0.0003 max mem: 13912 +[2024-12-06 11:55:00 root] (utils.py 283): INFO Epoch: [4] [1660/2502] eta: 0:33:35 lr: 0.000018 loss_cls: 3.2264 (3.1720) grad_norm: 2.7971 (3.1387) time: 2.3778 data: 0.0003 max mem: 13912 +[2024-12-06 11:55:24 root] (utils.py 283): INFO Epoch: [4] [1670/2502] eta: 0:33:11 lr: 0.000018 loss_cls: 3.2310 (3.1729) grad_norm: 2.8262 (3.1381) time: 2.3546 data: 0.0003 max mem: 13912 +[2024-12-06 11:55:48 root] (utils.py 283): INFO Epoch: [4] [1680/2502] eta: 0:32:47 lr: 0.000018 loss_cls: 3.2812 (3.1720) grad_norm: 2.6124 (3.1358) time: 2.3604 data: 0.0003 max mem: 13912 +[2024-12-06 11:56:11 root] (utils.py 283): INFO Epoch: [4] [1690/2502] eta: 0:32:23 lr: 0.000018 loss_cls: 3.1651 (3.1724) grad_norm: 2.5581 (3.1324) time: 2.3784 data: 0.0003 max mem: 13912 +[2024-12-06 11:56:35 root] (utils.py 283): INFO Epoch: [4] [1700/2502] eta: 0:31:59 lr: 0.000018 loss_cls: 3.2065 (3.1727) grad_norm: 2.5322 (3.1299) time: 2.3702 data: 0.0002 max mem: 13912 +[2024-12-06 11:56:59 root] (utils.py 283): INFO Epoch: [4] [1710/2502] eta: 0:31:35 lr: 0.000018 loss_cls: 3.0909 (3.1712) grad_norm: 2.5373 (3.1273) time: 2.3615 data: 0.0003 max mem: 13912 +[2024-12-06 11:57:22 root] (utils.py 283): INFO Epoch: [4] [1720/2502] eta: 0:31:11 lr: 0.000018 loss_cls: 2.7553 (3.1697) grad_norm: 2.5207 (3.1264) time: 2.3669 data: 0.0003 max mem: 13912 +[2024-12-06 11:57:46 root] (utils.py 283): INFO Epoch: [4] [1730/2502] eta: 0:30:47 lr: 0.000018 loss_cls: 3.0029 (3.1688) grad_norm: 2.5423 (3.1254) time: 2.3709 data: 0.0003 max mem: 13912 +[2024-12-06 11:58:10 root] (utils.py 283): INFO Epoch: [4] [1740/2502] eta: 0:30:23 lr: 0.000018 loss_cls: 3.1249 (3.1687) grad_norm: 2.8344 (3.1288) time: 2.3637 data: 0.0002 max mem: 13912 +[2024-12-06 11:58:33 root] (utils.py 283): INFO Epoch: [4] [1750/2502] eta: 0:29:59 lr: 0.000018 loss_cls: 3.1235 (3.1674) grad_norm: 2.7724 (3.1278) time: 2.3692 data: 0.0002 max mem: 13912 +[2024-12-06 11:58:57 root] (utils.py 283): INFO Epoch: [4] [1760/2502] eta: 0:29:35 lr: 0.000018 loss_cls: 3.2888 (3.1678) grad_norm: 2.7517 (3.1317) time: 2.3766 data: 0.0002 max mem: 13912 +[2024-12-06 11:59:21 root] (utils.py 283): INFO Epoch: [4] [1770/2502] eta: 0:29:11 lr: 0.000018 loss_cls: 3.3289 (3.1684) grad_norm: 2.7342 (3.1312) time: 2.3608 data: 0.0002 max mem: 13912 +[2024-12-06 11:59:44 root] (utils.py 283): INFO Epoch: [4] [1780/2502] eta: 0:28:47 lr: 0.000018 loss_cls: 3.2524 (3.1684) grad_norm: 2.5657 (3.1278) time: 2.3602 data: 0.0002 max mem: 13912 +[2024-12-06 12:00:08 root] (utils.py 283): INFO Epoch: [4] [1790/2502] eta: 0:28:23 lr: 0.000018 loss_cls: 3.0794 (3.1671) grad_norm: 2.5552 (3.1262) time: 2.3745 data: 0.0002 max mem: 13912 +[2024-12-06 12:00:32 root] (utils.py 283): INFO Epoch: [4] [1800/2502] eta: 0:27:59 lr: 0.000018 loss_cls: 3.0938 (3.1679) grad_norm: 2.6091 (3.1259) time: 2.3616 data: 0.0002 max mem: 13912 +[2024-12-06 12:00:55 root] (utils.py 283): INFO Epoch: [4] [1810/2502] eta: 0:27:35 lr: 0.000018 loss_cls: 3.3958 (3.1688) grad_norm: 2.7364 (3.1256) time: 2.3635 data: 0.0002 max mem: 13912 +[2024-12-06 12:01:19 root] (utils.py 283): INFO Epoch: [4] [1820/2502] eta: 0:27:11 lr: 0.000018 loss_cls: 3.2206 (3.1682) grad_norm: 2.8829 (3.1268) time: 2.3750 data: 0.0002 max mem: 13912 +[2024-12-06 12:01:42 root] (utils.py 283): INFO Epoch: [4] [1830/2502] eta: 0:26:47 lr: 0.000018 loss_cls: 3.0894 (3.1676) grad_norm: 2.8844 (3.1261) time: 2.3565 data: 0.0002 max mem: 13912 +[2024-12-06 12:02:06 root] (utils.py 283): INFO Epoch: [4] [1840/2502] eta: 0:26:23 lr: 0.000018 loss_cls: 3.1811 (3.1678) grad_norm: 2.7886 (3.1247) time: 2.3605 data: 0.0002 max mem: 13912 +[2024-12-06 12:02:30 root] (utils.py 283): INFO Epoch: [4] [1850/2502] eta: 0:25:59 lr: 0.000018 loss_cls: 3.1811 (3.1672) grad_norm: 2.7411 (3.1243) time: 2.3810 data: 0.0002 max mem: 13912 +[2024-12-06 12:02:54 root] (utils.py 283): INFO Epoch: [4] [1860/2502] eta: 0:25:35 lr: 0.000018 loss_cls: 3.0558 (3.1665) grad_norm: 2.8317 (3.1270) time: 2.3773 data: 0.0002 max mem: 13912 +[2024-12-06 12:03:17 root] (utils.py 283): INFO Epoch: [4] [1870/2502] eta: 0:25:11 lr: 0.000018 loss_cls: 3.1694 (3.1667) grad_norm: 2.8568 (3.1289) time: 2.3639 data: 0.0003 max mem: 13912 +[2024-12-06 12:03:41 root] (utils.py 283): INFO Epoch: [4] [1880/2502] eta: 0:24:47 lr: 0.000018 loss_cls: 3.1518 (3.1654) grad_norm: 2.8169 (3.1336) time: 2.3672 data: 0.0003 max mem: 13912 +[2024-12-06 12:04:05 root] (utils.py 283): INFO Epoch: [4] [1890/2502] eta: 0:24:23 lr: 0.000018 loss_cls: 3.4799 (3.1671) grad_norm: 2.8836 (3.1347) time: 2.3798 data: 0.0002 max mem: 13912 +[2024-12-06 12:04:29 root] (utils.py 283): INFO Epoch: [4] [1900/2502] eta: 0:23:59 lr: 0.000018 loss_cls: 3.4799 (3.1676) grad_norm: 2.9165 (3.1354) time: 2.3784 data: 0.0002 max mem: 13912 +[2024-12-06 12:04:52 root] (utils.py 283): INFO Epoch: [4] [1910/2502] eta: 0:23:35 lr: 0.000018 loss_cls: 3.2261 (3.1676) grad_norm: 2.9165 (3.1344) time: 2.3715 data: 0.0003 max mem: 13912 +[2024-12-06 12:05:16 root] (utils.py 283): INFO Epoch: [4] [1920/2502] eta: 0:23:11 lr: 0.000018 loss_cls: 3.2736 (3.1680) grad_norm: 2.7394 (3.1326) time: 2.3568 data: 0.0002 max mem: 13912 +[2024-12-06 12:05:40 root] (utils.py 283): INFO Epoch: [4] [1930/2502] eta: 0:22:47 lr: 0.000018 loss_cls: 3.0725 (3.1673) grad_norm: 2.7334 (3.1376) time: 2.3621 data: 0.0002 max mem: 13912 +[2024-12-06 12:06:03 root] (utils.py 283): INFO Epoch: [4] [1940/2502] eta: 0:22:23 lr: 0.000018 loss_cls: 3.1256 (3.1678) grad_norm: 2.7560 (3.1363) time: 2.3791 data: 0.0002 max mem: 13912 +[2024-12-06 12:06:27 root] (utils.py 283): INFO Epoch: [4] [1950/2502] eta: 0:21:59 lr: 0.000018 loss_cls: 3.2267 (3.1668) grad_norm: 2.6460 (3.1341) time: 2.3854 data: 0.0002 max mem: 13912 +[2024-12-06 12:06:51 root] (utils.py 283): INFO Epoch: [4] [1960/2502] eta: 0:21:35 lr: 0.000018 loss_cls: 3.1633 (3.1671) grad_norm: 2.8829 (3.1482) time: 2.3862 data: 0.0003 max mem: 13912 +[2024-12-06 12:07:15 root] (utils.py 283): INFO Epoch: [4] [1970/2502] eta: 0:21:11 lr: 0.000018 loss_cls: 3.0502 (3.1668) grad_norm: 2.9713 (3.1496) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 12:07:39 root] (utils.py 283): INFO Epoch: [4] [1980/2502] eta: 0:20:47 lr: 0.000018 loss_cls: 2.9641 (3.1664) grad_norm: 2.9713 (3.1500) time: 2.3932 data: 0.0003 max mem: 13912 +[2024-12-06 12:08:03 root] (utils.py 283): INFO Epoch: [4] [1990/2502] eta: 0:20:23 lr: 0.000018 loss_cls: 3.0887 (3.1655) grad_norm: 2.8746 (3.1485) time: 2.3927 data: 0.0003 max mem: 13912 +[2024-12-06 12:08:27 root] (utils.py 283): INFO Epoch: [4] [2000/2502] eta: 0:19:59 lr: 0.000018 loss_cls: 3.3490 (3.1670) grad_norm: 2.9518 (3.1494) time: 2.3848 data: 0.0003 max mem: 13912 +[2024-12-06 12:08:51 root] (utils.py 283): INFO Epoch: [4] [2010/2502] eta: 0:19:36 lr: 0.000018 loss_cls: 3.3132 (3.1664) grad_norm: 2.8977 (3.1478) time: 2.3822 data: 0.0003 max mem: 13912 +[2024-12-06 12:09:15 root] (utils.py 283): INFO Epoch: [4] [2020/2502] eta: 0:19:12 lr: 0.000018 loss_cls: 3.2204 (3.1663) grad_norm: 2.8082 (3.1470) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 12:09:38 root] (utils.py 283): INFO Epoch: [4] [2030/2502] eta: 0:18:48 lr: 0.000018 loss_cls: 3.2599 (3.1668) grad_norm: 2.8082 (3.1457) time: 2.3861 data: 0.0002 max mem: 13912 +[2024-12-06 12:10:02 root] (utils.py 283): INFO Epoch: [4] [2040/2502] eta: 0:18:24 lr: 0.000018 loss_cls: 3.1894 (3.1663) grad_norm: 2.8426 (3.1453) time: 2.3864 data: 0.0002 max mem: 13912 +[2024-12-06 12:10:26 root] (utils.py 283): INFO Epoch: [4] [2050/2502] eta: 0:18:00 lr: 0.000018 loss_cls: 3.2300 (3.1670) grad_norm: 2.7651 (3.1437) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 12:10:50 root] (utils.py 283): INFO Epoch: [4] [2060/2502] eta: 0:17:36 lr: 0.000018 loss_cls: 3.3325 (3.1669) grad_norm: 2.7143 (3.1431) time: 2.3836 data: 0.0002 max mem: 13912 +[2024-12-06 12:11:14 root] (utils.py 283): INFO Epoch: [4] [2070/2502] eta: 0:17:12 lr: 0.000018 loss_cls: 3.3331 (3.1676) grad_norm: 3.0568 (3.1459) time: 2.3730 data: 0.0002 max mem: 13912 +[2024-12-06 12:11:37 root] (utils.py 283): INFO Epoch: [4] [2080/2502] eta: 0:16:48 lr: 0.000018 loss_cls: 3.2142 (3.1678) grad_norm: 2.9859 (3.1438) time: 2.3568 data: 0.0002 max mem: 13912 +[2024-12-06 12:12:01 root] (utils.py 283): INFO Epoch: [4] [2090/2502] eta: 0:16:24 lr: 0.000018 loss_cls: 3.0598 (3.1659) grad_norm: 2.5506 (3.1426) time: 2.3632 data: 0.0002 max mem: 13912 +[2024-12-06 12:12:25 root] (utils.py 283): INFO Epoch: [4] [2100/2502] eta: 0:16:00 lr: 0.000018 loss_cls: 2.8946 (3.1649) grad_norm: 2.5506 (3.1405) time: 2.3781 data: 0.0002 max mem: 13912 +[2024-12-06 12:12:48 root] (utils.py 283): INFO Epoch: [4] [2110/2502] eta: 0:15:36 lr: 0.000018 loss_cls: 2.9551 (3.1643) grad_norm: 2.5708 (3.1384) time: 2.3737 data: 0.0002 max mem: 13912 +[2024-12-06 12:13:12 root] (utils.py 283): INFO Epoch: [4] [2120/2502] eta: 0:15:12 lr: 0.000018 loss_cls: 3.0768 (3.1637) grad_norm: 2.5708 (3.1363) time: 2.3630 data: 0.0002 max mem: 13912 +[2024-12-06 12:13:36 root] (utils.py 283): INFO Epoch: [4] [2130/2502] eta: 0:14:48 lr: 0.000018 loss_cls: 2.9863 (3.1629) grad_norm: 2.5736 (3.1353) time: 2.3639 data: 0.0002 max mem: 13912 +[2024-12-06 12:13:59 root] (utils.py 283): INFO Epoch: [4] [2140/2502] eta: 0:14:24 lr: 0.000018 loss_cls: 3.1125 (3.1626) grad_norm: 2.7350 (3.1340) time: 2.3678 data: 0.0002 max mem: 13912 +[2024-12-06 12:14:23 root] (utils.py 283): INFO Epoch: [4] [2150/2502] eta: 0:14:00 lr: 0.000018 loss_cls: 3.1371 (3.1622) grad_norm: 2.7350 (3.1319) time: 2.3584 data: 0.0002 max mem: 13912 +[2024-12-06 12:14:47 root] (utils.py 283): INFO Epoch: [4] [2160/2502] eta: 0:13:37 lr: 0.000018 loss_cls: 3.2154 (3.1630) grad_norm: 2.6394 (3.1422) time: 2.3657 data: 0.0002 max mem: 13912 +[2024-12-06 12:15:10 root] (utils.py 283): INFO Epoch: [4] [2170/2502] eta: 0:13:13 lr: 0.000018 loss_cls: 3.1326 (3.1620) grad_norm: 2.8778 (3.1410) time: 2.3759 data: 0.0002 max mem: 13912 +[2024-12-06 12:15:34 root] (utils.py 283): INFO Epoch: [4] [2180/2502] eta: 0:12:49 lr: 0.000018 loss_cls: 2.9527 (3.1610) grad_norm: 2.8796 (3.1408) time: 2.3583 data: 0.0002 max mem: 13912 +[2024-12-06 12:15:58 root] (utils.py 283): INFO Epoch: [4] [2190/2502] eta: 0:12:25 lr: 0.000018 loss_cls: 2.9736 (3.1604) grad_norm: 2.8796 (3.1407) time: 2.3600 data: 0.0002 max mem: 13912 +[2024-12-06 12:16:21 root] (utils.py 283): INFO Epoch: [4] [2200/2502] eta: 0:12:01 lr: 0.000018 loss_cls: 2.9367 (3.1594) grad_norm: 2.8373 (3.1402) time: 2.3587 data: 0.0002 max mem: 13912 +[2024-12-06 12:16:45 root] (utils.py 283): INFO Epoch: [4] [2210/2502] eta: 0:11:37 lr: 0.000018 loss_cls: 3.0398 (3.1594) grad_norm: 2.7024 (3.1376) time: 2.3585 data: 0.0002 max mem: 13912 +[2024-12-06 12:17:08 root] (utils.py 283): INFO Epoch: [4] [2220/2502] eta: 0:11:13 lr: 0.000018 loss_cls: 3.2511 (3.1592) grad_norm: 2.6908 (3.1379) time: 2.3751 data: 0.0002 max mem: 13912 +[2024-12-06 12:17:32 root] (utils.py 283): INFO Epoch: [4] [2230/2502] eta: 0:10:49 lr: 0.000018 loss_cls: 3.2511 (3.1592) grad_norm: 2.7527 (3.1366) time: 2.3738 data: 0.0002 max mem: 13912 +[2024-12-06 12:17:56 root] (utils.py 283): INFO Epoch: [4] [2240/2502] eta: 0:10:25 lr: 0.000018 loss_cls: 3.2380 (3.1592) grad_norm: 2.9078 (3.1382) time: 2.3620 data: 0.0002 max mem: 13912 +[2024-12-06 12:18:19 root] (utils.py 283): INFO Epoch: [4] [2250/2502] eta: 0:10:01 lr: 0.000018 loss_cls: 3.2488 (3.1598) grad_norm: 3.0267 (3.1371) time: 2.3630 data: 0.0002 max mem: 13912 +[2024-12-06 12:18:43 root] (utils.py 283): INFO Epoch: [4] [2260/2502] eta: 0:09:37 lr: 0.000018 loss_cls: 3.3558 (3.1604) grad_norm: 2.7746 (3.1373) time: 2.3758 data: 0.0002 max mem: 13912 +[2024-12-06 12:19:07 root] (utils.py 283): INFO Epoch: [4] [2270/2502] eta: 0:09:13 lr: 0.000018 loss_cls: 3.1054 (3.1592) grad_norm: 2.7746 (3.1363) time: 2.3621 data: 0.0002 max mem: 13912 +[2024-12-06 12:19:31 root] (utils.py 283): INFO Epoch: [4] [2280/2502] eta: 0:08:50 lr: 0.000018 loss_cls: 3.1447 (3.1601) grad_norm: 2.8778 (3.1380) time: 2.3638 data: 0.0002 max mem: 13912 +[2024-12-06 12:19:54 root] (utils.py 283): INFO Epoch: [4] [2290/2502] eta: 0:08:26 lr: 0.000018 loss_cls: 3.4508 (3.1606) grad_norm: 3.0072 (3.1418) time: 2.3740 data: 0.0002 max mem: 13912 +[2024-12-06 12:20:18 root] (utils.py 283): INFO Epoch: [4] [2300/2502] eta: 0:08:02 lr: 0.000018 loss_cls: 3.3115 (3.1604) grad_norm: 2.7043 (3.1416) time: 2.3581 data: 0.0002 max mem: 13912 +[2024-12-06 12:20:41 root] (utils.py 283): INFO Epoch: [4] [2310/2502] eta: 0:07:38 lr: 0.000018 loss_cls: 3.2263 (3.1605) grad_norm: 2.6848 (3.1408) time: 2.3639 data: 0.0002 max mem: 13912 +[2024-12-06 12:21:05 root] (utils.py 283): INFO Epoch: [4] [2320/2502] eta: 0:07:14 lr: 0.000018 loss_cls: 3.3988 (3.1611) grad_norm: 2.6848 (3.1386) time: 2.3833 data: 0.0002 max mem: 13912 +[2024-12-06 12:21:29 root] (utils.py 283): INFO Epoch: [4] [2330/2502] eta: 0:06:50 lr: 0.000018 loss_cls: 3.2598 (3.1602) grad_norm: 2.6223 (3.1402) time: 2.3829 data: 0.0003 max mem: 13912 +[2024-12-06 12:21:53 root] (utils.py 283): INFO Epoch: [4] [2340/2502] eta: 0:06:26 lr: 0.000018 loss_cls: 3.2598 (3.1604) grad_norm: 2.7674 (3.1399) time: 2.3819 data: 0.0003 max mem: 13912 +[2024-12-06 12:22:17 root] (utils.py 283): INFO Epoch: [4] [2350/2502] eta: 0:06:02 lr: 0.000018 loss_cls: 3.2686 (3.1596) grad_norm: 2.7480 (3.1389) time: 2.3832 data: 0.0003 max mem: 13912 +[2024-12-06 12:22:41 root] (utils.py 283): INFO Epoch: [4] [2360/2502] eta: 0:05:39 lr: 0.000018 loss_cls: 2.9566 (3.1579) grad_norm: 2.7282 (3.1382) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 12:23:05 root] (utils.py 283): INFO Epoch: [4] [2370/2502] eta: 0:05:15 lr: 0.000018 loss_cls: 2.9449 (3.1572) grad_norm: 2.7324 (3.1369) time: 2.3877 data: 0.0003 max mem: 13912 +[2024-12-06 12:23:28 root] (utils.py 283): INFO Epoch: [4] [2380/2502] eta: 0:04:51 lr: 0.000018 loss_cls: 3.3261 (3.1573) grad_norm: 2.8148 (3.1359) time: 2.3761 data: 0.0003 max mem: 13912 +[2024-12-06 12:23:52 root] (utils.py 283): INFO Epoch: [4] [2390/2502] eta: 0:04:27 lr: 0.000018 loss_cls: 3.1819 (3.1565) grad_norm: 2.7522 (3.1346) time: 2.3792 data: 0.0003 max mem: 13912 +[2024-12-06 12:24:16 root] (utils.py 283): INFO Epoch: [4] [2400/2502] eta: 0:04:03 lr: 0.000018 loss_cls: 3.0876 (3.1570) grad_norm: 2.7374 (3.1342) time: 2.4063 data: 0.0003 max mem: 13912 +[2024-12-06 12:24:40 root] (utils.py 283): INFO Epoch: [4] [2410/2502] eta: 0:03:39 lr: 0.000018 loss_cls: 3.0254 (3.1559) grad_norm: 2.7849 (3.1330) time: 2.4114 data: 0.0003 max mem: 13912 +[2024-12-06 12:25:04 root] (utils.py 283): INFO Epoch: [4] [2420/2502] eta: 0:03:15 lr: 0.000018 loss_cls: 3.1921 (3.1562) grad_norm: 2.5711 (3.1314) time: 2.4012 data: 0.0003 max mem: 13912 +[2024-12-06 12:25:28 root] (utils.py 283): INFO Epoch: [4] [2430/2502] eta: 0:02:51 lr: 0.000018 loss_cls: 3.1692 (3.1552) grad_norm: 2.6032 (3.1312) time: 2.4029 data: 0.0003 max mem: 13912 +[2024-12-06 12:25:52 root] (utils.py 283): INFO Epoch: [4] [2440/2502] eta: 0:02:28 lr: 0.000018 loss_cls: 3.0405 (3.1556) grad_norm: 2.8107 (3.1297) time: 2.3989 data: 0.0003 max mem: 13912 +[2024-12-06 12:26:16 root] (utils.py 283): INFO Epoch: [4] [2450/2502] eta: 0:02:04 lr: 0.000018 loss_cls: 3.2286 (3.1551) grad_norm: 2.6652 (3.1288) time: 2.3975 data: 0.0003 max mem: 13912 +[2024-12-06 12:26:40 root] (utils.py 283): INFO Epoch: [4] [2460/2502] eta: 0:01:40 lr: 0.000018 loss_cls: 3.2687 (3.1558) grad_norm: 2.6658 (3.1278) time: 2.4021 data: 0.0003 max mem: 13912 +[2024-12-06 12:27:04 root] (utils.py 283): INFO Epoch: [4] [2470/2502] eta: 0:01:16 lr: 0.000018 loss_cls: 3.1556 (3.1556) grad_norm: 2.7121 (3.1265) time: 2.4023 data: 0.0003 max mem: 13912 +[2024-12-06 12:27:29 root] (utils.py 283): INFO Epoch: [4] [2480/2502] eta: 0:00:52 lr: 0.000018 loss_cls: 3.1556 (3.1553) grad_norm: 2.7338 (3.1250) time: 2.4201 data: 0.0003 max mem: 13912 +[2024-12-06 12:27:53 root] (utils.py 283): INFO Epoch: [4] [2490/2502] eta: 0:00:28 lr: 0.000018 loss_cls: 3.2044 (3.1552) grad_norm: 2.8513 (3.1268) time: 2.4358 data: 0.0241 max mem: 13912 +[2024-12-06 12:28:17 root] (utils.py 283): INFO Epoch: [4] [2500/2502] eta: 0:00:04 lr: 0.000018 loss_cls: 3.2434 (3.1547) grad_norm: 2.9069 (3.1257) time: 2.4097 data: 0.0241 max mem: 13912 +[2024-12-06 12:28:20 root] (utils.py 283): INFO Epoch: [4] [2501/2502] eta: 0:00:02 lr: 0.000018 loss_cls: 3.2434 (3.1548) grad_norm: 2.9568 (3.1258) time: 2.4199 data: 0.0241 max mem: 13912 +[2024-12-06 12:28:20 root] (utils.py 297): INFO Epoch: [4] Total time: 1:39:36 (2.3885 s / it) +[2024-12-06 12:28:20 root] (engine.py 179): INFO Averaged stats:lr: 0.000018 loss_cls: 3.2434 (3.1633) grad_norm: 2.9568 (3.1258) +[2024-12-06 12:28:21 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:28 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4710 (0.4710) acc1: 92.9688 (92.9688) acc3: 97.6562 (97.6562) acc5: 98.4375 (98.4375) time: 0.2958 data: 0.0005 max mem: 13912 +[2024-12-06 12:28:24 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:26 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7147 (0.7002) acc1: 85.1562 (85.3693) acc3: 96.0938 (94.9574) acc5: 96.8750 (96.5909) time: 0.3021 data: 0.0004 max mem: 13912 +[2024-12-06 12:28:27 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:24 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7147 (0.7372) acc1: 84.3750 (84.5610) acc3: 95.3125 (94.7173) acc5: 96.8750 (96.5030) time: 0.3139 data: 0.0005 max mem: 13912 +[2024-12-06 12:28:30 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:21 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7950 (0.7537) acc1: 83.5938 (83.8962) acc3: 95.3125 (94.7329) acc5: 96.8750 (96.6230) time: 0.3251 data: 0.0005 max mem: 13912 +[2024-12-06 12:28:34 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:18 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7635 (0.7547) acc1: 83.5938 (83.8796) acc3: 95.3125 (94.7980) acc5: 96.8750 (96.7035) time: 0.3266 data: 0.0004 max mem: 13912 +[2024-12-06 12:28:37 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8339 (0.8389) acc1: 78.1250 (81.7555) acc3: 91.4062 (93.4589) acc5: 94.5312 (95.7567) time: 0.3139 data: 0.0004 max mem: 13912 +[2024-12-06 12:28:40 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:12 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1030 (0.8701) acc1: 75.0000 (81.3781) acc3: 87.5000 (92.7894) acc5: 91.4062 (95.2100) time: 0.3128 data: 0.0004 max mem: 13912 +[2024-12-06 12:28:43 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0766 (0.8996) acc1: 78.1250 (80.6998) acc3: 89.8438 (92.4846) acc5: 92.1875 (94.9604) time: 0.3266 data: 0.0004 max mem: 13912 +[2024-12-06 12:28:46 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:05 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0809 (0.9274) acc1: 75.7812 (80.0251) acc3: 89.0625 (91.9464) acc5: 92.1875 (94.5312) time: 0.3140 data: 0.0006 max mem: 13912 +[2024-12-06 12:28:49 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:02 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0990 (0.9522) acc1: 75.0000 (79.3183) acc3: 87.5000 (91.5436) acc5: 91.4062 (94.2651) time: 0.3119 data: 0.0006 max mem: 13912 +[2024-12-06 12:28:52 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0182 (0.9473) acc1: 76.1905 (79.2880) acc3: 89.0625 (91.6480) acc5: 91.4062 (94.3440) time: 0.3203 data: 0.0006 max mem: 13912 +[2024-12-06 12:28:52 root] (utils.py 297): INFO Test: Total time: 0:00:31 (0.3179 s / it) +[2024-12-06 12:28:52 root] (engine.py 264): INFO * Acc@1 79.228 Acc@3 91.808 Acc@5 94.502 loss 0.941 flops 3.584 layer_flops 3.536 +[2024-12-06 12:28:52 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.2% +[2024-12-06 12:28:52 root] (main.py 551): INFO Max accuracy: 79.25% +[2024-12-06 12:28:54 root] (utils.py 283): INFO Epoch: [5] [ 0/2502] eta: 1:39:04 lr: 0.000017 loss_cls: 3.6251 (3.6251) grad_norm: 2.5905 (2.5905) time: 2.3760 data: 0.0002 max mem: 13912 +[2024-12-06 12:29:18 root] (utils.py 283): INFO Epoch: [5] [ 10/2502] eta: 1:39:45 lr: 0.000017 loss_cls: 3.2121 (3.3528) grad_norm: 2.7898 (3.0411) time: 2.4019 data: 0.0003 max mem: 13912 +[2024-12-06 12:29:42 root] (utils.py 283): INFO Epoch: [5] [ 20/2502] eta: 1:39:18 lr: 0.000017 loss_cls: 3.2856 (3.2778) grad_norm: 2.8660 (3.2605) time: 2.4017 data: 0.0003 max mem: 13912 +[2024-12-06 12:30:06 root] (utils.py 283): INFO Epoch: [5] [ 30/2502] eta: 1:38:55 lr: 0.000017 loss_cls: 3.1606 (3.2031) grad_norm: 2.8790 (3.3509) time: 2.4005 data: 0.0003 max mem: 13912 +[2024-12-06 12:30:30 root] (utils.py 283): INFO Epoch: [5] [ 40/2502] eta: 1:38:28 lr: 0.000017 loss_cls: 3.0836 (3.2100) grad_norm: 2.8283 (3.2233) time: 2.3994 data: 0.0002 max mem: 13912 +[2024-12-06 12:30:54 root] (utils.py 283): INFO Epoch: [5] [ 50/2502] eta: 1:38:04 lr: 0.000017 loss_cls: 3.1377 (3.1677) grad_norm: 2.8283 (3.1446) time: 2.3981 data: 0.0003 max mem: 13912 +[2024-12-06 12:31:18 root] (utils.py 283): INFO Epoch: [5] [ 60/2502] eta: 1:37:41 lr: 0.000017 loss_cls: 3.1211 (3.1620) grad_norm: 2.7374 (3.1228) time: 2.4008 data: 0.0003 max mem: 13912 +[2024-12-06 12:31:42 root] (utils.py 283): INFO Epoch: [5] [ 70/2502] eta: 1:37:24 lr: 0.000017 loss_cls: 3.3721 (3.1875) grad_norm: 2.6837 (3.1801) time: 2.4113 data: 0.0003 max mem: 13912 +[2024-12-06 12:32:06 root] (utils.py 283): INFO Epoch: [5] [ 80/2502] eta: 1:37:01 lr: 0.000017 loss_cls: 3.5263 (3.2174) grad_norm: 2.6901 (3.2722) time: 2.4145 data: 0.0003 max mem: 13912 +[2024-12-06 12:32:30 root] (utils.py 283): INFO Epoch: [5] [ 90/2502] eta: 1:36:37 lr: 0.000017 loss_cls: 3.3375 (3.2016) grad_norm: 2.6771 (3.2098) time: 2.4047 data: 0.0003 max mem: 13912 +[2024-12-06 12:32:54 root] (utils.py 283): INFO Epoch: [5] [ 100/2502] eta: 1:36:11 lr: 0.000017 loss_cls: 3.0169 (3.1860) grad_norm: 2.5993 (3.1562) time: 2.3998 data: 0.0003 max mem: 13912 +[2024-12-06 12:33:18 root] (utils.py 283): INFO Epoch: [5] [ 110/2502] eta: 1:35:47 lr: 0.000017 loss_cls: 3.2535 (3.1865) grad_norm: 2.5993 (3.1166) time: 2.4001 data: 0.0003 max mem: 13912 +[2024-12-06 12:33:42 root] (utils.py 283): INFO Epoch: [5] [ 120/2502] eta: 1:35:22 lr: 0.000017 loss_cls: 3.2229 (3.1803) grad_norm: 2.6011 (3.0862) time: 2.4003 data: 0.0003 max mem: 13912 +[2024-12-06 12:34:06 root] (utils.py 283): INFO Epoch: [5] [ 130/2502] eta: 1:34:57 lr: 0.000017 loss_cls: 3.1776 (3.1742) grad_norm: 2.7692 (3.1067) time: 2.3985 data: 0.0003 max mem: 13912 +[2024-12-06 12:34:30 root] (utils.py 283): INFO Epoch: [5] [ 140/2502] eta: 1:34:32 lr: 0.000017 loss_cls: 3.2169 (3.1764) grad_norm: 2.8118 (3.0766) time: 2.3949 data: 0.0003 max mem: 13912 +[2024-12-06 12:34:55 root] (utils.py 283): INFO Epoch: [5] [ 150/2502] eta: 1:34:12 lr: 0.000017 loss_cls: 3.3214 (3.1806) grad_norm: 2.8241 (3.2445) time: 2.4092 data: 0.0003 max mem: 13912 +[2024-12-06 12:35:18 root] (utils.py 283): INFO Epoch: [5] [ 160/2502] eta: 1:33:47 lr: 0.000017 loss_cls: 3.4994 (3.1931) grad_norm: 2.8277 (3.2207) time: 2.4127 data: 0.0003 max mem: 13912 +[2024-12-06 12:35:42 root] (utils.py 283): INFO Epoch: [5] [ 170/2502] eta: 1:33:17 lr: 0.000017 loss_cls: 3.4139 (3.1965) grad_norm: 2.7668 (3.2243) time: 2.3804 data: 0.0003 max mem: 13912 +[2024-12-06 12:36:06 root] (utils.py 283): INFO Epoch: [5] [ 180/2502] eta: 1:32:55 lr: 0.000017 loss_cls: 3.3460 (3.1944) grad_norm: 2.7831 (3.2291) time: 2.3875 data: 0.0003 max mem: 13912 +[2024-12-06 12:36:30 root] (utils.py 283): INFO Epoch: [5] [ 190/2502] eta: 1:32:33 lr: 0.000017 loss_cls: 3.3217 (3.1955) grad_norm: 2.8303 (3.2104) time: 2.4136 data: 0.0003 max mem: 13912 +[2024-12-06 12:36:54 root] (utils.py 283): INFO Epoch: [5] [ 200/2502] eta: 1:32:09 lr: 0.000017 loss_cls: 3.3217 (3.1895) grad_norm: 2.8123 (3.1924) time: 2.4087 data: 0.0003 max mem: 13912 +[2024-12-06 12:37:18 root] (utils.py 283): INFO Epoch: [5] [ 210/2502] eta: 1:31:44 lr: 0.000017 loss_cls: 3.3122 (3.1973) grad_norm: 2.8375 (3.1830) time: 2.4011 data: 0.0003 max mem: 13912 +[2024-12-06 12:37:42 root] (utils.py 283): INFO Epoch: [5] [ 220/2502] eta: 1:31:19 lr: 0.000017 loss_cls: 3.3051 (3.1982) grad_norm: 2.7796 (3.1833) time: 2.3938 data: 0.0003 max mem: 13912 +[2024-12-06 12:38:06 root] (utils.py 283): INFO Epoch: [5] [ 230/2502] eta: 1:30:55 lr: 0.000017 loss_cls: 3.2709 (3.1972) grad_norm: 2.7796 (3.1625) time: 2.3935 data: 0.0003 max mem: 13912 +[2024-12-06 12:38:30 root] (utils.py 283): INFO Epoch: [5] [ 240/2502] eta: 1:30:31 lr: 0.000017 loss_cls: 2.9872 (3.1832) grad_norm: 2.8005 (3.1546) time: 2.3994 data: 0.0003 max mem: 13912 +[2024-12-06 12:38:54 root] (utils.py 283): INFO Epoch: [5] [ 250/2502] eta: 1:30:07 lr: 0.000017 loss_cls: 2.9332 (3.1833) grad_norm: 3.0142 (3.2039) time: 2.4025 data: 0.0003 max mem: 13912 +[2024-12-06 12:39:19 root] (utils.py 283): INFO Epoch: [5] [ 260/2502] eta: 1:29:44 lr: 0.000017 loss_cls: 3.3596 (3.1930) grad_norm: 2.8244 (3.1859) time: 2.4111 data: 0.0003 max mem: 13912 +[2024-12-06 12:39:43 root] (utils.py 283): INFO Epoch: [5] [ 270/2502] eta: 1:29:20 lr: 0.000017 loss_cls: 3.0346 (3.1804) grad_norm: 2.7157 (3.1703) time: 2.4103 data: 0.0003 max mem: 13912 +[2024-12-06 12:40:07 root] (utils.py 283): INFO Epoch: [5] [ 280/2502] eta: 1:28:56 lr: 0.000017 loss_cls: 2.8571 (3.1743) grad_norm: 2.6778 (3.1499) time: 2.4024 data: 0.0003 max mem: 13912 +[2024-12-06 12:40:31 root] (utils.py 283): INFO Epoch: [5] [ 290/2502] eta: 1:28:32 lr: 0.000017 loss_cls: 3.0521 (3.1697) grad_norm: 2.7162 (3.1554) time: 2.3985 data: 0.0003 max mem: 13912 +[2024-12-06 12:40:55 root] (utils.py 283): INFO Epoch: [5] [ 300/2502] eta: 1:28:08 lr: 0.000017 loss_cls: 3.1172 (3.1671) grad_norm: 2.9190 (3.1693) time: 2.3976 data: 0.0003 max mem: 13912 +[2024-12-06 12:41:18 root] (utils.py 283): INFO Epoch: [5] [ 310/2502] eta: 1:27:43 lr: 0.000017 loss_cls: 3.3608 (3.1717) grad_norm: 2.7631 (3.1589) time: 2.3981 data: 0.0003 max mem: 13912 +[2024-12-06 12:41:42 root] (utils.py 283): INFO Epoch: [5] [ 320/2502] eta: 1:27:19 lr: 0.000017 loss_cls: 3.3891 (3.1716) grad_norm: 2.8280 (3.2059) time: 2.3966 data: 0.0003 max mem: 13912 +[2024-12-06 12:42:06 root] (utils.py 283): INFO Epoch: [5] [ 330/2502] eta: 1:26:54 lr: 0.000017 loss_cls: 3.1877 (3.1686) grad_norm: 2.8100 (3.2200) time: 2.3918 data: 0.0003 max mem: 13912 +[2024-12-06 12:42:30 root] (utils.py 283): INFO Epoch: [5] [ 340/2502] eta: 1:26:29 lr: 0.000017 loss_cls: 3.2448 (3.1744) grad_norm: 2.7227 (3.2227) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 12:42:54 root] (utils.py 283): INFO Epoch: [5] [ 350/2502] eta: 1:26:05 lr: 0.000017 loss_cls: 3.2448 (3.1670) grad_norm: 2.6231 (3.2101) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 12:43:18 root] (utils.py 283): INFO Epoch: [5] [ 360/2502] eta: 1:25:40 lr: 0.000017 loss_cls: 3.1022 (3.1705) grad_norm: 2.6532 (3.2179) time: 2.3918 data: 0.0003 max mem: 13912 +[2024-12-06 12:43:42 root] (utils.py 283): INFO Epoch: [5] [ 370/2502] eta: 1:25:16 lr: 0.000017 loss_cls: 3.4122 (3.1719) grad_norm: 2.6532 (3.2050) time: 2.3911 data: 0.0003 max mem: 13912 +[2024-12-06 12:44:06 root] (utils.py 283): INFO Epoch: [5] [ 380/2502] eta: 1:24:51 lr: 0.000017 loss_cls: 3.3457 (3.1698) grad_norm: 2.6287 (3.1965) time: 2.3874 data: 0.0003 max mem: 13912 +[2024-12-06 12:44:30 root] (utils.py 283): INFO Epoch: [5] [ 390/2502] eta: 1:24:26 lr: 0.000017 loss_cls: 3.3457 (3.1707) grad_norm: 2.6806 (3.1848) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 12:44:54 root] (utils.py 283): INFO Epoch: [5] [ 400/2502] eta: 1:24:02 lr: 0.000017 loss_cls: 3.5137 (3.1768) grad_norm: 2.6806 (3.1790) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 12:45:18 root] (utils.py 283): INFO Epoch: [5] [ 410/2502] eta: 1:23:38 lr: 0.000017 loss_cls: 3.4463 (3.1825) grad_norm: 2.7750 (3.1758) time: 2.3933 data: 0.0003 max mem: 13912 +[2024-12-06 12:45:41 root] (utils.py 283): INFO Epoch: [5] [ 420/2502] eta: 1:23:13 lr: 0.000017 loss_cls: 3.1671 (3.1805) grad_norm: 2.8687 (3.1802) time: 2.3910 data: 0.0003 max mem: 13912 +[2024-12-06 12:46:05 root] (utils.py 283): INFO Epoch: [5] [ 430/2502] eta: 1:22:49 lr: 0.000017 loss_cls: 3.1481 (3.1804) grad_norm: 2.7729 (3.1732) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 12:46:29 root] (utils.py 283): INFO Epoch: [5] [ 440/2502] eta: 1:22:24 lr: 0.000017 loss_cls: 3.2716 (3.1790) grad_norm: 2.6093 (3.1672) time: 2.3798 data: 0.0003 max mem: 13912 +[2024-12-06 12:46:53 root] (utils.py 283): INFO Epoch: [5] [ 450/2502] eta: 1:21:58 lr: 0.000017 loss_cls: 3.3319 (3.1819) grad_norm: 2.6408 (3.1647) time: 2.3695 data: 0.0003 max mem: 13912 +[2024-12-06 12:47:17 root] (utils.py 283): INFO Epoch: [5] [ 460/2502] eta: 1:21:34 lr: 0.000017 loss_cls: 3.4204 (3.1832) grad_norm: 2.5982 (3.1523) time: 2.3794 data: 0.0003 max mem: 13912 +[2024-12-06 12:47:41 root] (utils.py 283): INFO Epoch: [5] [ 470/2502] eta: 1:21:10 lr: 0.000017 loss_cls: 3.2681 (3.1804) grad_norm: 2.6579 (3.1454) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 12:48:05 root] (utils.py 283): INFO Epoch: [5] [ 480/2502] eta: 1:20:46 lr: 0.000017 loss_cls: 3.1488 (3.1759) grad_norm: 2.7780 (3.1416) time: 2.3934 data: 0.0003 max mem: 13912 +[2024-12-06 12:48:28 root] (utils.py 283): INFO Epoch: [5] [ 490/2502] eta: 1:20:21 lr: 0.000017 loss_cls: 2.9931 (3.1721) grad_norm: 2.7375 (3.1363) time: 2.3889 data: 0.0003 max mem: 13912 +[2024-12-06 12:48:52 root] (utils.py 283): INFO Epoch: [5] [ 500/2502] eta: 1:19:57 lr: 0.000017 loss_cls: 3.3205 (3.1751) grad_norm: 2.6643 (3.1291) time: 2.3879 data: 0.0003 max mem: 13912 +[2024-12-06 12:49:16 root] (utils.py 283): INFO Epoch: [5] [ 510/2502] eta: 1:19:33 lr: 0.000017 loss_cls: 3.5006 (3.1806) grad_norm: 2.7929 (3.1530) time: 2.3908 data: 0.0003 max mem: 13912 +[2024-12-06 12:49:40 root] (utils.py 283): INFO Epoch: [5] [ 520/2502] eta: 1:19:08 lr: 0.000017 loss_cls: 3.3779 (3.1807) grad_norm: 2.7810 (3.1504) time: 2.3853 data: 0.0003 max mem: 13912 +[2024-12-06 12:50:04 root] (utils.py 283): INFO Epoch: [5] [ 530/2502] eta: 1:18:44 lr: 0.000017 loss_cls: 3.2555 (3.1840) grad_norm: 2.6112 (3.1439) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 12:50:28 root] (utils.py 283): INFO Epoch: [5] [ 540/2502] eta: 1:18:20 lr: 0.000017 loss_cls: 3.4004 (3.1866) grad_norm: 2.6112 (3.1390) time: 2.3932 data: 0.0003 max mem: 13912 +[2024-12-06 12:50:52 root] (utils.py 283): INFO Epoch: [5] [ 550/2502] eta: 1:17:56 lr: 0.000017 loss_cls: 3.2995 (3.1848) grad_norm: 2.5950 (3.1345) time: 2.3951 data: 0.0003 max mem: 13912 +[2024-12-06 12:51:16 root] (utils.py 283): INFO Epoch: [5] [ 560/2502] eta: 1:17:32 lr: 0.000017 loss_cls: 3.1224 (3.1836) grad_norm: 2.5650 (3.1361) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 12:51:40 root] (utils.py 283): INFO Epoch: [5] [ 570/2502] eta: 1:17:08 lr: 0.000017 loss_cls: 3.1236 (3.1783) grad_norm: 2.8389 (3.1454) time: 2.3881 data: 0.0003 max mem: 13912 +[2024-12-06 12:52:03 root] (utils.py 283): INFO Epoch: [5] [ 580/2502] eta: 1:16:43 lr: 0.000017 loss_cls: 3.2823 (3.1801) grad_norm: 2.8389 (3.1378) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 12:52:27 root] (utils.py 283): INFO Epoch: [5] [ 590/2502] eta: 1:16:19 lr: 0.000017 loss_cls: 3.3895 (3.1812) grad_norm: 2.7551 (3.1361) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 12:52:51 root] (utils.py 283): INFO Epoch: [5] [ 600/2502] eta: 1:15:56 lr: 0.000017 loss_cls: 3.2856 (3.1806) grad_norm: 2.8285 (3.1385) time: 2.3973 data: 0.0003 max mem: 13912 +[2024-12-06 12:53:15 root] (utils.py 283): INFO Epoch: [5] [ 610/2502] eta: 1:15:32 lr: 0.000017 loss_cls: 3.1786 (3.1784) grad_norm: 2.8684 (3.1360) time: 2.4014 data: 0.0003 max mem: 13912 +[2024-12-06 12:53:39 root] (utils.py 283): INFO Epoch: [5] [ 620/2502] eta: 1:15:08 lr: 0.000017 loss_cls: 3.1786 (3.1776) grad_norm: 2.8250 (3.1301) time: 2.4002 data: 0.0003 max mem: 13912 +[2024-12-06 12:54:03 root] (utils.py 283): INFO Epoch: [5] [ 630/2502] eta: 1:14:44 lr: 0.000017 loss_cls: 3.2304 (3.1761) grad_norm: 2.8322 (3.1294) time: 2.3894 data: 0.0003 max mem: 13912 +[2024-12-06 12:54:27 root] (utils.py 283): INFO Epoch: [5] [ 640/2502] eta: 1:14:20 lr: 0.000017 loss_cls: 3.4037 (3.1805) grad_norm: 2.8325 (3.1299) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 12:54:51 root] (utils.py 283): INFO Epoch: [5] [ 650/2502] eta: 1:13:56 lr: 0.000017 loss_cls: 3.3906 (3.1792) grad_norm: 2.7852 (3.1285) time: 2.3940 data: 0.0003 max mem: 13912 +[2024-12-06 12:55:15 root] (utils.py 283): INFO Epoch: [5] [ 660/2502] eta: 1:13:32 lr: 0.000017 loss_cls: 3.1176 (3.1747) grad_norm: 2.7831 (3.1250) time: 2.3924 data: 0.0003 max mem: 13912 +[2024-12-06 12:55:39 root] (utils.py 283): INFO Epoch: [5] [ 670/2502] eta: 1:13:08 lr: 0.000017 loss_cls: 3.2111 (3.1774) grad_norm: 2.9328 (3.1529) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 12:56:03 root] (utils.py 283): INFO Epoch: [5] [ 680/2502] eta: 1:12:44 lr: 0.000017 loss_cls: 3.2111 (3.1753) grad_norm: 3.1873 (3.1536) time: 2.3936 data: 0.0003 max mem: 13912 +[2024-12-06 12:56:27 root] (utils.py 283): INFO Epoch: [5] [ 690/2502] eta: 1:12:20 lr: 0.000017 loss_cls: 3.1795 (3.1754) grad_norm: 2.7567 (3.1466) time: 2.3957 data: 0.0003 max mem: 13912 +[2024-12-06 12:56:51 root] (utils.py 283): INFO Epoch: [5] [ 700/2502] eta: 1:11:56 lr: 0.000017 loss_cls: 3.3093 (3.1778) grad_norm: 2.6894 (3.1437) time: 2.3944 data: 0.0003 max mem: 13912 +[2024-12-06 12:57:15 root] (utils.py 283): INFO Epoch: [5] [ 710/2502] eta: 1:11:32 lr: 0.000017 loss_cls: 3.4017 (3.1809) grad_norm: 2.7269 (3.1368) time: 2.3922 data: 0.0003 max mem: 13912 +[2024-12-06 12:57:39 root] (utils.py 283): INFO Epoch: [5] [ 720/2502] eta: 1:11:08 lr: 0.000017 loss_cls: 3.3939 (3.1818) grad_norm: 2.7191 (3.1499) time: 2.4006 data: 0.0003 max mem: 13912 +[2024-12-06 12:58:03 root] (utils.py 283): INFO Epoch: [5] [ 730/2502] eta: 1:10:44 lr: 0.000017 loss_cls: 3.1267 (3.1788) grad_norm: 2.8793 (3.1462) time: 2.3987 data: 0.0003 max mem: 13912 +[2024-12-06 12:58:26 root] (utils.py 283): INFO Epoch: [5] [ 740/2502] eta: 1:10:20 lr: 0.000017 loss_cls: 3.0493 (3.1758) grad_norm: 2.8037 (3.1410) time: 2.3863 data: 0.0003 max mem: 13912 +[2024-12-06 12:58:50 root] (utils.py 283): INFO Epoch: [5] [ 750/2502] eta: 1:09:55 lr: 0.000017 loss_cls: 3.0608 (3.1747) grad_norm: 2.7913 (3.1387) time: 2.3806 data: 0.0003 max mem: 13912 +[2024-12-06 12:59:14 root] (utils.py 283): INFO Epoch: [5] [ 760/2502] eta: 1:09:31 lr: 0.000017 loss_cls: 3.0477 (3.1727) grad_norm: 2.7723 (3.1340) time: 2.3821 data: 0.0003 max mem: 13912 +[2024-12-06 12:59:38 root] (utils.py 283): INFO Epoch: [5] [ 770/2502] eta: 1:09:07 lr: 0.000017 loss_cls: 2.9782 (3.1710) grad_norm: 2.7082 (3.1296) time: 2.3765 data: 0.0003 max mem: 13912 +[2024-12-06 13:00:02 root] (utils.py 283): INFO Epoch: [5] [ 780/2502] eta: 1:08:42 lr: 0.000017 loss_cls: 3.2405 (3.1723) grad_norm: 2.7746 (3.1333) time: 2.3753 data: 0.0003 max mem: 13912 +[2024-12-06 13:00:25 root] (utils.py 283): INFO Epoch: [5] [ 790/2502] eta: 1:08:18 lr: 0.000017 loss_cls: 3.3836 (3.1708) grad_norm: 2.6998 (3.1268) time: 2.3837 data: 0.0003 max mem: 13912 +[2024-12-06 13:00:49 root] (utils.py 283): INFO Epoch: [5] [ 800/2502] eta: 1:07:54 lr: 0.000017 loss_cls: 2.8681 (3.1651) grad_norm: 2.4944 (3.1200) time: 2.3837 data: 0.0003 max mem: 13912 +[2024-12-06 13:01:13 root] (utils.py 283): INFO Epoch: [5] [ 810/2502] eta: 1:07:30 lr: 0.000017 loss_cls: 2.7779 (3.1632) grad_norm: 2.5328 (3.1169) time: 2.3890 data: 0.0003 max mem: 13912 +[2024-12-06 13:01:37 root] (utils.py 283): INFO Epoch: [5] [ 820/2502] eta: 1:07:06 lr: 0.000017 loss_cls: 3.1749 (3.1638) grad_norm: 2.6535 (3.1122) time: 2.3944 data: 0.0003 max mem: 13912 +[2024-12-06 13:02:01 root] (utils.py 283): INFO Epoch: [5] [ 830/2502] eta: 1:06:42 lr: 0.000017 loss_cls: 3.2701 (3.1624) grad_norm: 2.6991 (3.1096) time: 2.3934 data: 0.0003 max mem: 13912 +[2024-12-06 13:02:25 root] (utils.py 283): INFO Epoch: [5] [ 840/2502] eta: 1:06:18 lr: 0.000017 loss_cls: 3.2497 (3.1628) grad_norm: 2.6804 (3.1058) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 13:02:49 root] (utils.py 283): INFO Epoch: [5] [ 850/2502] eta: 1:05:54 lr: 0.000017 loss_cls: 3.2497 (3.1632) grad_norm: 2.6277 (3.1046) time: 2.3905 data: 0.0003 max mem: 13912 +[2024-12-06 13:03:13 root] (utils.py 283): INFO Epoch: [5] [ 860/2502] eta: 1:05:30 lr: 0.000017 loss_cls: 3.4806 (3.1657) grad_norm: 2.7385 (3.1068) time: 2.3840 data: 0.0003 max mem: 13912 +[2024-12-06 13:03:37 root] (utils.py 283): INFO Epoch: [5] [ 870/2502] eta: 1:05:06 lr: 0.000017 loss_cls: 3.3456 (3.1681) grad_norm: 2.7252 (3.1014) time: 2.3858 data: 0.0003 max mem: 13912 +[2024-12-06 13:04:00 root] (utils.py 283): INFO Epoch: [5] [ 880/2502] eta: 1:04:41 lr: 0.000017 loss_cls: 3.1883 (3.1660) grad_norm: 2.5589 (3.1010) time: 2.3758 data: 0.0003 max mem: 13912 +[2024-12-06 13:04:24 root] (utils.py 283): INFO Epoch: [5] [ 890/2502] eta: 1:04:17 lr: 0.000017 loss_cls: 2.8059 (3.1606) grad_norm: 2.6219 (3.0994) time: 2.3722 data: 0.0003 max mem: 13912 +[2024-12-06 13:04:48 root] (utils.py 283): INFO Epoch: [5] [ 900/2502] eta: 1:03:53 lr: 0.000017 loss_cls: 2.5461 (3.1543) grad_norm: 2.6375 (3.0963) time: 2.3876 data: 0.0003 max mem: 13912 +[2024-12-06 13:05:12 root] (utils.py 283): INFO Epoch: [5] [ 910/2502] eta: 1:03:29 lr: 0.000017 loss_cls: 2.9916 (3.1541) grad_norm: 2.6537 (3.0926) time: 2.3900 data: 0.0003 max mem: 13912 +[2024-12-06 13:05:36 root] (utils.py 283): INFO Epoch: [5] [ 920/2502] eta: 1:03:05 lr: 0.000017 loss_cls: 3.3049 (3.1562) grad_norm: 2.6673 (3.0943) time: 2.3892 data: 0.0003 max mem: 13912 +[2024-12-06 13:06:00 root] (utils.py 283): INFO Epoch: [5] [ 930/2502] eta: 1:02:41 lr: 0.000017 loss_cls: 3.4368 (3.1592) grad_norm: 2.7790 (3.0961) time: 2.3914 data: 0.0003 max mem: 13912 +[2024-12-06 13:06:24 root] (utils.py 283): INFO Epoch: [5] [ 940/2502] eta: 1:02:17 lr: 0.000017 loss_cls: 3.4497 (3.1604) grad_norm: 2.7274 (3.0928) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 13:06:48 root] (utils.py 283): INFO Epoch: [5] [ 950/2502] eta: 1:01:54 lr: 0.000017 loss_cls: 3.2033 (3.1597) grad_norm: 2.6148 (3.0895) time: 2.3916 data: 0.0003 max mem: 13912 +[2024-12-06 13:07:12 root] (utils.py 283): INFO Epoch: [5] [ 960/2502] eta: 1:01:30 lr: 0.000017 loss_cls: 3.1514 (3.1615) grad_norm: 2.8610 (3.1040) time: 2.3948 data: 0.0003 max mem: 13912 +[2024-12-06 13:07:35 root] (utils.py 283): INFO Epoch: [5] [ 970/2502] eta: 1:01:06 lr: 0.000017 loss_cls: 3.1698 (3.1611) grad_norm: 2.6698 (3.1000) time: 2.3909 data: 0.0003 max mem: 13912 +[2024-12-06 13:07:59 root] (utils.py 283): INFO Epoch: [5] [ 980/2502] eta: 1:00:42 lr: 0.000017 loss_cls: 3.1698 (3.1610) grad_norm: 2.6296 (3.0976) time: 2.3839 data: 0.0003 max mem: 13912 +[2024-12-06 13:08:23 root] (utils.py 283): INFO Epoch: [5] [ 990/2502] eta: 1:00:17 lr: 0.000017 loss_cls: 3.1348 (3.1605) grad_norm: 2.5392 (3.0923) time: 2.3739 data: 0.0003 max mem: 13912 +[2024-12-06 13:08:47 root] (utils.py 283): INFO Epoch: [5] [1000/2502] eta: 0:59:53 lr: 0.000017 loss_cls: 3.1101 (3.1617) grad_norm: 2.6126 (3.0880) time: 2.3739 data: 0.0003 max mem: 13912 +[2024-12-06 13:09:10 root] (utils.py 283): INFO Epoch: [5] [1010/2502] eta: 0:59:29 lr: 0.000017 loss_cls: 3.0090 (3.1574) grad_norm: 2.5516 (3.0827) time: 2.3794 data: 0.0003 max mem: 13912 +[2024-12-06 13:09:34 root] (utils.py 283): INFO Epoch: [5] [1020/2502] eta: 0:59:05 lr: 0.000017 loss_cls: 2.9502 (3.1551) grad_norm: 2.5127 (3.0774) time: 2.3805 data: 0.0003 max mem: 13912 +[2024-12-06 13:09:58 root] (utils.py 283): INFO Epoch: [5] [1030/2502] eta: 0:58:41 lr: 0.000017 loss_cls: 3.0601 (3.1550) grad_norm: 2.5572 (3.0725) time: 2.3862 data: 0.0003 max mem: 13912 +[2024-12-06 13:10:22 root] (utils.py 283): INFO Epoch: [5] [1040/2502] eta: 0:58:17 lr: 0.000017 loss_cls: 3.1965 (3.1538) grad_norm: 2.6188 (3.0710) time: 2.3819 data: 0.0003 max mem: 13912 +[2024-12-06 13:10:46 root] (utils.py 283): INFO Epoch: [5] [1050/2502] eta: 0:57:53 lr: 0.000017 loss_cls: 3.1965 (3.1533) grad_norm: 2.6650 (3.0691) time: 2.3820 data: 0.0003 max mem: 13912 +[2024-12-06 13:11:10 root] (utils.py 283): INFO Epoch: [5] [1060/2502] eta: 0:57:29 lr: 0.000017 loss_cls: 3.0677 (3.1512) grad_norm: 2.6351 (3.0702) time: 2.3899 data: 0.0003 max mem: 13912 +[2024-12-06 13:11:34 root] (utils.py 283): INFO Epoch: [5] [1070/2502] eta: 0:57:05 lr: 0.000017 loss_cls: 3.0468 (3.1504) grad_norm: 2.6703 (3.0661) time: 2.3931 data: 0.0003 max mem: 13912 +[2024-12-06 13:11:57 root] (utils.py 283): INFO Epoch: [5] [1080/2502] eta: 0:56:40 lr: 0.000017 loss_cls: 3.2632 (3.1509) grad_norm: 2.6184 (3.0637) time: 2.3570 data: 0.0003 max mem: 13912 +[2024-12-06 13:12:21 root] (utils.py 283): INFO Epoch: [5] [1090/2502] eta: 0:56:16 lr: 0.000017 loss_cls: 2.9780 (3.1485) grad_norm: 2.6269 (3.0606) time: 2.3521 data: 0.0003 max mem: 13912 +[2024-12-06 13:12:56 root] (utils.py 283): INFO Epoch: [5] [1100/2502] eta: 0:56:06 lr: 0.000017 loss_cls: 3.0238 (3.1482) grad_norm: 2.6276 (3.0581) time: 2.9359 data: 0.0003 max mem: 13912 +[2024-12-06 13:13:20 root] (utils.py 283): INFO Epoch: [5] [1110/2502] eta: 0:55:42 lr: 0.000017 loss_cls: 3.3038 (3.1477) grad_norm: 2.7394 (3.0569) time: 2.9471 data: 0.0003 max mem: 13912 +[2024-12-06 13:13:44 root] (utils.py 283): INFO Epoch: [5] [1120/2502] eta: 0:55:18 lr: 0.000017 loss_cls: 3.3467 (3.1498) grad_norm: 2.6888 (3.0519) time: 2.3980 data: 0.0003 max mem: 13912 +[2024-12-06 13:14:07 root] (utils.py 283): INFO Epoch: [5] [1130/2502] eta: 0:54:54 lr: 0.000017 loss_cls: 3.2756 (3.1463) grad_norm: 2.4911 (3.0482) time: 2.3904 data: 0.0003 max mem: 13912 +[2024-12-06 13:14:31 root] (utils.py 283): INFO Epoch: [5] [1140/2502] eta: 0:54:30 lr: 0.000017 loss_cls: 2.7203 (3.1432) grad_norm: 2.5353 (3.0440) time: 2.3921 data: 0.0003 max mem: 13912 +[2024-12-06 13:14:55 root] (utils.py 283): INFO Epoch: [5] [1150/2502] eta: 0:54:06 lr: 0.000017 loss_cls: 2.7586 (3.1414) grad_norm: 2.5353 (3.0456) time: 2.3867 data: 0.0003 max mem: 13912 +[2024-12-06 13:15:19 root] (utils.py 283): INFO Epoch: [5] [1160/2502] eta: 0:53:41 lr: 0.000017 loss_cls: 3.2166 (3.1426) grad_norm: 2.7422 (3.0444) time: 2.3862 data: 0.0003 max mem: 13912 +[2024-12-06 13:15:43 root] (utils.py 283): INFO Epoch: [5] [1170/2502] eta: 0:53:17 lr: 0.000017 loss_cls: 3.3137 (3.1432) grad_norm: 2.6806 (3.0413) time: 2.3901 data: 0.0003 max mem: 13912 +[2024-12-06 13:16:07 root] (utils.py 283): INFO Epoch: [5] [1180/2502] eta: 0:52:53 lr: 0.000017 loss_cls: 3.2425 (3.1419) grad_norm: 2.9294 (3.0440) time: 2.3799 data: 0.0003 max mem: 13912 +[2024-12-06 13:16:30 root] (utils.py 283): INFO Epoch: [5] [1190/2502] eta: 0:52:29 lr: 0.000017 loss_cls: 3.0784 (3.1420) grad_norm: 3.2214 (3.0547) time: 2.3688 data: 0.0003 max mem: 13912 +[2024-12-06 13:16:54 root] (utils.py 283): INFO Epoch: [5] [1200/2502] eta: 0:52:04 lr: 0.000017 loss_cls: 3.4423 (3.1431) grad_norm: 2.6861 (3.0554) time: 2.3770 data: 0.0003 max mem: 13912 +[2024-12-06 13:17:18 root] (utils.py 283): INFO Epoch: [5] [1210/2502] eta: 0:51:40 lr: 0.000017 loss_cls: 3.1051 (3.1427) grad_norm: 2.6861 (3.0572) time: 2.3879 data: 0.0003 max mem: 13912 +[2024-12-06 13:17:42 root] (utils.py 283): INFO Epoch: [5] [1220/2502] eta: 0:51:16 lr: 0.000017 loss_cls: 3.0694 (3.1419) grad_norm: 2.7170 (3.0567) time: 2.3904 data: 0.0003 max mem: 13912 +[2024-12-06 13:18:06 root] (utils.py 283): INFO Epoch: [5] [1230/2502] eta: 0:50:52 lr: 0.000017 loss_cls: 3.1699 (3.1418) grad_norm: 2.7170 (3.0616) time: 2.3920 data: 0.0003 max mem: 13912 +[2024-12-06 13:18:30 root] (utils.py 283): INFO Epoch: [5] [1240/2502] eta: 0:50:28 lr: 0.000017 loss_cls: 3.2093 (3.1435) grad_norm: 2.6899 (3.0605) time: 2.3920 data: 0.0003 max mem: 13912 +[2024-12-06 13:18:54 root] (utils.py 283): INFO Epoch: [5] [1250/2502] eta: 0:50:04 lr: 0.000017 loss_cls: 3.2918 (3.1439) grad_norm: 2.6827 (3.0610) time: 2.3940 data: 0.0003 max mem: 13912 +[2024-12-06 13:19:18 root] (utils.py 283): INFO Epoch: [5] [1260/2502] eta: 0:49:40 lr: 0.000017 loss_cls: 3.1647 (3.1446) grad_norm: 2.9005 (3.0610) time: 2.3960 data: 0.0003 max mem: 13912 +[2024-12-06 13:19:42 root] (utils.py 283): INFO Epoch: [5] [1270/2502] eta: 0:49:16 lr: 0.000017 loss_cls: 3.3718 (3.1468) grad_norm: 2.8196 (3.0599) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 13:20:06 root] (utils.py 283): INFO Epoch: [5] [1280/2502] eta: 0:48:52 lr: 0.000017 loss_cls: 3.3899 (3.1460) grad_norm: 2.5819 (3.0562) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 13:20:30 root] (utils.py 283): INFO Epoch: [5] [1290/2502] eta: 0:48:28 lr: 0.000017 loss_cls: 3.1965 (3.1462) grad_norm: 2.5944 (3.0539) time: 2.3888 data: 0.0003 max mem: 13912 +[2024-12-06 13:20:53 root] (utils.py 283): INFO Epoch: [5] [1300/2502] eta: 0:48:04 lr: 0.000017 loss_cls: 3.0068 (3.1443) grad_norm: 2.7604 (3.0646) time: 2.3871 data: 0.0003 max mem: 13912 +[2024-12-06 13:21:17 root] (utils.py 283): INFO Epoch: [5] [1310/2502] eta: 0:47:39 lr: 0.000017 loss_cls: 2.9966 (3.1430) grad_norm: 2.9663 (3.0640) time: 2.3834 data: 0.0003 max mem: 13912 +[2024-12-06 13:21:41 root] (utils.py 283): INFO Epoch: [5] [1320/2502] eta: 0:47:15 lr: 0.000017 loss_cls: 3.1104 (3.1426) grad_norm: 2.6779 (3.0610) time: 2.3795 data: 0.0003 max mem: 13912 +[2024-12-06 13:22:05 root] (utils.py 283): INFO Epoch: [5] [1330/2502] eta: 0:46:51 lr: 0.000017 loss_cls: 3.2717 (3.1439) grad_norm: 2.6325 (3.0681) time: 2.3750 data: 0.0003 max mem: 13912 +[2024-12-06 13:22:29 root] (utils.py 283): INFO Epoch: [5] [1340/2502] eta: 0:46:27 lr: 0.000017 loss_cls: 3.4144 (3.1438) grad_norm: 2.7410 (3.0682) time: 2.3823 data: 0.0003 max mem: 13912 +[2024-12-06 13:22:53 root] (utils.py 283): INFO Epoch: [5] [1350/2502] eta: 0:46:03 lr: 0.000017 loss_cls: 3.1806 (3.1444) grad_norm: 2.7193 (3.0665) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 13:23:16 root] (utils.py 283): INFO Epoch: [5] [1360/2502] eta: 0:45:39 lr: 0.000017 loss_cls: 3.1806 (3.1446) grad_norm: 2.7193 (3.0680) time: 2.3898 data: 0.0003 max mem: 13912 +[2024-12-06 13:23:40 root] (utils.py 283): INFO Epoch: [5] [1370/2502] eta: 0:45:15 lr: 0.000017 loss_cls: 3.0937 (3.1429) grad_norm: 2.6489 (3.0641) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 13:24:04 root] (utils.py 283): INFO Epoch: [5] [1380/2502] eta: 0:44:51 lr: 0.000017 loss_cls: 3.2468 (3.1439) grad_norm: 2.6658 (3.0637) time: 2.3896 data: 0.0003 max mem: 13912 +[2024-12-06 13:24:28 root] (utils.py 283): INFO Epoch: [5] [1390/2502] eta: 0:44:27 lr: 0.000017 loss_cls: 3.3808 (3.1441) grad_norm: 2.7445 (3.0602) time: 2.3914 data: 0.0003 max mem: 13912 +[2024-12-06 13:24:52 root] (utils.py 283): INFO Epoch: [5] [1400/2502] eta: 0:44:03 lr: 0.000017 loss_cls: 3.2361 (3.1444) grad_norm: 2.4888 (3.0582) time: 2.3906 data: 0.0003 max mem: 13912 +[2024-12-06 13:25:16 root] (utils.py 283): INFO Epoch: [5] [1410/2502] eta: 0:43:38 lr: 0.000017 loss_cls: 3.1638 (3.1432) grad_norm: 2.5504 (3.0590) time: 2.3879 data: 0.0003 max mem: 13912 +[2024-12-06 13:25:40 root] (utils.py 283): INFO Epoch: [5] [1420/2502] eta: 0:43:14 lr: 0.000017 loss_cls: 3.1741 (3.1426) grad_norm: 2.5504 (3.0579) time: 2.3864 data: 0.0003 max mem: 13912 +[2024-12-06 13:26:04 root] (utils.py 283): INFO Epoch: [5] [1430/2502] eta: 0:42:50 lr: 0.000017 loss_cls: 3.1741 (3.1427) grad_norm: 2.6392 (3.0559) time: 2.3872 data: 0.0003 max mem: 13912 +[2024-12-06 13:26:27 root] (utils.py 283): INFO Epoch: [5] [1440/2502] eta: 0:42:26 lr: 0.000017 loss_cls: 3.1975 (3.1417) grad_norm: 2.7589 (3.0549) time: 2.3647 data: 0.0003 max mem: 13912 +[2024-12-06 13:26:51 root] (utils.py 283): INFO Epoch: [5] [1450/2502] eta: 0:42:02 lr: 0.000017 loss_cls: 3.2146 (3.1414) grad_norm: 2.8634 (3.0552) time: 2.3609 data: 0.0003 max mem: 13912 +[2024-12-06 13:27:15 root] (utils.py 283): INFO Epoch: [5] [1460/2502] eta: 0:41:38 lr: 0.000017 loss_cls: 3.2263 (3.1411) grad_norm: 2.8618 (3.0550) time: 2.3884 data: 0.0003 max mem: 13912 +[2024-12-06 13:27:39 root] (utils.py 283): INFO Epoch: [5] [1470/2502] eta: 0:41:14 lr: 0.000017 loss_cls: 3.2447 (3.1402) grad_norm: 2.6223 (3.0522) time: 2.3986 data: 0.0003 max mem: 13912 +[2024-12-06 13:28:03 root] (utils.py 283): INFO Epoch: [5] [1480/2502] eta: 0:40:50 lr: 0.000017 loss_cls: 3.2768 (3.1416) grad_norm: 2.5660 (3.0495) time: 2.4100 data: 0.0003 max mem: 13912 +[2024-12-06 13:28:27 root] (utils.py 283): INFO Epoch: [5] [1490/2502] eta: 0:40:26 lr: 0.000017 loss_cls: 3.3170 (3.1430) grad_norm: 2.5711 (3.0465) time: 2.4155 data: 0.0003 max mem: 13912 +[2024-12-06 13:28:51 root] (utils.py 283): INFO Epoch: [5] [1500/2502] eta: 0:40:02 lr: 0.000017 loss_cls: 3.4474 (3.1444) grad_norm: 2.6257 (3.0468) time: 2.4062 data: 0.0003 max mem: 13912 +[2024-12-06 13:29:15 root] (utils.py 283): INFO Epoch: [5] [1510/2502] eta: 0:39:38 lr: 0.000017 loss_cls: 3.2546 (3.1435) grad_norm: 2.6760 (3.0512) time: 2.4016 data: 0.0003 max mem: 13912 +[2024-12-06 13:29:39 root] (utils.py 283): INFO Epoch: [5] [1520/2502] eta: 0:39:14 lr: 0.000017 loss_cls: 3.1673 (3.1447) grad_norm: 2.8088 (3.0509) time: 2.4006 data: 0.0003 max mem: 13912 +[2024-12-06 13:30:03 root] (utils.py 283): INFO Epoch: [5] [1530/2502] eta: 0:38:50 lr: 0.000017 loss_cls: 3.2463 (3.1437) grad_norm: 2.7107 (3.0486) time: 2.3998 data: 0.0003 max mem: 13912 +[2024-12-06 13:30:27 root] (utils.py 283): INFO Epoch: [5] [1540/2502] eta: 0:38:26 lr: 0.000017 loss_cls: 3.1662 (3.1449) grad_norm: 2.6717 (3.0467) time: 2.3964 data: 0.0003 max mem: 13912 +[2024-12-06 13:30:51 root] (utils.py 283): INFO Epoch: [5] [1550/2502] eta: 0:38:02 lr: 0.000017 loss_cls: 3.0947 (3.1438) grad_norm: 2.6717 (3.0458) time: 2.3980 data: 0.0003 max mem: 13912 +[2024-12-06 13:31:15 root] (utils.py 283): INFO Epoch: [5] [1560/2502] eta: 0:37:38 lr: 0.000017 loss_cls: 3.1785 (3.1459) grad_norm: 2.7439 (3.0477) time: 2.3967 data: 0.0003 max mem: 13912 +[2024-12-06 13:31:39 root] (utils.py 283): INFO Epoch: [5] [1570/2502] eta: 0:37:14 lr: 0.000017 loss_cls: 3.4000 (3.1471) grad_norm: 2.7735 (3.0473) time: 2.3926 data: 0.0003 max mem: 13912 +[2024-12-06 13:32:03 root] (utils.py 283): INFO Epoch: [5] [1580/2502] eta: 0:36:50 lr: 0.000017 loss_cls: 3.3736 (3.1488) grad_norm: 2.7218 (3.0476) time: 2.3891 data: 0.0003 max mem: 13912 +[2024-12-06 13:32:27 root] (utils.py 283): INFO Epoch: [5] [1590/2502] eta: 0:36:26 lr: 0.000017 loss_cls: 3.2450 (3.1491) grad_norm: 2.8129 (3.0554) time: 2.3865 data: 0.0003 max mem: 13912 +[2024-12-06 13:32:50 root] (utils.py 283): INFO Epoch: [5] [1600/2502] eta: 0:36:02 lr: 0.000017 loss_cls: 3.0672 (3.1483) grad_norm: 2.7662 (3.0528) time: 2.3841 data: 0.0003 max mem: 13912 +[2024-12-06 13:33:14 root] (utils.py 283): INFO Epoch: [5] [1610/2502] eta: 0:35:38 lr: 0.000017 loss_cls: 2.9895 (3.1473) grad_norm: 2.5643 (3.0502) time: 2.3839 data: 0.0003 max mem: 13912 +[2024-12-06 13:33:38 root] (utils.py 283): INFO Epoch: [5] [1620/2502] eta: 0:35:14 lr: 0.000017 loss_cls: 3.1785 (3.1481) grad_norm: 2.5643 (3.0491) time: 2.3922 data: 0.0003 max mem: 13912 +[2024-12-06 13:34:02 root] (utils.py 283): INFO Epoch: [5] [1630/2502] eta: 0:34:50 lr: 0.000017 loss_cls: 3.3245 (3.1488) grad_norm: 2.8439 (3.0480) time: 2.3953 data: 0.0003 max mem: 13912 +[2024-12-06 13:34:26 root] (utils.py 283): INFO Epoch: [5] [1640/2502] eta: 0:34:26 lr: 0.000017 loss_cls: 3.4492 (3.1502) grad_norm: 2.7653 (3.0466) time: 2.3945 data: 0.0003 max mem: 13912 +[2024-12-06 13:34:50 root] (utils.py 283): INFO Epoch: [5] [1650/2502] eta: 0:34:02 lr: 0.000017 loss_cls: 3.4049 (3.1505) grad_norm: 2.6174 (3.0446) time: 2.3918 data: 0.0003 max mem: 13912 +[2024-12-06 13:35:14 root] (utils.py 283): INFO Epoch: [5] [1660/2502] eta: 0:33:38 lr: 0.000017 loss_cls: 3.1671 (3.1496) grad_norm: 2.6501 (3.0481) time: 2.3919 data: 0.0003 max mem: 13912 +[2024-12-06 13:35:38 root] (utils.py 283): INFO Epoch: [5] [1670/2502] eta: 0:33:14 lr: 0.000017 loss_cls: 2.8987 (3.1489) grad_norm: 2.8446 (3.0485) time: 2.3923 data: 0.0003 max mem: 13912 +[2024-12-06 13:36:02 root] (utils.py 283): INFO Epoch: [5] [1680/2502] eta: 0:32:50 lr: 0.000017 loss_cls: 3.3418 (3.1506) grad_norm: 2.8446 (3.0475) time: 2.3925 data: 0.0003 max mem: 13912 +[2024-12-06 13:36:26 root] (utils.py 283): INFO Epoch: [5] [1690/2502] eta: 0:32:26 lr: 0.000017 loss_cls: 3.3239 (3.1518) grad_norm: 3.0306 (3.0537) time: 2.3958 data: 0.0003 max mem: 13912 +[2024-12-06 13:36:50 root] (utils.py 283): INFO Epoch: [5] [1700/2502] eta: 0:32:02 lr: 0.000017 loss_cls: 3.1477 (3.1491) grad_norm: 2.7768 (3.0521) time: 2.3923 data: 0.0003 max mem: 13912 +[2024-12-06 13:37:14 root] (utils.py 283): INFO Epoch: [5] [1710/2502] eta: 0:31:38 lr: 0.000017 loss_cls: 2.7639 (3.1484) grad_norm: 2.6929 (3.0508) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 13:37:37 root] (utils.py 283): INFO Epoch: [5] [1720/2502] eta: 0:31:14 lr: 0.000017 loss_cls: 2.9914 (3.1475) grad_norm: 2.5650 (3.0487) time: 2.3669 data: 0.0003 max mem: 13912 +[2024-12-06 13:38:01 root] (utils.py 283): INFO Epoch: [5] [1730/2502] eta: 0:30:50 lr: 0.000017 loss_cls: 3.1950 (3.1478) grad_norm: 2.5431 (3.0474) time: 2.3691 data: 0.0003 max mem: 13912 +[2024-12-06 13:38:25 root] (utils.py 283): INFO Epoch: [5] [1740/2502] eta: 0:30:26 lr: 0.000017 loss_cls: 3.3091 (3.1485) grad_norm: 2.7121 (3.0462) time: 2.3903 data: 0.0003 max mem: 13912 +[2024-12-06 13:38:49 root] (utils.py 283): INFO Epoch: [5] [1750/2502] eta: 0:30:02 lr: 0.000017 loss_cls: 3.0585 (3.1478) grad_norm: 2.6406 (3.0500) time: 2.3882 data: 0.0003 max mem: 13912 +[2024-12-06 13:39:13 root] (utils.py 283): INFO Epoch: [5] [1760/2502] eta: 0:29:38 lr: 0.000017 loss_cls: 3.0307 (3.1474) grad_norm: 2.8081 (3.0481) time: 2.3888 data: 0.0003 max mem: 13912 +[2024-12-06 13:39:37 root] (utils.py 283): INFO Epoch: [5] [1770/2502] eta: 0:29:14 lr: 0.000017 loss_cls: 2.9798 (3.1464) grad_norm: 2.6933 (3.0467) time: 2.3888 data: 0.0003 max mem: 13912 +[2024-12-06 13:40:00 root] (utils.py 283): INFO Epoch: [5] [1780/2502] eta: 0:28:50 lr: 0.000017 loss_cls: 3.1394 (3.1472) grad_norm: 2.6045 (3.0450) time: 2.3878 data: 0.0003 max mem: 13912 +[2024-12-06 13:40:24 root] (utils.py 283): INFO Epoch: [5] [1790/2502] eta: 0:28:26 lr: 0.000017 loss_cls: 3.2647 (3.1472) grad_norm: 2.6045 (3.0429) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 13:40:48 root] (utils.py 283): INFO Epoch: [5] [1800/2502] eta: 0:28:02 lr: 0.000017 loss_cls: 3.0052 (3.1459) grad_norm: 2.5274 (3.0405) time: 2.3896 data: 0.0003 max mem: 13912 +[2024-12-06 13:41:12 root] (utils.py 283): INFO Epoch: [5] [1810/2502] eta: 0:27:38 lr: 0.000017 loss_cls: 3.1564 (3.1468) grad_norm: 2.5975 (3.0433) time: 2.3844 data: 0.0003 max mem: 13912 +[2024-12-06 13:41:36 root] (utils.py 283): INFO Epoch: [5] [1820/2502] eta: 0:27:14 lr: 0.000017 loss_cls: 3.2839 (3.1472) grad_norm: 2.6548 (3.0416) time: 2.3754 data: 0.0003 max mem: 13912 +[2024-12-06 13:42:00 root] (utils.py 283): INFO Epoch: [5] [1830/2502] eta: 0:26:50 lr: 0.000017 loss_cls: 3.1677 (3.1469) grad_norm: 2.5185 (3.0393) time: 2.3736 data: 0.0003 max mem: 13912 +[2024-12-06 13:42:23 root] (utils.py 283): INFO Epoch: [5] [1840/2502] eta: 0:26:26 lr: 0.000017 loss_cls: 3.2459 (3.1473) grad_norm: 2.7048 (3.0375) time: 2.3810 data: 0.0003 max mem: 13912 +[2024-12-06 13:42:47 root] (utils.py 283): INFO Epoch: [5] [1850/2502] eta: 0:26:02 lr: 0.000017 loss_cls: 3.2137 (3.1466) grad_norm: 2.7120 (3.0393) time: 2.3873 data: 0.0003 max mem: 13912 +[2024-12-06 13:43:11 root] (utils.py 283): INFO Epoch: [5] [1860/2502] eta: 0:25:38 lr: 0.000017 loss_cls: 3.4411 (3.1483) grad_norm: 2.7617 (3.0382) time: 2.3941 data: 0.0003 max mem: 13912 +[2024-12-06 13:43:35 root] (utils.py 283): INFO Epoch: [5] [1870/2502] eta: 0:25:14 lr: 0.000017 loss_cls: 3.4120 (3.1486) grad_norm: 2.7426 (3.0363) time: 2.3950 data: 0.0003 max mem: 13912 +[2024-12-06 13:43:59 root] (utils.py 283): INFO Epoch: [5] [1880/2502] eta: 0:24:50 lr: 0.000017 loss_cls: 3.1790 (3.1481) grad_norm: 2.7110 (3.0363) time: 2.3887 data: 0.0003 max mem: 13912 +[2024-12-06 13:44:23 root] (utils.py 283): INFO Epoch: [5] [1890/2502] eta: 0:24:26 lr: 0.000017 loss_cls: 3.2114 (3.1492) grad_norm: 2.8751 (3.0360) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 13:44:47 root] (utils.py 283): INFO Epoch: [5] [1900/2502] eta: 0:24:02 lr: 0.000017 loss_cls: 3.3037 (3.1498) grad_norm: 2.7938 (3.0362) time: 2.3866 data: 0.0003 max mem: 13912 +[2024-12-06 13:45:11 root] (utils.py 283): INFO Epoch: [5] [1910/2502] eta: 0:23:38 lr: 0.000017 loss_cls: 3.3219 (3.1494) grad_norm: 2.6232 (3.0370) time: 2.3840 data: 0.0003 max mem: 13912 +[2024-12-06 13:45:35 root] (utils.py 283): INFO Epoch: [5] [1920/2502] eta: 0:23:14 lr: 0.000017 loss_cls: 3.4213 (3.1510) grad_norm: 2.6866 (3.0361) time: 2.3871 data: 0.0003 max mem: 13912 +[2024-12-06 13:45:58 root] (utils.py 283): INFO Epoch: [5] [1930/2502] eta: 0:22:50 lr: 0.000017 loss_cls: 3.4213 (3.1506) grad_norm: 2.8354 (3.0350) time: 2.3864 data: 0.0003 max mem: 13912 +[2024-12-06 13:46:22 root] (utils.py 283): INFO Epoch: [5] [1940/2502] eta: 0:22:26 lr: 0.000017 loss_cls: 3.2266 (3.1505) grad_norm: 2.6494 (3.0330) time: 2.3870 data: 0.0003 max mem: 13912 +[2024-12-06 13:46:46 root] (utils.py 283): INFO Epoch: [5] [1950/2502] eta: 0:22:02 lr: 0.000017 loss_cls: 3.2905 (3.1511) grad_norm: 2.6915 (3.0335) time: 2.3835 data: 0.0003 max mem: 13912 +[2024-12-06 13:47:10 root] (utils.py 283): INFO Epoch: [5] [1960/2502] eta: 0:21:38 lr: 0.000017 loss_cls: 3.4448 (3.1528) grad_norm: 2.7920 (3.0331) time: 2.3755 data: 0.0003 max mem: 13912 +[2024-12-06 13:47:34 root] (utils.py 283): INFO Epoch: [5] [1970/2502] eta: 0:21:14 lr: 0.000017 loss_cls: 3.4448 (3.1530) grad_norm: 2.8896 (3.0331) time: 2.3841 data: 0.0003 max mem: 13912 +[2024-12-06 13:47:58 root] (utils.py 283): INFO Epoch: [5] [1980/2502] eta: 0:20:50 lr: 0.000017 loss_cls: 3.4759 (3.1547) grad_norm: 2.7690 (3.0325) time: 2.4148 data: 0.0003 max mem: 13912 +[2024-12-06 13:48:22 root] (utils.py 283): INFO Epoch: [5] [1990/2502] eta: 0:20:26 lr: 0.000017 loss_cls: 3.3385 (3.1545) grad_norm: 2.7629 (3.0350) time: 2.4132 data: 0.0003 max mem: 13912 +[2024-12-06 13:48:46 root] (utils.py 283): INFO Epoch: [5] [2000/2502] eta: 0:20:02 lr: 0.000017 loss_cls: 2.9314 (3.1540) grad_norm: 2.8075 (3.0351) time: 2.3929 data: 0.0003 max mem: 13912 +[2024-12-06 13:49:10 root] (utils.py 283): INFO Epoch: [5] [2010/2502] eta: 0:19:38 lr: 0.000017 loss_cls: 3.3036 (3.1553) grad_norm: 2.5356 (3.0330) time: 2.3859 data: 0.0003 max mem: 13912 +[2024-12-06 13:49:33 root] (utils.py 283): INFO Epoch: [5] [2020/2502] eta: 0:19:14 lr: 0.000017 loss_cls: 3.3514 (3.1551) grad_norm: 2.7212 (3.0337) time: 2.3749 data: 0.0003 max mem: 13912 +[2024-12-06 13:49:57 root] (utils.py 283): INFO Epoch: [5] [2030/2502] eta: 0:18:50 lr: 0.000017 loss_cls: 3.1780 (3.1551) grad_norm: 2.7506 (3.0317) time: 2.3733 data: 0.0003 max mem: 13912 +[2024-12-06 13:50:21 root] (utils.py 283): INFO Epoch: [5] [2040/2502] eta: 0:18:26 lr: 0.000017 loss_cls: 3.0703 (3.1540) grad_norm: 2.5743 (3.0299) time: 2.3796 data: 0.0003 max mem: 13912 +[2024-12-06 13:50:45 root] (utils.py 283): INFO Epoch: [5] [2050/2502] eta: 0:18:02 lr: 0.000017 loss_cls: 3.0486 (3.1547) grad_norm: 2.7634 (3.0295) time: 2.3883 data: 0.0003 max mem: 13912 +[2024-12-06 13:51:09 root] (utils.py 283): INFO Epoch: [5] [2060/2502] eta: 0:17:38 lr: 0.000017 loss_cls: 3.3782 (3.1542) grad_norm: 2.7928 (3.0300) time: 2.3912 data: 0.0003 max mem: 13912 +[2024-12-06 13:51:33 root] (utils.py 283): INFO Epoch: [5] [2070/2502] eta: 0:17:14 lr: 0.000017 loss_cls: 3.3070 (3.1542) grad_norm: 2.7187 (3.0280) time: 2.3870 data: 0.0003 max mem: 13912 +[2024-12-06 13:51:57 root] (utils.py 283): INFO Epoch: [5] [2080/2502] eta: 0:16:50 lr: 0.000017 loss_cls: 3.2715 (3.1544) grad_norm: 2.5687 (3.0274) time: 2.3836 data: 0.0003 max mem: 13912 +[2024-12-06 13:52:20 root] (utils.py 283): INFO Epoch: [5] [2090/2502] eta: 0:16:26 lr: 0.000017 loss_cls: 3.3895 (3.1548) grad_norm: 2.7040 (3.0312) time: 2.3867 data: 0.0003 max mem: 13912 +[2024-12-06 13:52:44 root] (utils.py 283): INFO Epoch: [5] [2100/2502] eta: 0:16:02 lr: 0.000017 loss_cls: 3.4307 (3.1564) grad_norm: 2.7386 (3.0300) time: 2.3885 data: 0.0003 max mem: 13912 +[2024-12-06 13:53:08 root] (utils.py 283): INFO Epoch: [5] [2110/2502] eta: 0:15:38 lr: 0.000017 loss_cls: 3.4531 (3.1574) grad_norm: 2.6748 (3.0291) time: 2.3826 data: 0.0003 max mem: 13912 +[2024-12-06 13:53:32 root] (utils.py 283): INFO Epoch: [5] [2120/2502] eta: 0:15:14 lr: 0.000017 loss_cls: 3.2459 (3.1573) grad_norm: 2.5829 (3.0274) time: 2.3700 data: 0.0003 max mem: 13912 +[2024-12-06 13:53:56 root] (utils.py 283): INFO Epoch: [5] [2130/2502] eta: 0:14:50 lr: 0.000017 loss_cls: 3.1297 (3.1559) grad_norm: 2.5829 (3.0253) time: 2.3756 data: 0.0003 max mem: 13912 +[2024-12-06 13:54:19 root] (utils.py 283): INFO Epoch: [5] [2140/2502] eta: 0:14:26 lr: 0.000017 loss_cls: 3.0633 (3.1558) grad_norm: 2.6258 (3.0242) time: 2.3839 data: 0.0003 max mem: 13912 +[2024-12-06 13:54:43 root] (utils.py 283): INFO Epoch: [5] [2150/2502] eta: 0:14:02 lr: 0.000017 loss_cls: 3.2815 (3.1567) grad_norm: 2.6890 (3.0240) time: 2.3820 data: 0.0003 max mem: 13912 +[2024-12-06 13:55:07 root] (utils.py 283): INFO Epoch: [5] [2160/2502] eta: 0:13:39 lr: 0.000017 loss_cls: 3.2255 (3.1556) grad_norm: 2.5340 (3.0215) time: 2.3845 data: 0.0003 max mem: 13912 +[2024-12-06 13:55:31 root] (utils.py 283): INFO Epoch: [5] [2170/2502] eta: 0:13:15 lr: 0.000017 loss_cls: 3.1062 (3.1544) grad_norm: 2.4537 (3.0205) time: 2.3793 data: 0.0003 max mem: 13912 +[2024-12-06 13:55:54 root] (utils.py 283): INFO Epoch: [5] [2180/2502] eta: 0:12:51 lr: 0.000017 loss_cls: 3.1023 (3.1538) grad_norm: 2.6030 (3.0191) time: 2.3686 data: 0.0003 max mem: 13912 +[2024-12-06 13:56:13 root] (utils.py 283): INFO Epoch: [5] [2190/2502] eta: 0:12:26 lr: 0.000017 loss_cls: 2.7894 (3.1508) grad_norm: 2.4003 (3.0168) time: 2.0862 data: 0.0002 max mem: 13912 +[2024-12-06 13:56:26 root] (utils.py 283): INFO Epoch: [5] [2200/2502] eta: 0:12:00 lr: 0.000017 loss_cls: 2.8143 (3.1508) grad_norm: 2.4911 (3.0168) time: 1.5551 data: 0.0002 max mem: 13912 +[2024-12-06 13:56:39 root] (utils.py 283): INFO Epoch: [5] [2210/2502] eta: 0:11:35 lr: 0.000017 loss_cls: 3.2788 (3.1504) grad_norm: 2.7003 (3.0169) time: 1.2970 data: 0.0002 max mem: 13912 +[2024-12-06 13:56:51 root] (utils.py 283): INFO Epoch: [5] [2220/2502] eta: 0:11:10 lr: 0.000017 loss_cls: 3.1742 (3.1505) grad_norm: 2.9812 (3.0214) time: 1.2917 data: 0.0002 max mem: 13912 +[2024-12-06 13:57:04 root] (utils.py 283): INFO Epoch: [5] [2230/2502] eta: 0:10:45 lr: 0.000017 loss_cls: 3.3615 (3.1509) grad_norm: 2.9011 (3.0284) time: 1.2891 data: 0.0002 max mem: 13912 +[2024-12-06 13:57:18 root] (utils.py 283): INFO Epoch: [5] [2240/2502] eta: 0:10:20 lr: 0.000017 loss_cls: 3.1948 (3.1506) grad_norm: 2.6065 (3.0281) time: 1.3225 data: 0.0003 max mem: 13912 +[2024-12-06 13:57:31 root] (utils.py 283): INFO Epoch: [5] [2250/2502] eta: 0:09:55 lr: 0.000017 loss_cls: 2.9360 (3.1496) grad_norm: 2.6952 (3.0323) time: 1.3310 data: 0.0003 max mem: 13912 +[2024-12-06 13:57:44 root] (utils.py 283): INFO Epoch: [5] [2260/2502] eta: 0:09:30 lr: 0.000017 loss_cls: 2.7898 (3.1489) grad_norm: 2.7926 (3.0334) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 13:57:57 root] (utils.py 283): INFO Epoch: [5] [2270/2502] eta: 0:09:06 lr: 0.000017 loss_cls: 2.7975 (3.1475) grad_norm: 2.6903 (3.0331) time: 1.3069 data: 0.0003 max mem: 13912 +[2024-12-06 13:58:10 root] (utils.py 283): INFO Epoch: [5] [2280/2502] eta: 0:08:41 lr: 0.000017 loss_cls: 3.1972 (3.1488) grad_norm: 2.7772 (3.0327) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 13:58:23 root] (utils.py 283): INFO Epoch: [5] [2290/2502] eta: 0:08:17 lr: 0.000017 loss_cls: 3.4038 (3.1494) grad_norm: 2.5274 (3.0303) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 13:58:36 root] (utils.py 283): INFO Epoch: [5] [2300/2502] eta: 0:07:52 lr: 0.000017 loss_cls: 3.2784 (3.1494) grad_norm: 2.4922 (3.0285) time: 1.3070 data: 0.0002 max mem: 13912 +[2024-12-06 13:58:49 root] (utils.py 283): INFO Epoch: [5] [2310/2502] eta: 0:07:28 lr: 0.000017 loss_cls: 3.0599 (3.1484) grad_norm: 2.6882 (3.0289) time: 1.3073 data: 0.0002 max mem: 13912 +[2024-12-06 13:59:02 root] (utils.py 283): INFO Epoch: [5] [2320/2502] eta: 0:07:04 lr: 0.000017 loss_cls: 2.9690 (3.1479) grad_norm: 2.7253 (3.0290) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 13:59:15 root] (utils.py 283): INFO Epoch: [5] [2330/2502] eta: 0:06:40 lr: 0.000017 loss_cls: 3.2342 (3.1480) grad_norm: 2.8678 (3.0291) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 13:59:28 root] (utils.py 283): INFO Epoch: [5] [2340/2502] eta: 0:06:16 lr: 0.000017 loss_cls: 3.2949 (3.1486) grad_norm: 2.7932 (3.0278) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 13:59:41 root] (utils.py 283): INFO Epoch: [5] [2350/2502] eta: 0:05:52 lr: 0.000017 loss_cls: 3.2428 (3.1482) grad_norm: 2.5746 (3.0277) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 13:59:54 root] (utils.py 283): INFO Epoch: [5] [2360/2502] eta: 0:05:28 lr: 0.000017 loss_cls: 3.2604 (3.1485) grad_norm: 2.5597 (3.0265) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 14:00:08 root] (utils.py 283): INFO Epoch: [5] [2370/2502] eta: 0:05:04 lr: 0.000017 loss_cls: 3.2560 (3.1482) grad_norm: 2.7000 (3.0267) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 14:00:21 root] (utils.py 283): INFO Epoch: [5] [2380/2502] eta: 0:04:41 lr: 0.000017 loss_cls: 3.2391 (3.1481) grad_norm: 2.5614 (3.0247) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 14:00:34 root] (utils.py 283): INFO Epoch: [5] [2390/2502] eta: 0:04:17 lr: 0.000017 loss_cls: 3.3803 (3.1480) grad_norm: 2.5952 (3.0258) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 14:00:47 root] (utils.py 283): INFO Epoch: [5] [2400/2502] eta: 0:03:54 lr: 0.000017 loss_cls: 3.3165 (3.1484) grad_norm: 2.8301 (3.0252) time: 1.3075 data: 0.0003 max mem: 13912 +[2024-12-06 14:01:00 root] (utils.py 283): INFO Epoch: [5] [2410/2502] eta: 0:03:30 lr: 0.000017 loss_cls: 3.3165 (3.1492) grad_norm: 2.7677 (3.0266) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 14:01:13 root] (utils.py 283): INFO Epoch: [5] [2420/2502] eta: 0:03:07 lr: 0.000017 loss_cls: 3.2969 (3.1482) grad_norm: 2.7855 (3.0258) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 14:01:26 root] (utils.py 283): INFO Epoch: [5] [2430/2502] eta: 0:02:44 lr: 0.000017 loss_cls: 3.2538 (3.1488) grad_norm: 2.8747 (3.0256) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 14:01:39 root] (utils.py 283): INFO Epoch: [5] [2440/2502] eta: 0:02:21 lr: 0.000017 loss_cls: 3.2841 (3.1488) grad_norm: 2.7743 (3.0248) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:01:52 root] (utils.py 283): INFO Epoch: [5] [2450/2502] eta: 0:01:58 lr: 0.000017 loss_cls: 3.2073 (3.1491) grad_norm: 2.6306 (3.0232) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 14:02:05 root] (utils.py 283): INFO Epoch: [5] [2460/2502] eta: 0:01:35 lr: 0.000017 loss_cls: 3.1446 (3.1485) grad_norm: 2.6154 (3.0224) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 14:02:18 root] (utils.py 283): INFO Epoch: [5] [2470/2502] eta: 0:01:12 lr: 0.000017 loss_cls: 3.0026 (3.1484) grad_norm: 2.7512 (3.0242) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 14:02:31 root] (utils.py 283): INFO Epoch: [5] [2480/2502] eta: 0:00:49 lr: 0.000017 loss_cls: 3.2847 (3.1487) grad_norm: 2.7749 (3.0236) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 14:02:44 root] (utils.py 283): INFO Epoch: [5] [2490/2502] eta: 0:00:27 lr: 0.000017 loss_cls: 3.3199 (3.1492) grad_norm: 2.8276 (3.0244) time: 1.3266 data: 0.0252 max mem: 13912 +[2024-12-06 14:02:57 root] (utils.py 283): INFO Epoch: [5] [2500/2502] eta: 0:00:04 lr: 0.000017 loss_cls: 3.3199 (3.1495) grad_norm: 3.0107 (3.0275) time: 1.3261 data: 0.0252 max mem: 13912 +[2024-12-06 14:02:59 root] (utils.py 283): INFO Epoch: [5] [2501/2502] eta: 0:00:02 lr: 0.000017 loss_cls: 3.2693 (3.1496) grad_norm: 3.0928 (3.0275) time: 1.3268 data: 0.0252 max mem: 13912 +[2024-12-06 14:02:59 root] (utils.py 297): INFO Epoch: [5] Total time: 1:34:07 (2.2570 s / it) +[2024-12-06 14:02:59 root] (engine.py 179): INFO Averaged stats:lr: 0.000017 loss_cls: 3.2693 (3.1425) grad_norm: 3.0928 (3.0275) +[2024-12-06 14:03:00 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4376 (0.4376) acc1: 92.9688 (92.9688) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.2246 data: 0.0003 max mem: 13912 +[2024-12-06 14:03:02 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:19 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7033 (0.6844) acc1: 85.1562 (86.1506) acc3: 96.0938 (95.0994) acc5: 96.8750 (96.7330) time: 0.2272 data: 0.0004 max mem: 13912 +[2024-12-06 14:03:04 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7033 (0.7154) acc1: 84.3750 (85.0446) acc3: 95.3125 (94.7917) acc5: 96.8750 (96.5402) time: 0.2275 data: 0.0005 max mem: 13912 +[2024-12-06 14:03:07 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7755 (0.7405) acc1: 83.5938 (83.9214) acc3: 94.5312 (94.7581) acc5: 96.8750 (96.6734) time: 0.2278 data: 0.0005 max mem: 13912 +[2024-12-06 14:03:09 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7475 (0.7389) acc1: 83.5938 (83.9558) acc3: 94.5312 (94.7218) acc5: 96.8750 (96.7988) time: 0.2279 data: 0.0004 max mem: 13912 +[2024-12-06 14:03:11 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7951 (0.8216) acc1: 79.6875 (82.0619) acc3: 92.9688 (93.5202) acc5: 96.0938 (95.8027) time: 0.2280 data: 0.0005 max mem: 13912 +[2024-12-06 14:03:14 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0796 (0.8509) acc1: 78.1250 (81.7239) acc3: 89.8438 (92.8919) acc5: 91.4062 (95.1716) time: 0.2293 data: 0.0005 max mem: 13912 +[2024-12-06 14:03:16 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0670 (0.8814) acc1: 78.1250 (80.9969) acc3: 90.6250 (92.5726) acc5: 92.1875 (94.9274) time: 0.2293 data: 0.0005 max mem: 13912 +[2024-12-06 14:03:18 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0693 (0.9113) acc1: 76.5625 (80.2373) acc3: 89.0625 (92.0332) acc5: 92.1875 (94.5409) time: 0.2282 data: 0.0008 max mem: 13912 +[2024-12-06 14:03:20 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0940 (0.9368) acc1: 75.7812 (79.4471) acc3: 87.5000 (91.6380) acc5: 92.1875 (94.3338) time: 0.2286 data: 0.0008 max mem: 13912 +[2024-12-06 14:03:22 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.9752 (0.9305) acc1: 75.7812 (79.5520) acc3: 90.6250 (91.7360) acc5: 93.7500 (94.4560) time: 0.2249 data: 0.0007 max mem: 13912 +[2024-12-06 14:03:22 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2277 s / it) +[2024-12-06 14:03:22 root] (engine.py 264): INFO * Acc@1 79.494 Acc@3 91.878 Acc@5 94.592 loss 0.928 flops 3.584 layer_flops 3.536 +[2024-12-06 14:03:22 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.5% +[2024-12-06 14:03:22 root] (main.py 551): INFO Max accuracy: 79.49% +[2024-12-06 14:03:24 root] (utils.py 283): INFO Epoch: [6] [ 0/2502] eta: 0:53:38 lr: 0.000015 loss_cls: 4.0424 (4.0424) grad_norm: 2.8071 (2.8071) time: 1.2863 data: 0.0003 max mem: 13912 +[2024-12-06 14:03:37 root] (utils.py 283): INFO Epoch: [6] [ 10/2502] eta: 0:53:39 lr: 0.000015 loss_cls: 3.2252 (3.0388) grad_norm: 2.9054 (3.5151) time: 1.2918 data: 0.0003 max mem: 13912 +[2024-12-06 14:03:50 root] (utils.py 283): INFO Epoch: [6] [ 20/2502] eta: 0:53:33 lr: 0.000015 loss_cls: 3.2252 (3.0936) grad_norm: 2.6421 (3.1691) time: 1.2950 data: 0.0003 max mem: 13912 +[2024-12-06 14:04:03 root] (utils.py 283): INFO Epoch: [6] [ 30/2502] eta: 0:53:26 lr: 0.000015 loss_cls: 3.3090 (3.1419) grad_norm: 2.6465 (3.1076) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 14:04:16 root] (utils.py 283): INFO Epoch: [6] [ 40/2502] eta: 0:53:13 lr: 0.000015 loss_cls: 3.1808 (3.1077) grad_norm: 2.7155 (3.0562) time: 1.2994 data: 0.0002 max mem: 13912 +[2024-12-06 14:04:29 root] (utils.py 283): INFO Epoch: [6] [ 50/2502] eta: 0:53:04 lr: 0.000015 loss_cls: 3.0444 (3.0859) grad_norm: 2.7534 (3.1606) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 14:04:42 root] (utils.py 283): INFO Epoch: [6] [ 60/2502] eta: 0:52:52 lr: 0.000015 loss_cls: 3.0444 (3.0494) grad_norm: 2.9252 (3.1014) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 14:04:55 root] (utils.py 283): INFO Epoch: [6] [ 70/2502] eta: 0:52:40 lr: 0.000015 loss_cls: 3.1784 (3.0829) grad_norm: 2.8675 (3.1137) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 14:05:08 root] (utils.py 283): INFO Epoch: [6] [ 80/2502] eta: 0:52:29 lr: 0.000015 loss_cls: 3.1874 (3.1009) grad_norm: 2.9517 (3.1358) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 14:05:21 root] (utils.py 283): INFO Epoch: [6] [ 90/2502] eta: 0:52:17 lr: 0.000015 loss_cls: 3.1874 (3.1053) grad_norm: 2.6033 (3.0825) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 14:05:34 root] (utils.py 283): INFO Epoch: [6] [ 100/2502] eta: 0:52:06 lr: 0.000015 loss_cls: 3.2297 (3.1099) grad_norm: 2.5476 (3.0579) time: 1.3066 data: 0.0003 max mem: 13912 +[2024-12-06 14:05:47 root] (utils.py 283): INFO Epoch: [6] [ 110/2502] eta: 0:51:54 lr: 0.000015 loss_cls: 3.2297 (3.1110) grad_norm: 2.5716 (3.0243) time: 1.3078 data: 0.0002 max mem: 13912 +[2024-12-06 14:06:00 root] (utils.py 283): INFO Epoch: [6] [ 120/2502] eta: 0:51:41 lr: 0.000015 loss_cls: 3.1129 (3.1002) grad_norm: 2.6342 (2.9944) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 14:06:13 root] (utils.py 283): INFO Epoch: [6] [ 130/2502] eta: 0:51:29 lr: 0.000015 loss_cls: 2.9774 (3.1037) grad_norm: 2.7498 (2.9773) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 14:06:26 root] (utils.py 283): INFO Epoch: [6] [ 140/2502] eta: 0:51:15 lr: 0.000015 loss_cls: 3.3353 (3.1306) grad_norm: 2.7847 (2.9563) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 14:06:39 root] (utils.py 283): INFO Epoch: [6] [ 150/2502] eta: 0:51:02 lr: 0.000015 loss_cls: 3.2094 (3.1224) grad_norm: 2.6259 (2.9357) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 14:06:52 root] (utils.py 283): INFO Epoch: [6] [ 160/2502] eta: 0:50:50 lr: 0.000015 loss_cls: 3.2369 (3.1273) grad_norm: 2.6480 (2.9510) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 14:07:05 root] (utils.py 283): INFO Epoch: [6] [ 170/2502] eta: 0:50:39 lr: 0.000015 loss_cls: 3.3012 (3.1365) grad_norm: 2.7464 (2.9551) time: 1.3122 data: 0.0003 max mem: 13912 +[2024-12-06 14:07:18 root] (utils.py 283): INFO Epoch: [6] [ 180/2502] eta: 0:50:26 lr: 0.000015 loss_cls: 3.2421 (3.1253) grad_norm: 2.8034 (2.9907) time: 1.3086 data: 0.0003 max mem: 13912 +[2024-12-06 14:07:31 root] (utils.py 283): INFO Epoch: [6] [ 190/2502] eta: 0:50:12 lr: 0.000015 loss_cls: 3.1101 (3.1210) grad_norm: 2.7785 (2.9799) time: 1.3002 data: 0.0003 max mem: 13912 +[2024-12-06 14:07:44 root] (utils.py 283): INFO Epoch: [6] [ 200/2502] eta: 0:49:59 lr: 0.000015 loss_cls: 3.2395 (3.1268) grad_norm: 2.6550 (2.9646) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 14:07:57 root] (utils.py 283): INFO Epoch: [6] [ 210/2502] eta: 0:49:46 lr: 0.000015 loss_cls: 3.2946 (3.1306) grad_norm: 2.6729 (2.9742) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 14:08:10 root] (utils.py 283): INFO Epoch: [6] [ 220/2502] eta: 0:49:32 lr: 0.000015 loss_cls: 3.2659 (3.1304) grad_norm: 2.7508 (3.1248) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 14:08:23 root] (utils.py 283): INFO Epoch: [6] [ 230/2502] eta: 0:49:20 lr: 0.000015 loss_cls: 3.1755 (3.1249) grad_norm: 2.5412 (3.1112) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 14:08:37 root] (utils.py 283): INFO Epoch: [6] [ 240/2502] eta: 0:49:07 lr: 0.000015 loss_cls: 3.3020 (3.1316) grad_norm: 2.6579 (3.1062) time: 1.3068 data: 0.0003 max mem: 13912 +[2024-12-06 14:08:50 root] (utils.py 283): INFO Epoch: [6] [ 250/2502] eta: 0:48:54 lr: 0.000015 loss_cls: 3.3020 (3.1304) grad_norm: 2.7543 (3.1071) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 14:09:03 root] (utils.py 283): INFO Epoch: [6] [ 260/2502] eta: 0:48:42 lr: 0.000015 loss_cls: 3.1312 (3.1335) grad_norm: 2.6247 (3.0968) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 14:09:16 root] (utils.py 283): INFO Epoch: [6] [ 270/2502] eta: 0:48:29 lr: 0.000015 loss_cls: 3.3830 (3.1423) grad_norm: 2.5346 (3.1039) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 14:09:29 root] (utils.py 283): INFO Epoch: [6] [ 280/2502] eta: 0:48:16 lr: 0.000015 loss_cls: 3.3947 (3.1428) grad_norm: 2.8794 (3.1052) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 14:09:42 root] (utils.py 283): INFO Epoch: [6] [ 290/2502] eta: 0:48:02 lr: 0.000015 loss_cls: 3.2326 (3.1425) grad_norm: 2.8649 (3.0951) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 14:09:55 root] (utils.py 283): INFO Epoch: [6] [ 300/2502] eta: 0:47:49 lr: 0.000015 loss_cls: 3.3075 (3.1401) grad_norm: 2.8168 (3.0962) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 14:10:08 root] (utils.py 283): INFO Epoch: [6] [ 310/2502] eta: 0:47:36 lr: 0.000015 loss_cls: 3.3277 (3.1330) grad_norm: 2.7547 (3.0848) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 14:10:21 root] (utils.py 283): INFO Epoch: [6] [ 320/2502] eta: 0:47:23 lr: 0.000015 loss_cls: 3.0659 (3.1274) grad_norm: 2.7547 (3.0831) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 14:10:34 root] (utils.py 283): INFO Epoch: [6] [ 330/2502] eta: 0:47:10 lr: 0.000015 loss_cls: 3.0659 (3.1275) grad_norm: 2.6302 (3.0786) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 14:10:47 root] (utils.py 283): INFO Epoch: [6] [ 340/2502] eta: 0:46:57 lr: 0.000015 loss_cls: 3.1442 (3.1298) grad_norm: 2.5910 (3.0894) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:11:00 root] (utils.py 283): INFO Epoch: [6] [ 350/2502] eta: 0:46:44 lr: 0.000015 loss_cls: 3.3584 (3.1342) grad_norm: 2.9013 (3.1029) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 14:11:13 root] (utils.py 283): INFO Epoch: [6] [ 360/2502] eta: 0:46:30 lr: 0.000015 loss_cls: 3.3584 (3.1407) grad_norm: 2.7724 (3.1088) time: 1.2959 data: 0.0002 max mem: 13912 +[2024-12-06 14:11:26 root] (utils.py 283): INFO Epoch: [6] [ 370/2502] eta: 0:46:17 lr: 0.000015 loss_cls: 3.3884 (3.1442) grad_norm: 2.6394 (3.1003) time: 1.2948 data: 0.0003 max mem: 13912 +[2024-12-06 14:11:39 root] (utils.py 283): INFO Epoch: [6] [ 380/2502] eta: 0:46:03 lr: 0.000015 loss_cls: 3.0680 (3.1332) grad_norm: 2.8438 (3.0978) time: 1.2954 data: 0.0003 max mem: 13912 +[2024-12-06 14:11:52 root] (utils.py 283): INFO Epoch: [6] [ 390/2502] eta: 0:45:50 lr: 0.000015 loss_cls: 2.7920 (3.1333) grad_norm: 2.7633 (3.0857) time: 1.2960 data: 0.0003 max mem: 13912 +[2024-12-06 14:12:05 root] (utils.py 283): INFO Epoch: [6] [ 400/2502] eta: 0:45:37 lr: 0.000015 loss_cls: 3.2397 (3.1345) grad_norm: 2.7315 (3.0853) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 14:12:18 root] (utils.py 283): INFO Epoch: [6] [ 410/2502] eta: 0:45:24 lr: 0.000015 loss_cls: 3.2397 (3.1333) grad_norm: 2.9133 (3.1007) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:12:31 root] (utils.py 283): INFO Epoch: [6] [ 420/2502] eta: 0:45:11 lr: 0.000015 loss_cls: 3.1262 (3.1276) grad_norm: 2.7635 (3.0932) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 14:12:44 root] (utils.py 283): INFO Epoch: [6] [ 430/2502] eta: 0:44:58 lr: 0.000015 loss_cls: 3.2059 (3.1305) grad_norm: 2.6208 (3.1177) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 14:12:57 root] (utils.py 283): INFO Epoch: [6] [ 440/2502] eta: 0:44:45 lr: 0.000015 loss_cls: 3.3491 (3.1308) grad_norm: 2.7127 (3.1111) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 14:13:10 root] (utils.py 283): INFO Epoch: [6] [ 450/2502] eta: 0:44:32 lr: 0.000015 loss_cls: 3.2836 (3.1295) grad_norm: 2.6881 (3.0999) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 14:13:23 root] (utils.py 283): INFO Epoch: [6] [ 460/2502] eta: 0:44:19 lr: 0.000015 loss_cls: 2.9113 (3.1226) grad_norm: 2.6881 (3.0999) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 14:13:36 root] (utils.py 283): INFO Epoch: [6] [ 470/2502] eta: 0:44:06 lr: 0.000015 loss_cls: 3.0274 (3.1244) grad_norm: 2.6790 (3.0906) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 14:13:49 root] (utils.py 283): INFO Epoch: [6] [ 480/2502] eta: 0:43:53 lr: 0.000015 loss_cls: 3.4265 (3.1310) grad_norm: 2.6788 (3.0954) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 14:14:02 root] (utils.py 283): INFO Epoch: [6] [ 490/2502] eta: 0:43:40 lr: 0.000015 loss_cls: 3.4265 (3.1309) grad_norm: 2.7302 (3.0891) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 14:14:15 root] (utils.py 283): INFO Epoch: [6] [ 500/2502] eta: 0:43:27 lr: 0.000015 loss_cls: 3.2656 (3.1310) grad_norm: 2.6943 (3.0926) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 14:14:28 root] (utils.py 283): INFO Epoch: [6] [ 510/2502] eta: 0:43:14 lr: 0.000015 loss_cls: 3.3033 (3.1347) grad_norm: 2.4076 (3.0843) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 14:14:41 root] (utils.py 283): INFO Epoch: [6] [ 520/2502] eta: 0:43:01 lr: 0.000015 loss_cls: 3.3033 (3.1336) grad_norm: 2.5523 (3.0834) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 14:14:54 root] (utils.py 283): INFO Epoch: [6] [ 530/2502] eta: 0:42:48 lr: 0.000015 loss_cls: 3.2193 (3.1342) grad_norm: 2.8341 (3.1009) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 14:15:07 root] (utils.py 283): INFO Epoch: [6] [ 540/2502] eta: 0:42:35 lr: 0.000015 loss_cls: 3.2193 (3.1360) grad_norm: 2.9105 (3.1070) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 14:15:20 root] (utils.py 283): INFO Epoch: [6] [ 550/2502] eta: 0:42:22 lr: 0.000015 loss_cls: 3.2426 (3.1376) grad_norm: 2.8582 (3.1047) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 14:15:33 root] (utils.py 283): INFO Epoch: [6] [ 560/2502] eta: 0:42:09 lr: 0.000015 loss_cls: 3.4265 (3.1407) grad_norm: 2.9069 (3.1044) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 14:15:46 root] (utils.py 283): INFO Epoch: [6] [ 570/2502] eta: 0:41:56 lr: 0.000015 loss_cls: 3.3194 (3.1403) grad_norm: 2.9126 (3.1012) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 14:15:59 root] (utils.py 283): INFO Epoch: [6] [ 580/2502] eta: 0:41:43 lr: 0.000015 loss_cls: 2.9065 (3.1392) grad_norm: 2.7310 (3.0950) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 14:16:12 root] (utils.py 283): INFO Epoch: [6] [ 590/2502] eta: 0:41:30 lr: 0.000015 loss_cls: 3.1374 (3.1361) grad_norm: 2.6570 (3.0877) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 14:16:25 root] (utils.py 283): INFO Epoch: [6] [ 600/2502] eta: 0:41:17 lr: 0.000015 loss_cls: 3.3241 (3.1390) grad_norm: 2.6880 (3.0867) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 14:16:38 root] (utils.py 283): INFO Epoch: [6] [ 610/2502] eta: 0:41:04 lr: 0.000015 loss_cls: 3.3311 (3.1420) grad_norm: 2.7464 (3.0940) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:16:52 root] (utils.py 283): INFO Epoch: [6] [ 620/2502] eta: 0:40:51 lr: 0.000015 loss_cls: 3.0875 (3.1440) grad_norm: 2.7378 (3.0927) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 14:17:05 root] (utils.py 283): INFO Epoch: [6] [ 630/2502] eta: 0:40:38 lr: 0.000015 loss_cls: 3.0776 (3.1409) grad_norm: 2.7255 (3.1091) time: 1.3074 data: 0.0003 max mem: 13912 +[2024-12-06 14:17:18 root] (utils.py 283): INFO Epoch: [6] [ 640/2502] eta: 0:40:25 lr: 0.000015 loss_cls: 3.0776 (3.1377) grad_norm: 2.5920 (3.1040) time: 1.3071 data: 0.0003 max mem: 13912 +[2024-12-06 14:17:31 root] (utils.py 283): INFO Epoch: [6] [ 650/2502] eta: 0:40:12 lr: 0.000015 loss_cls: 3.2075 (3.1400) grad_norm: 2.7621 (3.1020) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 14:17:44 root] (utils.py 283): INFO Epoch: [6] [ 660/2502] eta: 0:39:59 lr: 0.000015 loss_cls: 3.3075 (3.1403) grad_norm: 2.7991 (3.0992) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:17:57 root] (utils.py 283): INFO Epoch: [6] [ 670/2502] eta: 0:39:46 lr: 0.000015 loss_cls: 3.1793 (3.1398) grad_norm: 2.9366 (3.1027) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 14:18:10 root] (utils.py 283): INFO Epoch: [6] [ 680/2502] eta: 0:39:34 lr: 0.000015 loss_cls: 2.9806 (3.1381) grad_norm: 2.6310 (3.0956) time: 1.3087 data: 0.0002 max mem: 13912 +[2024-12-06 14:18:23 root] (utils.py 283): INFO Epoch: [6] [ 690/2502] eta: 0:39:21 lr: 0.000015 loss_cls: 3.0911 (3.1386) grad_norm: 2.5103 (3.1422) time: 1.3142 data: 0.0002 max mem: 13912 +[2024-12-06 14:18:36 root] (utils.py 283): INFO Epoch: [6] [ 700/2502] eta: 0:39:08 lr: 0.000015 loss_cls: 3.1954 (3.1391) grad_norm: 2.6235 (3.1363) time: 1.3084 data: 0.0002 max mem: 13912 +[2024-12-06 14:18:49 root] (utils.py 283): INFO Epoch: [6] [ 710/2502] eta: 0:38:55 lr: 0.000015 loss_cls: 3.0953 (3.1391) grad_norm: 2.6880 (3.1375) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 14:19:02 root] (utils.py 283): INFO Epoch: [6] [ 720/2502] eta: 0:38:42 lr: 0.000015 loss_cls: 3.2396 (3.1396) grad_norm: 2.9362 (3.1345) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:19:15 root] (utils.py 283): INFO Epoch: [6] [ 730/2502] eta: 0:38:29 lr: 0.000015 loss_cls: 3.2545 (3.1394) grad_norm: 2.9362 (3.1339) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 14:19:28 root] (utils.py 283): INFO Epoch: [6] [ 740/2502] eta: 0:38:16 lr: 0.000015 loss_cls: 3.3190 (3.1410) grad_norm: 2.6778 (3.1284) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 14:19:41 root] (utils.py 283): INFO Epoch: [6] [ 750/2502] eta: 0:38:03 lr: 0.000015 loss_cls: 3.1992 (3.1409) grad_norm: 2.6606 (3.1229) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 14:19:54 root] (utils.py 283): INFO Epoch: [6] [ 760/2502] eta: 0:37:50 lr: 0.000015 loss_cls: 3.1687 (3.1390) grad_norm: 2.5755 (3.1179) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 14:20:07 root] (utils.py 283): INFO Epoch: [6] [ 770/2502] eta: 0:37:37 lr: 0.000015 loss_cls: 3.0819 (3.1369) grad_norm: 2.5829 (3.1155) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:20:20 root] (utils.py 283): INFO Epoch: [6] [ 780/2502] eta: 0:37:24 lr: 0.000015 loss_cls: 3.1302 (3.1381) grad_norm: 2.7998 (3.1147) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 14:20:33 root] (utils.py 283): INFO Epoch: [6] [ 790/2502] eta: 0:37:11 lr: 0.000015 loss_cls: 3.3388 (3.1388) grad_norm: 2.6285 (3.1157) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 14:20:46 root] (utils.py 283): INFO Epoch: [6] [ 800/2502] eta: 0:36:57 lr: 0.000015 loss_cls: 3.0677 (3.1362) grad_norm: 2.5501 (3.1112) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 14:20:59 root] (utils.py 283): INFO Epoch: [6] [ 810/2502] eta: 0:36:44 lr: 0.000015 loss_cls: 3.0677 (3.1380) grad_norm: 2.5990 (3.1125) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 14:21:12 root] (utils.py 283): INFO Epoch: [6] [ 820/2502] eta: 0:36:31 lr: 0.000015 loss_cls: 3.4426 (3.1391) grad_norm: 2.6827 (3.1129) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 14:21:25 root] (utils.py 283): INFO Epoch: [6] [ 830/2502] eta: 0:36:18 lr: 0.000015 loss_cls: 3.2529 (3.1406) grad_norm: 2.8164 (3.1224) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 14:21:38 root] (utils.py 283): INFO Epoch: [6] [ 840/2502] eta: 0:36:05 lr: 0.000015 loss_cls: 3.2529 (3.1384) grad_norm: 2.9457 (3.1304) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 14:21:52 root] (utils.py 283): INFO Epoch: [6] [ 850/2502] eta: 0:35:53 lr: 0.000015 loss_cls: 3.3902 (3.1412) grad_norm: 2.7832 (3.1274) time: 1.3116 data: 0.0003 max mem: 13912 +[2024-12-06 14:22:05 root] (utils.py 283): INFO Epoch: [6] [ 860/2502] eta: 0:35:40 lr: 0.000015 loss_cls: 3.2798 (3.1405) grad_norm: 2.5866 (3.1212) time: 1.3141 data: 0.0003 max mem: 13912 +[2024-12-06 14:22:18 root] (utils.py 283): INFO Epoch: [6] [ 870/2502] eta: 0:35:26 lr: 0.000015 loss_cls: 3.3694 (3.1449) grad_norm: 2.5797 (3.1194) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 14:22:31 root] (utils.py 283): INFO Epoch: [6] [ 880/2502] eta: 0:35:14 lr: 0.000015 loss_cls: 3.4039 (3.1469) grad_norm: 2.8035 (3.1212) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 14:22:44 root] (utils.py 283): INFO Epoch: [6] [ 890/2502] eta: 0:35:00 lr: 0.000015 loss_cls: 3.1370 (3.1441) grad_norm: 2.7778 (3.1296) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:22:57 root] (utils.py 283): INFO Epoch: [6] [ 900/2502] eta: 0:34:47 lr: 0.000015 loss_cls: 2.9286 (3.1435) grad_norm: 2.5742 (3.1297) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 14:23:10 root] (utils.py 283): INFO Epoch: [6] [ 910/2502] eta: 0:34:34 lr: 0.000015 loss_cls: 3.0984 (3.1419) grad_norm: 2.7427 (3.1299) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 14:23:23 root] (utils.py 283): INFO Epoch: [6] [ 920/2502] eta: 0:34:21 lr: 0.000015 loss_cls: 2.6785 (3.1361) grad_norm: 2.5887 (3.1231) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 14:23:36 root] (utils.py 283): INFO Epoch: [6] [ 930/2502] eta: 0:34:08 lr: 0.000015 loss_cls: 2.9837 (3.1364) grad_norm: 2.4866 (3.1206) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 14:23:49 root] (utils.py 283): INFO Epoch: [6] [ 940/2502] eta: 0:33:55 lr: 0.000015 loss_cls: 3.0923 (3.1353) grad_norm: 2.6013 (3.1153) time: 1.3005 data: 0.0003 max mem: 13912 +[2024-12-06 14:24:02 root] (utils.py 283): INFO Epoch: [6] [ 950/2502] eta: 0:33:42 lr: 0.000015 loss_cls: 3.0923 (3.1346) grad_norm: 2.5639 (3.1121) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 14:24:15 root] (utils.py 283): INFO Epoch: [6] [ 960/2502] eta: 0:33:29 lr: 0.000015 loss_cls: 3.2892 (3.1333) grad_norm: 2.5258 (3.1061) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 14:24:28 root] (utils.py 283): INFO Epoch: [6] [ 970/2502] eta: 0:33:16 lr: 0.000015 loss_cls: 3.0120 (3.1315) grad_norm: 2.5258 (3.1067) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 14:24:41 root] (utils.py 283): INFO Epoch: [6] [ 980/2502] eta: 0:33:03 lr: 0.000015 loss_cls: 2.9887 (3.1299) grad_norm: 2.6736 (3.1046) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 14:24:54 root] (utils.py 283): INFO Epoch: [6] [ 990/2502] eta: 0:32:50 lr: 0.000015 loss_cls: 3.1250 (3.1315) grad_norm: 2.6323 (3.1017) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 14:25:07 root] (utils.py 283): INFO Epoch: [6] [1000/2502] eta: 0:32:37 lr: 0.000015 loss_cls: 3.1883 (3.1302) grad_norm: 2.6446 (3.1001) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 14:25:20 root] (utils.py 283): INFO Epoch: [6] [1010/2502] eta: 0:32:24 lr: 0.000015 loss_cls: 3.1503 (3.1309) grad_norm: 2.7037 (3.1000) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 14:25:33 root] (utils.py 283): INFO Epoch: [6] [1020/2502] eta: 0:32:11 lr: 0.000015 loss_cls: 3.1495 (3.1295) grad_norm: 2.6389 (3.0956) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 14:25:46 root] (utils.py 283): INFO Epoch: [6] [1030/2502] eta: 0:31:58 lr: 0.000015 loss_cls: 3.1682 (3.1298) grad_norm: 2.5933 (3.0945) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 14:25:59 root] (utils.py 283): INFO Epoch: [6] [1040/2502] eta: 0:31:45 lr: 0.000015 loss_cls: 3.1701 (3.1288) grad_norm: 2.6053 (3.0920) time: 1.3075 data: 0.0003 max mem: 13912 +[2024-12-06 14:26:12 root] (utils.py 283): INFO Epoch: [6] [1050/2502] eta: 0:31:32 lr: 0.000015 loss_cls: 3.1421 (3.1270) grad_norm: 2.8132 (3.0901) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 14:26:25 root] (utils.py 283): INFO Epoch: [6] [1060/2502] eta: 0:31:19 lr: 0.000015 loss_cls: 3.1421 (3.1270) grad_norm: 2.7905 (3.0859) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 14:26:38 root] (utils.py 283): INFO Epoch: [6] [1070/2502] eta: 0:31:06 lr: 0.000015 loss_cls: 3.3706 (3.1301) grad_norm: 2.6079 (3.0826) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 14:26:51 root] (utils.py 283): INFO Epoch: [6] [1080/2502] eta: 0:30:53 lr: 0.000015 loss_cls: 3.3517 (3.1287) grad_norm: 2.5698 (3.0788) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 14:27:04 root] (utils.py 283): INFO Epoch: [6] [1090/2502] eta: 0:30:40 lr: 0.000015 loss_cls: 3.1601 (3.1265) grad_norm: 2.6433 (3.0776) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:27:17 root] (utils.py 283): INFO Epoch: [6] [1100/2502] eta: 0:30:27 lr: 0.000015 loss_cls: 3.0206 (3.1269) grad_norm: 2.6610 (3.0737) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 14:27:30 root] (utils.py 283): INFO Epoch: [6] [1110/2502] eta: 0:30:14 lr: 0.000015 loss_cls: 3.0206 (3.1266) grad_norm: 2.6398 (3.0722) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 14:27:43 root] (utils.py 283): INFO Epoch: [6] [1120/2502] eta: 0:30:00 lr: 0.000015 loss_cls: 3.0822 (3.1265) grad_norm: 2.7253 (3.0703) time: 1.3034 data: 0.0002 max mem: 13912 +[2024-12-06 14:27:56 root] (utils.py 283): INFO Epoch: [6] [1130/2502] eta: 0:29:47 lr: 0.000015 loss_cls: 3.3429 (3.1283) grad_norm: 2.6745 (3.0703) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 14:28:09 root] (utils.py 283): INFO Epoch: [6] [1140/2502] eta: 0:29:34 lr: 0.000015 loss_cls: 3.3556 (3.1302) grad_norm: 2.5694 (3.0716) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 14:28:23 root] (utils.py 283): INFO Epoch: [6] [1150/2502] eta: 0:29:21 lr: 0.000015 loss_cls: 3.1916 (3.1303) grad_norm: 2.6683 (3.0722) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 14:28:36 root] (utils.py 283): INFO Epoch: [6] [1160/2502] eta: 0:29:08 lr: 0.000015 loss_cls: 3.2084 (3.1302) grad_norm: 2.6263 (3.0686) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 14:28:49 root] (utils.py 283): INFO Epoch: [6] [1170/2502] eta: 0:28:55 lr: 0.000015 loss_cls: 3.0948 (3.1283) grad_norm: 2.7326 (3.0689) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 14:29:02 root] (utils.py 283): INFO Epoch: [6] [1180/2502] eta: 0:28:42 lr: 0.000015 loss_cls: 2.9985 (3.1290) grad_norm: 2.8353 (3.0707) time: 1.3059 data: 0.0002 max mem: 13912 +[2024-12-06 14:29:15 root] (utils.py 283): INFO Epoch: [6] [1190/2502] eta: 0:28:29 lr: 0.000015 loss_cls: 3.3710 (3.1305) grad_norm: 2.7073 (3.0682) time: 1.3040 data: 0.0002 max mem: 13912 +[2024-12-06 14:29:28 root] (utils.py 283): INFO Epoch: [6] [1200/2502] eta: 0:28:16 lr: 0.000015 loss_cls: 3.4145 (3.1313) grad_norm: 2.7073 (3.0803) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 14:29:41 root] (utils.py 283): INFO Epoch: [6] [1210/2502] eta: 0:28:03 lr: 0.000015 loss_cls: 3.1860 (3.1303) grad_norm: 2.7344 (3.0777) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 14:29:54 root] (utils.py 283): INFO Epoch: [6] [1220/2502] eta: 0:27:50 lr: 0.000015 loss_cls: 3.2280 (3.1314) grad_norm: 2.6806 (3.0751) time: 1.3014 data: 0.0002 max mem: 13912 +[2024-12-06 14:30:07 root] (utils.py 283): INFO Epoch: [6] [1230/2502] eta: 0:27:37 lr: 0.000015 loss_cls: 3.0605 (3.1285) grad_norm: 2.7824 (3.0749) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 14:30:20 root] (utils.py 283): INFO Epoch: [6] [1240/2502] eta: 0:27:24 lr: 0.000015 loss_cls: 2.9241 (3.1287) grad_norm: 2.8588 (3.0746) time: 1.3168 data: 0.0003 max mem: 13912 +[2024-12-06 14:30:33 root] (utils.py 283): INFO Epoch: [6] [1250/2502] eta: 0:27:12 lr: 0.000015 loss_cls: 3.0958 (3.1297) grad_norm: 2.8356 (3.0857) time: 1.3308 data: 0.0004 max mem: 13912 +[2024-12-06 14:30:46 root] (utils.py 283): INFO Epoch: [6] [1260/2502] eta: 0:26:59 lr: 0.000015 loss_cls: 3.0958 (3.1277) grad_norm: 2.7048 (3.0829) time: 1.3176 data: 0.0003 max mem: 13912 +[2024-12-06 14:30:59 root] (utils.py 283): INFO Epoch: [6] [1270/2502] eta: 0:26:46 lr: 0.000015 loss_cls: 3.0983 (3.1274) grad_norm: 2.5938 (3.0800) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 14:31:13 root] (utils.py 283): INFO Epoch: [6] [1280/2502] eta: 0:26:33 lr: 0.000015 loss_cls: 3.2363 (3.1271) grad_norm: 2.7377 (3.0797) time: 1.3057 data: 0.0002 max mem: 13912 +[2024-12-06 14:31:26 root] (utils.py 283): INFO Epoch: [6] [1290/2502] eta: 0:26:20 lr: 0.000015 loss_cls: 3.0467 (3.1262) grad_norm: 2.6799 (3.0812) time: 1.3068 data: 0.0002 max mem: 13912 +[2024-12-06 14:31:39 root] (utils.py 283): INFO Epoch: [6] [1300/2502] eta: 0:26:06 lr: 0.000015 loss_cls: 3.1021 (3.1267) grad_norm: 2.5512 (3.0793) time: 1.3068 data: 0.0002 max mem: 13912 +[2024-12-06 14:31:52 root] (utils.py 283): INFO Epoch: [6] [1310/2502] eta: 0:25:53 lr: 0.000015 loss_cls: 3.1687 (3.1272) grad_norm: 2.6485 (3.0788) time: 1.3039 data: 0.0002 max mem: 13912 +[2024-12-06 14:32:05 root] (utils.py 283): INFO Epoch: [6] [1320/2502] eta: 0:25:40 lr: 0.000015 loss_cls: 3.2241 (3.1273) grad_norm: 2.7165 (3.0791) time: 1.3002 data: 0.0003 max mem: 13912 +[2024-12-06 14:32:18 root] (utils.py 283): INFO Epoch: [6] [1330/2502] eta: 0:25:27 lr: 0.000015 loss_cls: 3.2084 (3.1264) grad_norm: 2.6191 (3.0763) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 14:32:31 root] (utils.py 283): INFO Epoch: [6] [1340/2502] eta: 0:25:14 lr: 0.000015 loss_cls: 3.0168 (3.1245) grad_norm: 2.7113 (3.0769) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 14:32:44 root] (utils.py 283): INFO Epoch: [6] [1350/2502] eta: 0:25:01 lr: 0.000015 loss_cls: 3.0168 (3.1235) grad_norm: 2.8029 (3.0763) time: 1.3005 data: 0.0003 max mem: 13912 +[2024-12-06 14:32:57 root] (utils.py 283): INFO Epoch: [6] [1360/2502] eta: 0:24:48 lr: 0.000015 loss_cls: 3.1077 (3.1223) grad_norm: 2.8029 (3.0862) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 14:33:10 root] (utils.py 283): INFO Epoch: [6] [1370/2502] eta: 0:24:35 lr: 0.000015 loss_cls: 2.9390 (3.1213) grad_norm: 2.8241 (3.0855) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 14:33:23 root] (utils.py 283): INFO Epoch: [6] [1380/2502] eta: 0:24:22 lr: 0.000015 loss_cls: 3.1502 (3.1214) grad_norm: 2.7936 (3.0851) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 14:33:36 root] (utils.py 283): INFO Epoch: [6] [1390/2502] eta: 0:24:09 lr: 0.000015 loss_cls: 3.1502 (3.1216) grad_norm: 2.7153 (3.0933) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 14:33:49 root] (utils.py 283): INFO Epoch: [6] [1400/2502] eta: 0:23:56 lr: 0.000015 loss_cls: 3.2006 (3.1223) grad_norm: 2.8512 (3.0921) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 14:34:02 root] (utils.py 283): INFO Epoch: [6] [1410/2502] eta: 0:23:43 lr: 0.000015 loss_cls: 3.1069 (3.1210) grad_norm: 2.9005 (3.0909) time: 1.3034 data: 0.0002 max mem: 13912 +[2024-12-06 14:34:15 root] (utils.py 283): INFO Epoch: [6] [1420/2502] eta: 0:23:30 lr: 0.000015 loss_cls: 3.0221 (3.1208) grad_norm: 2.5381 (3.0870) time: 1.2976 data: 0.0002 max mem: 13912 +[2024-12-06 14:34:28 root] (utils.py 283): INFO Epoch: [6] [1430/2502] eta: 0:23:17 lr: 0.000015 loss_cls: 3.2363 (3.1234) grad_norm: 2.5607 (3.0867) time: 1.2919 data: 0.0002 max mem: 13912 +[2024-12-06 14:34:41 root] (utils.py 283): INFO Epoch: [6] [1440/2502] eta: 0:23:04 lr: 0.000015 loss_cls: 3.2873 (3.1226) grad_norm: 2.6245 (3.0836) time: 1.2950 data: 0.0002 max mem: 13912 +[2024-12-06 14:34:54 root] (utils.py 283): INFO Epoch: [6] [1450/2502] eta: 0:22:51 lr: 0.000015 loss_cls: 3.0831 (3.1211) grad_norm: 2.6028 (3.0875) time: 1.2942 data: 0.0003 max mem: 13912 +[2024-12-06 14:35:07 root] (utils.py 283): INFO Epoch: [6] [1460/2502] eta: 0:22:37 lr: 0.000015 loss_cls: 3.1064 (3.1217) grad_norm: 2.8948 (3.0860) time: 1.2915 data: 0.0002 max mem: 13912 +[2024-12-06 14:35:20 root] (utils.py 283): INFO Epoch: [6] [1470/2502] eta: 0:22:24 lr: 0.000015 loss_cls: 3.0518 (3.1210) grad_norm: 2.8948 (3.0849) time: 1.2998 data: 0.0002 max mem: 13912 +[2024-12-06 14:35:33 root] (utils.py 283): INFO Epoch: [6] [1480/2502] eta: 0:22:11 lr: 0.000015 loss_cls: 2.9839 (3.1210) grad_norm: 2.6482 (3.0830) time: 1.2994 data: 0.0002 max mem: 13912 +[2024-12-06 14:35:45 root] (utils.py 283): INFO Epoch: [6] [1490/2502] eta: 0:21:58 lr: 0.000015 loss_cls: 3.3996 (3.1227) grad_norm: 2.6664 (3.0877) time: 1.2897 data: 0.0002 max mem: 13912 +[2024-12-06 14:35:58 root] (utils.py 283): INFO Epoch: [6] [1500/2502] eta: 0:21:45 lr: 0.000015 loss_cls: 3.3996 (3.1243) grad_norm: 2.6850 (3.0874) time: 1.2908 data: 0.0003 max mem: 13912 +[2024-12-06 14:36:11 root] (utils.py 283): INFO Epoch: [6] [1510/2502] eta: 0:21:32 lr: 0.000015 loss_cls: 3.2553 (3.1235) grad_norm: 2.7151 (3.0872) time: 1.2923 data: 0.0003 max mem: 13912 +[2024-12-06 14:36:24 root] (utils.py 283): INFO Epoch: [6] [1520/2502] eta: 0:21:19 lr: 0.000015 loss_cls: 3.0571 (3.1238) grad_norm: 2.8916 (3.0887) time: 1.2926 data: 0.0002 max mem: 13912 +[2024-12-06 14:36:37 root] (utils.py 283): INFO Epoch: [6] [1530/2502] eta: 0:21:06 lr: 0.000015 loss_cls: 3.0676 (3.1225) grad_norm: 2.7238 (3.0859) time: 1.2934 data: 0.0002 max mem: 13912 +[2024-12-06 14:36:50 root] (utils.py 283): INFO Epoch: [6] [1540/2502] eta: 0:20:53 lr: 0.000015 loss_cls: 3.0676 (3.1216) grad_norm: 2.6524 (3.0828) time: 1.2961 data: 0.0002 max mem: 13912 +[2024-12-06 14:37:03 root] (utils.py 283): INFO Epoch: [6] [1550/2502] eta: 0:20:40 lr: 0.000015 loss_cls: 3.0618 (3.1208) grad_norm: 2.6704 (3.0825) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 14:37:16 root] (utils.py 283): INFO Epoch: [6] [1560/2502] eta: 0:20:27 lr: 0.000015 loss_cls: 3.0911 (3.1212) grad_norm: 2.6951 (3.0812) time: 1.3068 data: 0.0003 max mem: 13912 +[2024-12-06 14:37:29 root] (utils.py 283): INFO Epoch: [6] [1570/2502] eta: 0:20:14 lr: 0.000015 loss_cls: 3.3127 (3.1214) grad_norm: 2.5007 (3.0774) time: 1.3073 data: 0.0003 max mem: 13912 +[2024-12-06 14:37:42 root] (utils.py 283): INFO Epoch: [6] [1580/2502] eta: 0:20:01 lr: 0.000015 loss_cls: 3.3546 (3.1230) grad_norm: 2.6979 (3.0773) time: 1.3109 data: 0.0003 max mem: 13912 +[2024-12-06 14:37:56 root] (utils.py 283): INFO Epoch: [6] [1590/2502] eta: 0:19:48 lr: 0.000015 loss_cls: 3.1909 (3.1224) grad_norm: 2.9990 (3.0814) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 14:38:09 root] (utils.py 283): INFO Epoch: [6] [1600/2502] eta: 0:19:35 lr: 0.000015 loss_cls: 3.0535 (3.1226) grad_norm: 2.6904 (3.0844) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 14:38:22 root] (utils.py 283): INFO Epoch: [6] [1610/2502] eta: 0:19:22 lr: 0.000015 loss_cls: 3.0802 (3.1225) grad_norm: 2.7081 (3.0835) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 14:38:35 root] (utils.py 283): INFO Epoch: [6] [1620/2502] eta: 0:19:09 lr: 0.000015 loss_cls: 3.1600 (3.1238) grad_norm: 2.8279 (3.0863) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 14:38:48 root] (utils.py 283): INFO Epoch: [6] [1630/2502] eta: 0:18:56 lr: 0.000015 loss_cls: 3.3780 (3.1236) grad_norm: 2.8078 (3.0845) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 14:39:01 root] (utils.py 283): INFO Epoch: [6] [1640/2502] eta: 0:18:43 lr: 0.000015 loss_cls: 3.1608 (3.1229) grad_norm: 2.6491 (3.0831) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 14:39:14 root] (utils.py 283): INFO Epoch: [6] [1650/2502] eta: 0:18:30 lr: 0.000015 loss_cls: 3.1608 (3.1233) grad_norm: 2.6491 (3.0816) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 14:39:27 root] (utils.py 283): INFO Epoch: [6] [1660/2502] eta: 0:18:16 lr: 0.000015 loss_cls: 3.1484 (3.1231) grad_norm: 2.7244 (3.0811) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 14:39:40 root] (utils.py 283): INFO Epoch: [6] [1670/2502] eta: 0:18:03 lr: 0.000015 loss_cls: 3.1985 (3.1230) grad_norm: 2.7221 (3.0809) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 14:39:53 root] (utils.py 283): INFO Epoch: [6] [1680/2502] eta: 0:17:50 lr: 0.000015 loss_cls: 3.1985 (3.1233) grad_norm: 2.6882 (3.0823) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 14:40:06 root] (utils.py 283): INFO Epoch: [6] [1690/2502] eta: 0:17:37 lr: 0.000015 loss_cls: 3.3147 (3.1249) grad_norm: 2.6882 (3.0841) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 14:40:19 root] (utils.py 283): INFO Epoch: [6] [1700/2502] eta: 0:17:24 lr: 0.000015 loss_cls: 3.3939 (3.1258) grad_norm: 2.6583 (3.0838) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:40:32 root] (utils.py 283): INFO Epoch: [6] [1710/2502] eta: 0:17:11 lr: 0.000015 loss_cls: 2.9929 (3.1244) grad_norm: 2.6583 (3.0810) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 14:40:45 root] (utils.py 283): INFO Epoch: [6] [1720/2502] eta: 0:16:58 lr: 0.000015 loss_cls: 2.9929 (3.1233) grad_norm: 2.6410 (3.0822) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 14:40:58 root] (utils.py 283): INFO Epoch: [6] [1730/2502] eta: 0:16:45 lr: 0.000015 loss_cls: 3.2323 (3.1240) grad_norm: 2.6410 (3.0812) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 14:41:11 root] (utils.py 283): INFO Epoch: [6] [1740/2502] eta: 0:16:32 lr: 0.000015 loss_cls: 3.3582 (3.1253) grad_norm: 2.5640 (3.0791) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 14:41:24 root] (utils.py 283): INFO Epoch: [6] [1750/2502] eta: 0:16:19 lr: 0.000015 loss_cls: 3.4610 (3.1265) grad_norm: 2.6182 (3.0806) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:41:37 root] (utils.py 283): INFO Epoch: [6] [1760/2502] eta: 0:16:06 lr: 0.000015 loss_cls: 3.4965 (3.1289) grad_norm: 2.7169 (3.0837) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 14:41:50 root] (utils.py 283): INFO Epoch: [6] [1770/2502] eta: 0:15:53 lr: 0.000015 loss_cls: 3.3271 (3.1289) grad_norm: 2.8672 (3.0847) time: 1.3063 data: 0.0002 max mem: 13912 +[2024-12-06 14:42:03 root] (utils.py 283): INFO Epoch: [6] [1780/2502] eta: 0:15:40 lr: 0.000015 loss_cls: 3.0280 (3.1269) grad_norm: 2.5776 (3.0819) time: 1.3094 data: 0.0002 max mem: 13912 +[2024-12-06 14:42:16 root] (utils.py 283): INFO Epoch: [6] [1790/2502] eta: 0:15:27 lr: 0.000015 loss_cls: 3.0280 (3.1261) grad_norm: 2.5776 (3.0825) time: 1.3098 data: 0.0003 max mem: 13912 +[2024-12-06 14:42:29 root] (utils.py 283): INFO Epoch: [6] [1800/2502] eta: 0:15:14 lr: 0.000015 loss_cls: 3.1686 (3.1254) grad_norm: 2.5781 (3.0802) time: 1.3059 data: 0.0003 max mem: 13912 +[2024-12-06 14:42:42 root] (utils.py 283): INFO Epoch: [6] [1810/2502] eta: 0:15:01 lr: 0.000015 loss_cls: 3.0483 (3.1254) grad_norm: 2.5691 (3.0793) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 14:42:55 root] (utils.py 283): INFO Epoch: [6] [1820/2502] eta: 0:14:48 lr: 0.000015 loss_cls: 3.3020 (3.1261) grad_norm: 2.6194 (3.0775) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 14:43:08 root] (utils.py 283): INFO Epoch: [6] [1830/2502] eta: 0:14:35 lr: 0.000015 loss_cls: 3.2877 (3.1270) grad_norm: 2.7153 (3.0780) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 14:43:21 root] (utils.py 283): INFO Epoch: [6] [1840/2502] eta: 0:14:22 lr: 0.000015 loss_cls: 3.2220 (3.1278) grad_norm: 2.6280 (3.0770) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 14:43:35 root] (utils.py 283): INFO Epoch: [6] [1850/2502] eta: 0:14:09 lr: 0.000015 loss_cls: 3.1438 (3.1268) grad_norm: 2.6280 (3.0769) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 14:43:48 root] (utils.py 283): INFO Epoch: [6] [1860/2502] eta: 0:13:56 lr: 0.000015 loss_cls: 3.1883 (3.1275) grad_norm: 2.7911 (3.0770) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 14:44:01 root] (utils.py 283): INFO Epoch: [6] [1870/2502] eta: 0:13:43 lr: 0.000015 loss_cls: 3.2553 (3.1283) grad_norm: 2.6383 (3.0747) time: 1.3055 data: 0.0002 max mem: 13912 +[2024-12-06 14:44:14 root] (utils.py 283): INFO Epoch: [6] [1880/2502] eta: 0:13:30 lr: 0.000015 loss_cls: 3.2394 (3.1279) grad_norm: 2.5072 (3.0731) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 14:44:27 root] (utils.py 283): INFO Epoch: [6] [1890/2502] eta: 0:13:17 lr: 0.000015 loss_cls: 3.1161 (3.1265) grad_norm: 2.5072 (3.0703) time: 1.3070 data: 0.0003 max mem: 13912 +[2024-12-06 14:44:40 root] (utils.py 283): INFO Epoch: [6] [1900/2502] eta: 0:13:04 lr: 0.000015 loss_cls: 2.8576 (3.1253) grad_norm: 2.5015 (3.0680) time: 1.3069 data: 0.0003 max mem: 13912 +[2024-12-06 14:44:53 root] (utils.py 283): INFO Epoch: [6] [1910/2502] eta: 0:12:51 lr: 0.000015 loss_cls: 3.2481 (3.1264) grad_norm: 2.5461 (3.0660) time: 1.3066 data: 0.0003 max mem: 13912 +[2024-12-06 14:45:06 root] (utils.py 283): INFO Epoch: [6] [1920/2502] eta: 0:12:38 lr: 0.000015 loss_cls: 3.2374 (3.1258) grad_norm: 2.5920 (3.0634) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 14:45:19 root] (utils.py 283): INFO Epoch: [6] [1930/2502] eta: 0:12:25 lr: 0.000015 loss_cls: 3.0326 (3.1249) grad_norm: 2.4839 (3.0645) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 14:45:32 root] (utils.py 283): INFO Epoch: [6] [1940/2502] eta: 0:12:12 lr: 0.000015 loss_cls: 3.1480 (3.1266) grad_norm: 2.8242 (3.0670) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 14:45:45 root] (utils.py 283): INFO Epoch: [6] [1950/2502] eta: 0:11:59 lr: 0.000015 loss_cls: 3.4179 (3.1260) grad_norm: 2.7125 (3.0657) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 14:45:58 root] (utils.py 283): INFO Epoch: [6] [1960/2502] eta: 0:11:46 lr: 0.000015 loss_cls: 3.0677 (3.1250) grad_norm: 2.6707 (3.0658) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 14:46:11 root] (utils.py 283): INFO Epoch: [6] [1970/2502] eta: 0:11:33 lr: 0.000015 loss_cls: 3.1189 (3.1248) grad_norm: 2.5333 (3.0645) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 14:46:24 root] (utils.py 283): INFO Epoch: [6] [1980/2502] eta: 0:11:20 lr: 0.000015 loss_cls: 3.2271 (3.1245) grad_norm: 2.5333 (3.0622) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 14:46:37 root] (utils.py 283): INFO Epoch: [6] [1990/2502] eta: 0:11:07 lr: 0.000015 loss_cls: 3.2271 (3.1255) grad_norm: 2.7583 (3.0651) time: 1.3034 data: 0.0002 max mem: 13912 +[2024-12-06 14:46:50 root] (utils.py 283): INFO Epoch: [6] [2000/2502] eta: 0:10:54 lr: 0.000015 loss_cls: 3.1081 (3.1252) grad_norm: 2.7583 (3.0637) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 14:47:03 root] (utils.py 283): INFO Epoch: [6] [2010/2502] eta: 0:10:41 lr: 0.000015 loss_cls: 2.8905 (3.1234) grad_norm: 2.6209 (3.0617) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 14:47:16 root] (utils.py 283): INFO Epoch: [6] [2020/2502] eta: 0:10:28 lr: 0.000015 loss_cls: 2.7403 (3.1227) grad_norm: 2.6132 (3.0619) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 14:47:29 root] (utils.py 283): INFO Epoch: [6] [2030/2502] eta: 0:10:15 lr: 0.000015 loss_cls: 3.0456 (3.1223) grad_norm: 2.8705 (3.0617) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 14:47:42 root] (utils.py 283): INFO Epoch: [6] [2040/2502] eta: 0:10:02 lr: 0.000015 loss_cls: 3.1101 (3.1217) grad_norm: 2.8742 (3.0606) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 14:47:55 root] (utils.py 283): INFO Epoch: [6] [2050/2502] eta: 0:09:49 lr: 0.000015 loss_cls: 3.1864 (3.1218) grad_norm: 2.8125 (3.0639) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 14:48:09 root] (utils.py 283): INFO Epoch: [6] [2060/2502] eta: 0:09:35 lr: 0.000015 loss_cls: 3.1062 (3.1213) grad_norm: 2.8125 (3.0624) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 14:48:22 root] (utils.py 283): INFO Epoch: [6] [2070/2502] eta: 0:09:22 lr: 0.000015 loss_cls: 3.1578 (3.1218) grad_norm: 2.7198 (3.0631) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 14:48:35 root] (utils.py 283): INFO Epoch: [6] [2080/2502] eta: 0:09:09 lr: 0.000015 loss_cls: 3.1860 (3.1215) grad_norm: 2.7198 (3.0623) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 14:48:48 root] (utils.py 283): INFO Epoch: [6] [2090/2502] eta: 0:08:56 lr: 0.000015 loss_cls: 3.2456 (3.1222) grad_norm: 2.6226 (3.0613) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 14:49:01 root] (utils.py 283): INFO Epoch: [6] [2100/2502] eta: 0:08:43 lr: 0.000015 loss_cls: 3.2416 (3.1218) grad_norm: 2.7445 (3.0601) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 14:49:14 root] (utils.py 283): INFO Epoch: [6] [2110/2502] eta: 0:08:30 lr: 0.000015 loss_cls: 3.2416 (3.1230) grad_norm: 2.7535 (3.0581) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:49:27 root] (utils.py 283): INFO Epoch: [6] [2120/2502] eta: 0:08:17 lr: 0.000015 loss_cls: 3.3136 (3.1227) grad_norm: 2.5529 (3.0584) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 14:49:40 root] (utils.py 283): INFO Epoch: [6] [2130/2502] eta: 0:08:04 lr: 0.000015 loss_cls: 3.3136 (3.1232) grad_norm: 2.4886 (3.0572) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 14:49:53 root] (utils.py 283): INFO Epoch: [6] [2140/2502] eta: 0:07:51 lr: 0.000015 loss_cls: 3.3264 (3.1234) grad_norm: 2.6700 (3.0562) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 14:50:06 root] (utils.py 283): INFO Epoch: [6] [2150/2502] eta: 0:07:38 lr: 0.000015 loss_cls: 3.4177 (3.1242) grad_norm: 2.7473 (3.0562) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 14:50:19 root] (utils.py 283): INFO Epoch: [6] [2160/2502] eta: 0:07:25 lr: 0.000015 loss_cls: 3.1247 (3.1241) grad_norm: 2.6226 (3.0540) time: 1.3092 data: 0.0003 max mem: 13912 +[2024-12-06 14:50:32 root] (utils.py 283): INFO Epoch: [6] [2170/2502] eta: 0:07:12 lr: 0.000015 loss_cls: 3.0240 (3.1231) grad_norm: 2.6560 (3.0556) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 14:50:45 root] (utils.py 283): INFO Epoch: [6] [2180/2502] eta: 0:06:59 lr: 0.000015 loss_cls: 2.7757 (3.1218) grad_norm: 2.7680 (3.0547) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 14:50:58 root] (utils.py 283): INFO Epoch: [6] [2190/2502] eta: 0:06:46 lr: 0.000015 loss_cls: 3.1361 (3.1227) grad_norm: 2.7184 (3.0535) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 14:51:11 root] (utils.py 283): INFO Epoch: [6] [2200/2502] eta: 0:06:33 lr: 0.000015 loss_cls: 3.2807 (3.1230) grad_norm: 2.5604 (3.0543) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:51:24 root] (utils.py 283): INFO Epoch: [6] [2210/2502] eta: 0:06:20 lr: 0.000015 loss_cls: 3.2013 (3.1223) grad_norm: 2.5604 (3.0523) time: 1.3083 data: 0.0003 max mem: 13912 +[2024-12-06 14:51:37 root] (utils.py 283): INFO Epoch: [6] [2220/2502] eta: 0:06:07 lr: 0.000015 loss_cls: 3.1725 (3.1221) grad_norm: 2.5818 (3.0506) time: 1.3065 data: 0.0003 max mem: 13912 +[2024-12-06 14:51:50 root] (utils.py 283): INFO Epoch: [6] [2230/2502] eta: 0:05:54 lr: 0.000015 loss_cls: 3.1958 (3.1224) grad_norm: 2.6215 (3.0504) time: 1.3003 data: 0.0002 max mem: 13912 +[2024-12-06 14:52:03 root] (utils.py 283): INFO Epoch: [6] [2240/2502] eta: 0:05:41 lr: 0.000015 loss_cls: 3.2906 (3.1225) grad_norm: 2.8421 (3.0501) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 14:52:16 root] (utils.py 283): INFO Epoch: [6] [2250/2502] eta: 0:05:28 lr: 0.000015 loss_cls: 3.2906 (3.1229) grad_norm: 2.8514 (3.0514) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 14:52:29 root] (utils.py 283): INFO Epoch: [6] [2260/2502] eta: 0:05:15 lr: 0.000015 loss_cls: 3.1281 (3.1222) grad_norm: 2.6650 (3.0494) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 14:52:42 root] (utils.py 283): INFO Epoch: [6] [2270/2502] eta: 0:05:02 lr: 0.000015 loss_cls: 3.1281 (3.1223) grad_norm: 2.6133 (3.0524) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 14:52:55 root] (utils.py 283): INFO Epoch: [6] [2280/2502] eta: 0:04:49 lr: 0.000015 loss_cls: 3.3566 (3.1233) grad_norm: 2.7288 (3.0525) time: 1.3077 data: 0.0002 max mem: 13912 +[2024-12-06 14:53:08 root] (utils.py 283): INFO Epoch: [6] [2290/2502] eta: 0:04:36 lr: 0.000015 loss_cls: 3.2213 (3.1233) grad_norm: 2.9107 (3.0539) time: 1.3090 data: 0.0002 max mem: 13912 +[2024-12-06 14:53:21 root] (utils.py 283): INFO Epoch: [6] [2300/2502] eta: 0:04:23 lr: 0.000015 loss_cls: 3.2213 (3.1238) grad_norm: 2.7202 (3.0521) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:53:35 root] (utils.py 283): INFO Epoch: [6] [2310/2502] eta: 0:04:10 lr: 0.000015 loss_cls: 3.3405 (3.1237) grad_norm: 2.5768 (3.0507) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 14:53:48 root] (utils.py 283): INFO Epoch: [6] [2320/2502] eta: 0:03:57 lr: 0.000015 loss_cls: 3.3866 (3.1245) grad_norm: 2.6426 (3.0491) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 14:54:01 root] (utils.py 283): INFO Epoch: [6] [2330/2502] eta: 0:03:44 lr: 0.000015 loss_cls: 3.2384 (3.1237) grad_norm: 2.6525 (3.0489) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 14:54:14 root] (utils.py 283): INFO Epoch: [6] [2340/2502] eta: 0:03:31 lr: 0.000015 loss_cls: 3.1800 (3.1243) grad_norm: 2.7596 (3.0474) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 14:54:27 root] (utils.py 283): INFO Epoch: [6] [2350/2502] eta: 0:03:18 lr: 0.000015 loss_cls: 3.2271 (3.1236) grad_norm: 2.7596 (3.0466) time: 1.2975 data: 0.0002 max mem: 13912 +[2024-12-06 14:54:40 root] (utils.py 283): INFO Epoch: [6] [2360/2502] eta: 0:03:05 lr: 0.000015 loss_cls: 3.1198 (3.1232) grad_norm: 2.7825 (3.0457) time: 1.2961 data: 0.0002 max mem: 13912 +[2024-12-06 14:54:52 root] (utils.py 283): INFO Epoch: [6] [2370/2502] eta: 0:02:52 lr: 0.000015 loss_cls: 2.9548 (3.1218) grad_norm: 2.6574 (3.0445) time: 1.2931 data: 0.0003 max mem: 13912 +[2024-12-06 14:55:05 root] (utils.py 283): INFO Epoch: [6] [2380/2502] eta: 0:02:38 lr: 0.000015 loss_cls: 3.0358 (3.1222) grad_norm: 2.6584 (3.0434) time: 1.2911 data: 0.0002 max mem: 13912 +[2024-12-06 14:55:18 root] (utils.py 283): INFO Epoch: [6] [2390/2502] eta: 0:02:25 lr: 0.000015 loss_cls: 3.3522 (3.1227) grad_norm: 2.5087 (3.0412) time: 1.2936 data: 0.0002 max mem: 13912 +[2024-12-06 14:55:31 root] (utils.py 283): INFO Epoch: [6] [2400/2502] eta: 0:02:12 lr: 0.000015 loss_cls: 3.2920 (3.1229) grad_norm: 2.4193 (3.0394) time: 1.2987 data: 0.0002 max mem: 13912 +[2024-12-06 14:55:44 root] (utils.py 283): INFO Epoch: [6] [2410/2502] eta: 0:01:59 lr: 0.000015 loss_cls: 3.3175 (3.1242) grad_norm: 2.4615 (3.0379) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 14:55:57 root] (utils.py 283): INFO Epoch: [6] [2420/2502] eta: 0:01:46 lr: 0.000015 loss_cls: 3.3245 (3.1241) grad_norm: 2.4615 (3.0376) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 14:56:10 root] (utils.py 283): INFO Epoch: [6] [2430/2502] eta: 0:01:33 lr: 0.000015 loss_cls: 3.0874 (3.1239) grad_norm: 2.4509 (3.0381) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:56:24 root] (utils.py 283): INFO Epoch: [6] [2440/2502] eta: 0:01:20 lr: 0.000015 loss_cls: 3.0433 (3.1229) grad_norm: 2.7471 (3.0400) time: 1.3109 data: 0.0003 max mem: 13912 +[2024-12-06 14:56:37 root] (utils.py 283): INFO Epoch: [6] [2450/2502] eta: 0:01:07 lr: 0.000015 loss_cls: 3.0657 (3.1223) grad_norm: 2.6407 (3.0400) time: 1.3112 data: 0.0003 max mem: 13912 +[2024-12-06 14:56:50 root] (utils.py 283): INFO Epoch: [6] [2460/2502] eta: 0:00:54 lr: 0.000015 loss_cls: 3.3047 (3.1225) grad_norm: 2.6235 (3.0421) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 14:57:03 root] (utils.py 283): INFO Epoch: [6] [2470/2502] eta: 0:00:41 lr: 0.000015 loss_cls: 3.2755 (3.1222) grad_norm: 2.6061 (3.0412) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 14:57:16 root] (utils.py 283): INFO Epoch: [6] [2480/2502] eta: 0:00:28 lr: 0.000015 loss_cls: 3.2998 (3.1224) grad_norm: 2.6107 (3.0402) time: 1.3066 data: 0.0003 max mem: 13912 +[2024-12-06 14:57:29 root] (utils.py 283): INFO Epoch: [6] [2490/2502] eta: 0:00:15 lr: 0.000015 loss_cls: 3.0658 (3.1212) grad_norm: 2.7379 (3.0398) time: 1.3298 data: 0.0250 max mem: 13912 +[2024-12-06 14:57:42 root] (utils.py 283): INFO Epoch: [6] [2500/2502] eta: 0:00:02 lr: 0.000015 loss_cls: 3.0242 (3.1216) grad_norm: 2.6060 (3.0378) time: 1.3308 data: 0.0250 max mem: 13912 +[2024-12-06 14:57:44 root] (utils.py 283): INFO Epoch: [6] [2501/2502] eta: 0:00:01 lr: 0.000015 loss_cls: 3.0602 (3.1217) grad_norm: 2.6060 (3.0377) time: 1.3314 data: 0.0250 max mem: 13912 +[2024-12-06 14:57:44 root] (utils.py 297): INFO Epoch: [6] Total time: 0:54:21 (1.3035 s / it) +[2024-12-06 14:57:44 root] (engine.py 179): INFO Averaged stats:lr: 0.000015 loss_cls: 3.0602 (3.1335) grad_norm: 2.6060 (3.0377) +[2024-12-06 14:57:45 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4115 (0.4115) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.2251 data: 0.0004 max mem: 13912 +[2024-12-06 14:57:47 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:19 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6372 (0.6624) acc1: 87.5000 (86.2926) acc3: 96.0938 (95.3835) acc5: 96.8750 (97.0881) time: 0.2271 data: 0.0005 max mem: 13912 +[2024-12-06 14:57:49 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6983 (0.7038) acc1: 85.1562 (85.3795) acc3: 95.3125 (94.9405) acc5: 96.8750 (96.7634) time: 0.2275 data: 0.0005 max mem: 13912 +[2024-12-06 14:57:52 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7473 (0.7267) acc1: 83.5938 (84.3498) acc3: 94.5312 (94.7833) acc5: 96.8750 (96.7238) time: 0.2277 data: 0.0005 max mem: 13912 +[2024-12-06 14:57:54 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7786 (0.7287) acc1: 83.5938 (84.2607) acc3: 95.3125 (94.8933) acc5: 96.8750 (96.8178) time: 0.2279 data: 0.0005 max mem: 13912 +[2024-12-06 14:57:56 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8078 (0.8118) acc1: 78.9062 (82.1998) acc3: 92.9688 (93.5815) acc5: 95.3125 (95.9252) time: 0.2282 data: 0.0005 max mem: 13912 +[2024-12-06 14:57:59 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0785 (0.8433) acc1: 75.7812 (81.8519) acc3: 88.2812 (92.8919) acc5: 92.1875 (95.2741) time: 0.2283 data: 0.0005 max mem: 13912 +[2024-12-06 14:58:01 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0690 (0.8744) acc1: 79.6875 (81.0629) acc3: 90.6250 (92.6166) acc5: 92.9688 (95.0374) time: 0.2296 data: 0.0005 max mem: 13912 +[2024-12-06 14:58:03 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0725 (0.9041) acc1: 75.7812 (80.2373) acc3: 89.8438 (92.0621) acc5: 92.9688 (94.6952) time: 0.2298 data: 0.0007 max mem: 13912 +[2024-12-06 14:58:05 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1333 (0.9270) acc1: 73.4375 (79.5072) acc3: 88.2812 (91.7067) acc5: 92.1875 (94.4797) time: 0.2290 data: 0.0007 max mem: 13912 +[2024-12-06 14:58:07 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0029 (0.9219) acc1: 74.2188 (79.5920) acc3: 89.8438 (91.8240) acc5: 93.7500 (94.5440) time: 0.2253 data: 0.0007 max mem: 13912 +[2024-12-06 14:58:07 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2279 s / it) +[2024-12-06 14:58:07 root] (engine.py 264): INFO * Acc@1 79.484 Acc@3 91.992 Acc@5 94.656 loss 0.918 flops 3.584 layer_flops 3.536 +[2024-12-06 14:58:07 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.5% +[2024-12-06 14:58:07 root] (main.py 551): INFO Max accuracy: 79.49% +[2024-12-06 14:58:08 root] (utils.py 283): INFO Epoch: [7] [ 0/2502] eta: 0:54:34 lr: 0.000013 loss_cls: 3.5394 (3.5394) grad_norm: 2.4168 (2.4168) time: 1.3089 data: 0.0003 max mem: 13912 +[2024-12-06 14:58:21 root] (utils.py 283): INFO Epoch: [7] [ 10/2502] eta: 0:54:14 lr: 0.000013 loss_cls: 3.5183 (3.3204) grad_norm: 2.7809 (2.8090) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 14:58:34 root] (utils.py 283): INFO Epoch: [7] [ 20/2502] eta: 0:53:59 lr: 0.000013 loss_cls: 3.3346 (3.1818) grad_norm: 2.8307 (3.4602) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 14:58:47 root] (utils.py 283): INFO Epoch: [7] [ 30/2502] eta: 0:53:44 lr: 0.000013 loss_cls: 3.3346 (3.2111) grad_norm: 2.9306 (3.3192) time: 1.3035 data: 0.0004 max mem: 13912 +[2024-12-06 14:59:00 root] (utils.py 283): INFO Epoch: [7] [ 40/2502] eta: 0:53:29 lr: 0.000013 loss_cls: 3.2277 (3.1587) grad_norm: 2.6815 (3.3875) time: 1.3024 data: 0.0005 max mem: 13912 +[2024-12-06 14:59:14 root] (utils.py 283): INFO Epoch: [7] [ 50/2502] eta: 0:53:17 lr: 0.000013 loss_cls: 3.1857 (3.1383) grad_norm: 2.5518 (3.2916) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 14:59:27 root] (utils.py 283): INFO Epoch: [7] [ 60/2502] eta: 0:53:02 lr: 0.000013 loss_cls: 3.3514 (3.1635) grad_norm: 2.5373 (3.2014) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 14:59:40 root] (utils.py 283): INFO Epoch: [7] [ 70/2502] eta: 0:52:49 lr: 0.000013 loss_cls: 3.3049 (3.1496) grad_norm: 2.7005 (3.1378) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 14:59:53 root] (utils.py 283): INFO Epoch: [7] [ 80/2502] eta: 0:52:36 lr: 0.000013 loss_cls: 3.2676 (3.1584) grad_norm: 2.5912 (3.0874) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 15:00:06 root] (utils.py 283): INFO Epoch: [7] [ 90/2502] eta: 0:52:24 lr: 0.000013 loss_cls: 3.2815 (3.1658) grad_norm: 2.4781 (3.0362) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 15:00:19 root] (utils.py 283): INFO Epoch: [7] [ 100/2502] eta: 0:52:11 lr: 0.000013 loss_cls: 3.2742 (3.1658) grad_norm: 2.4781 (2.9929) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 15:00:32 root] (utils.py 283): INFO Epoch: [7] [ 110/2502] eta: 0:52:01 lr: 0.000013 loss_cls: 3.2865 (3.1672) grad_norm: 2.5568 (2.9622) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 15:00:45 root] (utils.py 283): INFO Epoch: [7] [ 120/2502] eta: 0:51:47 lr: 0.000013 loss_cls: 3.1978 (3.1444) grad_norm: 2.5568 (2.9301) time: 1.3089 data: 0.0002 max mem: 13912 +[2024-12-06 15:00:58 root] (utils.py 283): INFO Epoch: [7] [ 130/2502] eta: 0:51:33 lr: 0.000013 loss_cls: 3.1779 (3.1526) grad_norm: 2.5985 (2.9325) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 15:01:11 root] (utils.py 283): INFO Epoch: [7] [ 140/2502] eta: 0:51:20 lr: 0.000013 loss_cls: 3.3299 (3.1534) grad_norm: 2.7854 (2.9361) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 15:01:24 root] (utils.py 283): INFO Epoch: [7] [ 150/2502] eta: 0:51:07 lr: 0.000013 loss_cls: 3.0509 (3.1478) grad_norm: 2.7854 (2.9358) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 15:01:37 root] (utils.py 283): INFO Epoch: [7] [ 160/2502] eta: 0:50:54 lr: 0.000013 loss_cls: 3.1563 (3.1608) grad_norm: 2.8722 (2.9630) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 15:01:50 root] (utils.py 283): INFO Epoch: [7] [ 170/2502] eta: 0:50:42 lr: 0.000013 loss_cls: 3.2562 (3.1376) grad_norm: 2.7114 (2.9579) time: 1.3070 data: 0.0003 max mem: 13912 +[2024-12-06 15:02:03 root] (utils.py 283): INFO Epoch: [7] [ 180/2502] eta: 0:50:29 lr: 0.000013 loss_cls: 2.3358 (3.1074) grad_norm: 2.5534 (2.9425) time: 1.3068 data: 0.0003 max mem: 13912 +[2024-12-06 15:02:16 root] (utils.py 283): INFO Epoch: [7] [ 190/2502] eta: 0:50:16 lr: 0.000013 loss_cls: 3.2134 (3.1232) grad_norm: 2.6118 (3.0004) time: 1.3077 data: 0.0003 max mem: 13912 +[2024-12-06 15:02:29 root] (utils.py 283): INFO Epoch: [7] [ 200/2502] eta: 0:50:03 lr: 0.000013 loss_cls: 3.3997 (3.1232) grad_norm: 2.8310 (3.0103) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 15:02:42 root] (utils.py 283): INFO Epoch: [7] [ 210/2502] eta: 0:49:50 lr: 0.000013 loss_cls: 3.0580 (3.1136) grad_norm: 2.7549 (3.0032) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 15:02:55 root] (utils.py 283): INFO Epoch: [7] [ 220/2502] eta: 0:49:36 lr: 0.000013 loss_cls: 2.9144 (3.1043) grad_norm: 2.7325 (3.0178) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 15:03:08 root] (utils.py 283): INFO Epoch: [7] [ 230/2502] eta: 0:49:23 lr: 0.000013 loss_cls: 3.1321 (3.1100) grad_norm: 2.7325 (3.0095) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 15:03:22 root] (utils.py 283): INFO Epoch: [7] [ 240/2502] eta: 0:49:14 lr: 0.000013 loss_cls: 3.2953 (3.1201) grad_norm: 2.7642 (3.0070) time: 1.3235 data: 0.0003 max mem: 13912 +[2024-12-06 15:03:35 root] (utils.py 283): INFO Epoch: [7] [ 250/2502] eta: 0:49:04 lr: 0.000013 loss_cls: 3.2953 (3.1204) grad_norm: 2.7057 (3.0061) time: 1.3418 data: 0.0003 max mem: 13912 +[2024-12-06 15:03:48 root] (utils.py 283): INFO Epoch: [7] [ 260/2502] eta: 0:48:51 lr: 0.000013 loss_cls: 3.2225 (3.1177) grad_norm: 2.6170 (3.0208) time: 1.3250 data: 0.0003 max mem: 13912 +[2024-12-06 15:04:01 root] (utils.py 283): INFO Epoch: [7] [ 270/2502] eta: 0:48:37 lr: 0.000013 loss_cls: 3.0631 (3.1116) grad_norm: 2.5477 (3.0058) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 15:04:14 root] (utils.py 283): INFO Epoch: [7] [ 280/2502] eta: 0:48:24 lr: 0.000013 loss_cls: 3.2821 (3.1131) grad_norm: 2.6052 (3.0851) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 15:04:27 root] (utils.py 283): INFO Epoch: [7] [ 290/2502] eta: 0:48:11 lr: 0.000013 loss_cls: 3.3234 (3.1164) grad_norm: 2.6538 (3.0889) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 15:04:40 root] (utils.py 283): INFO Epoch: [7] [ 300/2502] eta: 0:47:58 lr: 0.000013 loss_cls: 3.3585 (3.1174) grad_norm: 2.7221 (3.0907) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 15:04:54 root] (utils.py 283): INFO Epoch: [7] [ 310/2502] eta: 0:47:44 lr: 0.000013 loss_cls: 3.1734 (3.1087) grad_norm: 2.7848 (3.0854) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 15:05:07 root] (utils.py 283): INFO Epoch: [7] [ 320/2502] eta: 0:47:31 lr: 0.000013 loss_cls: 2.8857 (3.1081) grad_norm: 2.7848 (3.0843) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 15:05:20 root] (utils.py 283): INFO Epoch: [7] [ 330/2502] eta: 0:47:18 lr: 0.000013 loss_cls: 3.1040 (3.1096) grad_norm: 2.7131 (3.0768) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 15:05:33 root] (utils.py 283): INFO Epoch: [7] [ 340/2502] eta: 0:47:04 lr: 0.000013 loss_cls: 3.0965 (3.1059) grad_norm: 2.6102 (3.0632) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 15:05:46 root] (utils.py 283): INFO Epoch: [7] [ 350/2502] eta: 0:46:51 lr: 0.000013 loss_cls: 3.1985 (3.1051) grad_norm: 2.4944 (3.0533) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 15:05:59 root] (utils.py 283): INFO Epoch: [7] [ 360/2502] eta: 0:46:38 lr: 0.000013 loss_cls: 3.3363 (3.1108) grad_norm: 2.6676 (3.0558) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 15:06:12 root] (utils.py 283): INFO Epoch: [7] [ 370/2502] eta: 0:46:25 lr: 0.000013 loss_cls: 3.2911 (3.1060) grad_norm: 2.7448 (3.0525) time: 1.3081 data: 0.0003 max mem: 13912 +[2024-12-06 15:06:25 root] (utils.py 283): INFO Epoch: [7] [ 380/2502] eta: 0:46:12 lr: 0.000013 loss_cls: 3.1094 (3.1080) grad_norm: 2.7448 (3.0556) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 15:06:38 root] (utils.py 283): INFO Epoch: [7] [ 390/2502] eta: 0:45:58 lr: 0.000013 loss_cls: 3.3634 (3.1109) grad_norm: 2.7466 (3.0625) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 15:06:51 root] (utils.py 283): INFO Epoch: [7] [ 400/2502] eta: 0:45:45 lr: 0.000013 loss_cls: 3.3152 (3.1160) grad_norm: 2.5672 (3.0552) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 15:07:04 root] (utils.py 283): INFO Epoch: [7] [ 410/2502] eta: 0:45:32 lr: 0.000013 loss_cls: 3.3299 (3.1182) grad_norm: 2.7725 (3.0610) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 15:07:17 root] (utils.py 283): INFO Epoch: [7] [ 420/2502] eta: 0:45:20 lr: 0.000013 loss_cls: 2.9823 (3.1140) grad_norm: 3.0415 (3.0573) time: 1.3125 data: 0.0003 max mem: 13912 +[2024-12-06 15:07:30 root] (utils.py 283): INFO Epoch: [7] [ 430/2502] eta: 0:45:06 lr: 0.000013 loss_cls: 2.9823 (3.1159) grad_norm: 2.7652 (3.0631) time: 1.3126 data: 0.0003 max mem: 13912 +[2024-12-06 15:07:43 root] (utils.py 283): INFO Epoch: [7] [ 440/2502] eta: 0:44:53 lr: 0.000013 loss_cls: 3.1065 (3.1102) grad_norm: 2.7703 (3.0560) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 15:07:56 root] (utils.py 283): INFO Epoch: [7] [ 450/2502] eta: 0:44:40 lr: 0.000013 loss_cls: 2.9446 (3.1093) grad_norm: 2.7602 (3.0516) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 15:08:09 root] (utils.py 283): INFO Epoch: [7] [ 460/2502] eta: 0:44:27 lr: 0.000013 loss_cls: 3.3198 (3.1122) grad_norm: 2.4853 (3.0435) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 15:08:22 root] (utils.py 283): INFO Epoch: [7] [ 470/2502] eta: 0:44:14 lr: 0.000013 loss_cls: 3.2950 (3.1084) grad_norm: 2.4853 (3.0367) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 15:08:35 root] (utils.py 283): INFO Epoch: [7] [ 480/2502] eta: 0:44:01 lr: 0.000013 loss_cls: 2.9599 (3.1048) grad_norm: 2.5617 (3.0320) time: 1.3056 data: 0.0002 max mem: 13912 +[2024-12-06 15:08:49 root] (utils.py 283): INFO Epoch: [7] [ 490/2502] eta: 0:43:48 lr: 0.000013 loss_cls: 3.2644 (3.1074) grad_norm: 2.6828 (3.0355) time: 1.3081 data: 0.0003 max mem: 13912 +[2024-12-06 15:09:02 root] (utils.py 283): INFO Epoch: [7] [ 500/2502] eta: 0:43:35 lr: 0.000013 loss_cls: 3.3547 (3.1074) grad_norm: 2.6828 (3.0304) time: 1.3128 data: 0.0003 max mem: 13912 +[2024-12-06 15:09:15 root] (utils.py 283): INFO Epoch: [7] [ 510/2502] eta: 0:43:22 lr: 0.000013 loss_cls: 3.2547 (3.1098) grad_norm: 2.7561 (3.0248) time: 1.3110 data: 0.0003 max mem: 13912 +[2024-12-06 15:09:28 root] (utils.py 283): INFO Epoch: [7] [ 520/2502] eta: 0:43:09 lr: 0.000013 loss_cls: 3.2547 (3.1090) grad_norm: 2.8378 (3.0261) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 15:09:41 root] (utils.py 283): INFO Epoch: [7] [ 530/2502] eta: 0:42:56 lr: 0.000013 loss_cls: 3.3012 (3.1101) grad_norm: 2.8693 (3.0246) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 15:09:54 root] (utils.py 283): INFO Epoch: [7] [ 540/2502] eta: 0:42:43 lr: 0.000013 loss_cls: 3.3334 (3.1062) grad_norm: 2.8296 (3.0260) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 15:10:07 root] (utils.py 283): INFO Epoch: [7] [ 550/2502] eta: 0:42:29 lr: 0.000013 loss_cls: 3.1755 (3.1064) grad_norm: 2.6188 (3.0248) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 15:10:20 root] (utils.py 283): INFO Epoch: [7] [ 560/2502] eta: 0:42:16 lr: 0.000013 loss_cls: 3.3101 (3.1106) grad_norm: 2.6380 (3.0206) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 15:10:33 root] (utils.py 283): INFO Epoch: [7] [ 570/2502] eta: 0:42:03 lr: 0.000013 loss_cls: 3.2964 (3.1108) grad_norm: 2.6574 (3.0157) time: 1.3078 data: 0.0002 max mem: 13912 +[2024-12-06 15:10:46 root] (utils.py 283): INFO Epoch: [7] [ 580/2502] eta: 0:41:50 lr: 0.000013 loss_cls: 3.3281 (3.1150) grad_norm: 2.6807 (3.0123) time: 1.3071 data: 0.0003 max mem: 13912 +[2024-12-06 15:10:59 root] (utils.py 283): INFO Epoch: [7] [ 590/2502] eta: 0:41:37 lr: 0.000013 loss_cls: 3.4578 (3.1194) grad_norm: 2.8288 (3.0261) time: 1.3070 data: 0.0003 max mem: 13912 +[2024-12-06 15:11:12 root] (utils.py 283): INFO Epoch: [7] [ 600/2502] eta: 0:41:24 lr: 0.000013 loss_cls: 3.4084 (3.1190) grad_norm: 2.7158 (3.0226) time: 1.3068 data: 0.0003 max mem: 13912 +[2024-12-06 15:11:25 root] (utils.py 283): INFO Epoch: [7] [ 610/2502] eta: 0:41:11 lr: 0.000013 loss_cls: 3.0917 (3.1187) grad_norm: 2.6124 (3.0194) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 15:11:38 root] (utils.py 283): INFO Epoch: [7] [ 620/2502] eta: 0:40:58 lr: 0.000013 loss_cls: 3.0696 (3.1173) grad_norm: 2.7361 (3.0168) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 15:11:51 root] (utils.py 283): INFO Epoch: [7] [ 630/2502] eta: 0:40:45 lr: 0.000013 loss_cls: 3.1291 (3.1184) grad_norm: 2.6294 (3.0102) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 15:12:04 root] (utils.py 283): INFO Epoch: [7] [ 640/2502] eta: 0:40:32 lr: 0.000013 loss_cls: 3.3296 (3.1181) grad_norm: 2.6100 (3.0117) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 15:12:17 root] (utils.py 283): INFO Epoch: [7] [ 650/2502] eta: 0:40:19 lr: 0.000013 loss_cls: 3.1558 (3.1166) grad_norm: 2.7763 (3.0098) time: 1.3059 data: 0.0003 max mem: 13912 +[2024-12-06 15:12:31 root] (utils.py 283): INFO Epoch: [7] [ 660/2502] eta: 0:40:06 lr: 0.000013 loss_cls: 3.2887 (3.1209) grad_norm: 2.7938 (3.0220) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 15:12:44 root] (utils.py 283): INFO Epoch: [7] [ 670/2502] eta: 0:39:52 lr: 0.000013 loss_cls: 3.3709 (3.1211) grad_norm: 2.7820 (3.0187) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 15:12:57 root] (utils.py 283): INFO Epoch: [7] [ 680/2502] eta: 0:39:39 lr: 0.000013 loss_cls: 3.2647 (3.1227) grad_norm: 2.8086 (3.0162) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 15:13:10 root] (utils.py 283): INFO Epoch: [7] [ 690/2502] eta: 0:39:26 lr: 0.000013 loss_cls: 3.2998 (3.1237) grad_norm: 2.8529 (3.0182) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 15:13:23 root] (utils.py 283): INFO Epoch: [7] [ 700/2502] eta: 0:39:13 lr: 0.000013 loss_cls: 3.1668 (3.1253) grad_norm: 2.5957 (3.0183) time: 1.3055 data: 0.0002 max mem: 13912 +[2024-12-06 15:13:36 root] (utils.py 283): INFO Epoch: [7] [ 710/2502] eta: 0:39:00 lr: 0.000013 loss_cls: 3.1928 (3.1256) grad_norm: 2.5957 (3.0160) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 15:13:49 root] (utils.py 283): INFO Epoch: [7] [ 720/2502] eta: 0:38:47 lr: 0.000013 loss_cls: 3.3384 (3.1270) grad_norm: 2.6607 (3.0121) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 15:14:02 root] (utils.py 283): INFO Epoch: [7] [ 730/2502] eta: 0:38:34 lr: 0.000013 loss_cls: 3.3497 (3.1277) grad_norm: 2.6392 (3.0141) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 15:14:15 root] (utils.py 283): INFO Epoch: [7] [ 740/2502] eta: 0:38:21 lr: 0.000013 loss_cls: 3.3266 (3.1294) grad_norm: 2.5864 (3.0138) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 15:14:28 root] (utils.py 283): INFO Epoch: [7] [ 750/2502] eta: 0:38:08 lr: 0.000013 loss_cls: 3.3028 (3.1320) grad_norm: 2.5864 (3.0150) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 15:14:41 root] (utils.py 283): INFO Epoch: [7] [ 760/2502] eta: 0:37:55 lr: 0.000013 loss_cls: 3.3028 (3.1342) grad_norm: 2.6299 (3.0165) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 15:14:54 root] (utils.py 283): INFO Epoch: [7] [ 770/2502] eta: 0:37:41 lr: 0.000013 loss_cls: 3.3469 (3.1362) grad_norm: 2.7091 (3.0127) time: 1.3061 data: 0.0002 max mem: 13912 +[2024-12-06 15:15:07 root] (utils.py 283): INFO Epoch: [7] [ 780/2502] eta: 0:37:28 lr: 0.000013 loss_cls: 3.2157 (3.1345) grad_norm: 2.8271 (3.0206) time: 1.3075 data: 0.0003 max mem: 13912 +[2024-12-06 15:15:20 root] (utils.py 283): INFO Epoch: [7] [ 790/2502] eta: 0:37:15 lr: 0.000013 loss_cls: 3.2775 (3.1371) grad_norm: 3.3250 (3.0254) time: 1.3059 data: 0.0003 max mem: 13912 +[2024-12-06 15:15:33 root] (utils.py 283): INFO Epoch: [7] [ 800/2502] eta: 0:37:02 lr: 0.000013 loss_cls: 3.2426 (3.1361) grad_norm: 2.7824 (3.0232) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 15:15:46 root] (utils.py 283): INFO Epoch: [7] [ 810/2502] eta: 0:36:49 lr: 0.000013 loss_cls: 3.2737 (3.1385) grad_norm: 2.9212 (3.0366) time: 1.3062 data: 0.0003 max mem: 13912 +[2024-12-06 15:15:59 root] (utils.py 283): INFO Epoch: [7] [ 820/2502] eta: 0:36:36 lr: 0.000013 loss_cls: 3.3238 (3.1375) grad_norm: 3.0582 (3.0349) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 15:16:12 root] (utils.py 283): INFO Epoch: [7] [ 830/2502] eta: 0:36:23 lr: 0.000013 loss_cls: 2.9666 (3.1368) grad_norm: 2.7652 (3.0326) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 15:16:25 root] (utils.py 283): INFO Epoch: [7] [ 840/2502] eta: 0:36:10 lr: 0.000013 loss_cls: 2.8734 (3.1344) grad_norm: 2.6027 (3.0286) time: 1.3062 data: 0.0003 max mem: 13912 +[2024-12-06 15:16:39 root] (utils.py 283): INFO Epoch: [7] [ 850/2502] eta: 0:35:57 lr: 0.000013 loss_cls: 3.1020 (3.1343) grad_norm: 2.5159 (3.0224) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 15:16:52 root] (utils.py 283): INFO Epoch: [7] [ 860/2502] eta: 0:35:44 lr: 0.000013 loss_cls: 3.1778 (3.1348) grad_norm: 2.5921 (3.0209) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 15:17:05 root] (utils.py 283): INFO Epoch: [7] [ 870/2502] eta: 0:35:31 lr: 0.000013 loss_cls: 3.3332 (3.1367) grad_norm: 2.7169 (3.0218) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 15:17:18 root] (utils.py 283): INFO Epoch: [7] [ 880/2502] eta: 0:35:18 lr: 0.000013 loss_cls: 3.3549 (3.1361) grad_norm: 2.6267 (3.0183) time: 1.2990 data: 0.0003 max mem: 13912 +[2024-12-06 15:17:31 root] (utils.py 283): INFO Epoch: [7] [ 890/2502] eta: 0:35:04 lr: 0.000013 loss_cls: 3.2808 (3.1377) grad_norm: 2.5946 (3.0160) time: 1.2990 data: 0.0002 max mem: 13912 +[2024-12-06 15:17:44 root] (utils.py 283): INFO Epoch: [7] [ 900/2502] eta: 0:34:51 lr: 0.000013 loss_cls: 3.2779 (3.1391) grad_norm: 2.6036 (3.0125) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 15:17:57 root] (utils.py 283): INFO Epoch: [7] [ 910/2502] eta: 0:34:38 lr: 0.000013 loss_cls: 3.1380 (3.1387) grad_norm: 2.5890 (3.0104) time: 1.3061 data: 0.0002 max mem: 13912 +[2024-12-06 15:18:10 root] (utils.py 283): INFO Epoch: [7] [ 920/2502] eta: 0:34:25 lr: 0.000013 loss_cls: 3.1066 (3.1377) grad_norm: 2.7086 (3.0108) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 15:18:23 root] (utils.py 283): INFO Epoch: [7] [ 930/2502] eta: 0:34:12 lr: 0.000013 loss_cls: 2.8395 (3.1346) grad_norm: 2.9044 (3.0129) time: 1.3066 data: 0.0003 max mem: 13912 +[2024-12-06 15:18:36 root] (utils.py 283): INFO Epoch: [7] [ 940/2502] eta: 0:33:59 lr: 0.000013 loss_cls: 2.8395 (3.1319) grad_norm: 3.0949 (3.0182) time: 1.3039 data: 0.0002 max mem: 13912 +[2024-12-06 15:18:49 root] (utils.py 283): INFO Epoch: [7] [ 950/2502] eta: 0:33:46 lr: 0.000013 loss_cls: 2.9734 (3.1305) grad_norm: 2.7723 (3.0288) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 15:19:02 root] (utils.py 283): INFO Epoch: [7] [ 960/2502] eta: 0:33:33 lr: 0.000013 loss_cls: 3.1023 (3.1318) grad_norm: 2.8280 (3.0313) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 15:19:15 root] (utils.py 283): INFO Epoch: [7] [ 970/2502] eta: 0:33:20 lr: 0.000013 loss_cls: 3.1877 (3.1317) grad_norm: 2.7112 (3.0323) time: 1.3070 data: 0.0003 max mem: 13912 +[2024-12-06 15:19:28 root] (utils.py 283): INFO Epoch: [7] [ 980/2502] eta: 0:33:07 lr: 0.000013 loss_cls: 3.1877 (3.1326) grad_norm: 2.7112 (3.0375) time: 1.3115 data: 0.0003 max mem: 13912 +[2024-12-06 15:19:41 root] (utils.py 283): INFO Epoch: [7] [ 990/2502] eta: 0:32:54 lr: 0.000013 loss_cls: 3.2552 (3.1342) grad_norm: 2.8035 (3.0347) time: 1.3093 data: 0.0003 max mem: 13912 +[2024-12-06 15:19:54 root] (utils.py 283): INFO Epoch: [7] [1000/2502] eta: 0:32:41 lr: 0.000013 loss_cls: 3.2552 (3.1341) grad_norm: 2.6771 (3.0345) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 15:20:07 root] (utils.py 283): INFO Epoch: [7] [1010/2502] eta: 0:32:28 lr: 0.000013 loss_cls: 3.2002 (3.1346) grad_norm: 2.6374 (3.0332) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 15:20:20 root] (utils.py 283): INFO Epoch: [7] [1020/2502] eta: 0:32:15 lr: 0.000013 loss_cls: 3.2352 (3.1339) grad_norm: 2.7275 (3.0325) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 15:20:33 root] (utils.py 283): INFO Epoch: [7] [1030/2502] eta: 0:32:01 lr: 0.000013 loss_cls: 2.8114 (3.1296) grad_norm: 2.8092 (3.0343) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 15:20:46 root] (utils.py 283): INFO Epoch: [7] [1040/2502] eta: 0:31:48 lr: 0.000013 loss_cls: 2.9705 (3.1315) grad_norm: 2.8049 (3.0308) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 15:20:59 root] (utils.py 283): INFO Epoch: [7] [1050/2502] eta: 0:31:35 lr: 0.000013 loss_cls: 3.1726 (3.1304) grad_norm: 2.8626 (3.0321) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 15:21:12 root] (utils.py 283): INFO Epoch: [7] [1060/2502] eta: 0:31:22 lr: 0.000013 loss_cls: 3.1637 (3.1307) grad_norm: 2.7806 (3.0299) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 15:21:25 root] (utils.py 283): INFO Epoch: [7] [1070/2502] eta: 0:31:09 lr: 0.000013 loss_cls: 3.0756 (3.1310) grad_norm: 2.6236 (3.0284) time: 1.2998 data: 0.0002 max mem: 13912 +[2024-12-06 15:21:38 root] (utils.py 283): INFO Epoch: [7] [1080/2502] eta: 0:30:56 lr: 0.000013 loss_cls: 3.0723 (3.1300) grad_norm: 2.5773 (3.0242) time: 1.2972 data: 0.0002 max mem: 13912 +[2024-12-06 15:21:51 root] (utils.py 283): INFO Epoch: [7] [1090/2502] eta: 0:30:43 lr: 0.000013 loss_cls: 3.2368 (3.1307) grad_norm: 2.7146 (3.0227) time: 1.2974 data: 0.0002 max mem: 13912 +[2024-12-06 15:22:04 root] (utils.py 283): INFO Epoch: [7] [1100/2502] eta: 0:30:30 lr: 0.000013 loss_cls: 3.2379 (3.1297) grad_norm: 2.7930 (3.0229) time: 1.2973 data: 0.0002 max mem: 13912 +[2024-12-06 15:22:17 root] (utils.py 283): INFO Epoch: [7] [1110/2502] eta: 0:30:16 lr: 0.000013 loss_cls: 3.1355 (3.1277) grad_norm: 2.6912 (3.0252) time: 1.2987 data: 0.0003 max mem: 13912 +[2024-12-06 15:22:31 root] (utils.py 283): INFO Epoch: [7] [1120/2502] eta: 0:30:04 lr: 0.000013 loss_cls: 2.9198 (3.1258) grad_norm: 2.8070 (3.0238) time: 1.3110 data: 0.0003 max mem: 13912 +[2024-12-06 15:22:44 root] (utils.py 283): INFO Epoch: [7] [1130/2502] eta: 0:29:50 lr: 0.000013 loss_cls: 3.1058 (3.1264) grad_norm: 2.6346 (3.0224) time: 1.3095 data: 0.0002 max mem: 13912 +[2024-12-06 15:22:56 root] (utils.py 283): INFO Epoch: [7] [1140/2502] eta: 0:29:37 lr: 0.000013 loss_cls: 3.1491 (3.1257) grad_norm: 2.5527 (3.0215) time: 1.2978 data: 0.0002 max mem: 13912 +[2024-12-06 15:23:09 root] (utils.py 283): INFO Epoch: [7] [1150/2502] eta: 0:29:24 lr: 0.000013 loss_cls: 3.0685 (3.1254) grad_norm: 2.7115 (3.0196) time: 1.2957 data: 0.0003 max mem: 13912 +[2024-12-06 15:23:23 root] (utils.py 283): INFO Epoch: [7] [1160/2502] eta: 0:29:11 lr: 0.000013 loss_cls: 3.0741 (3.1252) grad_norm: 2.7115 (3.0237) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 15:23:36 root] (utils.py 283): INFO Epoch: [7] [1170/2502] eta: 0:28:58 lr: 0.000013 loss_cls: 3.2792 (3.1247) grad_norm: 2.9054 (3.0246) time: 1.3088 data: 0.0003 max mem: 13912 +[2024-12-06 15:23:49 root] (utils.py 283): INFO Epoch: [7] [1180/2502] eta: 0:28:45 lr: 0.000013 loss_cls: 3.2929 (3.1265) grad_norm: 2.9554 (3.0483) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 15:24:02 root] (utils.py 283): INFO Epoch: [7] [1190/2502] eta: 0:28:32 lr: 0.000013 loss_cls: 3.2935 (3.1253) grad_norm: 2.7158 (3.0490) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 15:24:15 root] (utils.py 283): INFO Epoch: [7] [1200/2502] eta: 0:28:19 lr: 0.000013 loss_cls: 2.8098 (3.1232) grad_norm: 2.6064 (3.0469) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 15:24:28 root] (utils.py 283): INFO Epoch: [7] [1210/2502] eta: 0:28:06 lr: 0.000013 loss_cls: 2.8481 (3.1233) grad_norm: 2.6982 (3.0461) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 15:24:41 root] (utils.py 283): INFO Epoch: [7] [1220/2502] eta: 0:27:53 lr: 0.000013 loss_cls: 3.0772 (3.1210) grad_norm: 2.7712 (3.0451) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 15:24:54 root] (utils.py 283): INFO Epoch: [7] [1230/2502] eta: 0:27:40 lr: 0.000013 loss_cls: 3.0431 (3.1208) grad_norm: 2.7555 (3.0442) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 15:25:07 root] (utils.py 283): INFO Epoch: [7] [1240/2502] eta: 0:27:27 lr: 0.000013 loss_cls: 3.1375 (3.1198) grad_norm: 2.5782 (3.0408) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 15:25:20 root] (utils.py 283): INFO Epoch: [7] [1250/2502] eta: 0:27:14 lr: 0.000013 loss_cls: 3.0422 (3.1189) grad_norm: 2.4988 (3.0377) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 15:25:33 root] (utils.py 283): INFO Epoch: [7] [1260/2502] eta: 0:27:00 lr: 0.000013 loss_cls: 3.2762 (3.1207) grad_norm: 2.6727 (3.0370) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 15:25:46 root] (utils.py 283): INFO Epoch: [7] [1270/2502] eta: 0:26:47 lr: 0.000013 loss_cls: 3.2778 (3.1221) grad_norm: 2.8940 (3.0405) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 15:25:59 root] (utils.py 283): INFO Epoch: [7] [1280/2502] eta: 0:26:34 lr: 0.000013 loss_cls: 3.2263 (3.1218) grad_norm: 2.6168 (3.0365) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 15:26:12 root] (utils.py 283): INFO Epoch: [7] [1290/2502] eta: 0:26:21 lr: 0.000013 loss_cls: 3.0910 (3.1222) grad_norm: 2.5600 (3.0369) time: 1.3077 data: 0.0002 max mem: 13912 +[2024-12-06 15:26:25 root] (utils.py 283): INFO Epoch: [7] [1300/2502] eta: 0:26:08 lr: 0.000013 loss_cls: 3.0943 (3.1230) grad_norm: 2.6417 (3.0345) time: 1.3093 data: 0.0002 max mem: 13912 +[2024-12-06 15:26:38 root] (utils.py 283): INFO Epoch: [7] [1310/2502] eta: 0:25:55 lr: 0.000013 loss_cls: 3.0919 (3.1219) grad_norm: 2.5867 (3.0381) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 15:26:51 root] (utils.py 283): INFO Epoch: [7] [1320/2502] eta: 0:25:42 lr: 0.000013 loss_cls: 3.1576 (3.1217) grad_norm: 2.7208 (3.0365) time: 1.3061 data: 0.0002 max mem: 13912 +[2024-12-06 15:27:04 root] (utils.py 283): INFO Epoch: [7] [1330/2502] eta: 0:25:29 lr: 0.000013 loss_cls: 3.1576 (3.1198) grad_norm: 2.6690 (3.0362) time: 1.3083 data: 0.0002 max mem: 13912 +[2024-12-06 15:27:17 root] (utils.py 283): INFO Epoch: [7] [1340/2502] eta: 0:25:16 lr: 0.000013 loss_cls: 2.9258 (3.1189) grad_norm: 2.5543 (3.0339) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 15:27:30 root] (utils.py 283): INFO Epoch: [7] [1350/2502] eta: 0:25:03 lr: 0.000013 loss_cls: 3.2482 (3.1209) grad_norm: 2.6926 (3.0377) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 15:27:43 root] (utils.py 283): INFO Epoch: [7] [1360/2502] eta: 0:24:50 lr: 0.000013 loss_cls: 3.3696 (3.1204) grad_norm: 2.6572 (3.0346) time: 1.3005 data: 0.0003 max mem: 13912 +[2024-12-06 15:27:57 root] (utils.py 283): INFO Epoch: [7] [1370/2502] eta: 0:24:37 lr: 0.000013 loss_cls: 3.3482 (3.1214) grad_norm: 2.6272 (3.0318) time: 1.3082 data: 0.0002 max mem: 13912 +[2024-12-06 15:28:10 root] (utils.py 283): INFO Epoch: [7] [1380/2502] eta: 0:24:24 lr: 0.000013 loss_cls: 3.3987 (3.1236) grad_norm: 2.7124 (3.0318) time: 1.3075 data: 0.0002 max mem: 13912 +[2024-12-06 15:28:23 root] (utils.py 283): INFO Epoch: [7] [1390/2502] eta: 0:24:11 lr: 0.000013 loss_cls: 3.3553 (3.1242) grad_norm: 2.7124 (3.0404) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 15:28:36 root] (utils.py 283): INFO Epoch: [7] [1400/2502] eta: 0:23:58 lr: 0.000013 loss_cls: 3.1592 (3.1231) grad_norm: 2.7478 (3.0396) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 15:28:49 root] (utils.py 283): INFO Epoch: [7] [1410/2502] eta: 0:23:45 lr: 0.000013 loss_cls: 3.1010 (3.1230) grad_norm: 2.6211 (3.0367) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 15:29:02 root] (utils.py 283): INFO Epoch: [7] [1420/2502] eta: 0:23:32 lr: 0.000013 loss_cls: 3.1166 (3.1222) grad_norm: 2.6211 (3.0370) time: 1.3073 data: 0.0003 max mem: 13912 +[2024-12-06 15:29:15 root] (utils.py 283): INFO Epoch: [7] [1430/2502] eta: 0:23:19 lr: 0.000013 loss_cls: 3.1166 (3.1213) grad_norm: 2.7742 (3.0359) time: 1.3071 data: 0.0003 max mem: 13912 +[2024-12-06 15:29:28 root] (utils.py 283): INFO Epoch: [7] [1440/2502] eta: 0:23:06 lr: 0.000013 loss_cls: 3.0446 (3.1196) grad_norm: 2.5948 (3.0323) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 15:29:41 root] (utils.py 283): INFO Epoch: [7] [1450/2502] eta: 0:22:52 lr: 0.000013 loss_cls: 3.1157 (3.1189) grad_norm: 2.5627 (3.0397) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 15:29:54 root] (utils.py 283): INFO Epoch: [7] [1460/2502] eta: 0:22:39 lr: 0.000013 loss_cls: 3.1793 (3.1206) grad_norm: 2.7309 (3.0385) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 15:30:07 root] (utils.py 283): INFO Epoch: [7] [1470/2502] eta: 0:22:26 lr: 0.000013 loss_cls: 3.4318 (3.1206) grad_norm: 2.6102 (3.0360) time: 1.3065 data: 0.0003 max mem: 13912 +[2024-12-06 15:30:20 root] (utils.py 283): INFO Epoch: [7] [1480/2502] eta: 0:22:13 lr: 0.000013 loss_cls: 3.0110 (3.1193) grad_norm: 2.6256 (3.0340) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 15:30:33 root] (utils.py 283): INFO Epoch: [7] [1490/2502] eta: 0:22:00 lr: 0.000013 loss_cls: 3.1308 (3.1194) grad_norm: 2.7132 (3.0391) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 15:30:46 root] (utils.py 283): INFO Epoch: [7] [1500/2502] eta: 0:21:47 lr: 0.000013 loss_cls: 3.2037 (3.1191) grad_norm: 2.8782 (3.0383) time: 1.2973 data: 0.0003 max mem: 13912 +[2024-12-06 15:30:59 root] (utils.py 283): INFO Epoch: [7] [1510/2502] eta: 0:21:34 lr: 0.000013 loss_cls: 3.2037 (3.1193) grad_norm: 2.7534 (3.0396) time: 1.2944 data: 0.0003 max mem: 13912 +[2024-12-06 15:31:12 root] (utils.py 283): INFO Epoch: [7] [1520/2502] eta: 0:21:21 lr: 0.000013 loss_cls: 3.0257 (3.1176) grad_norm: 2.6128 (3.0433) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 15:31:25 root] (utils.py 283): INFO Epoch: [7] [1530/2502] eta: 0:21:08 lr: 0.000013 loss_cls: 2.6942 (3.1156) grad_norm: 2.5886 (3.0410) time: 1.2958 data: 0.0003 max mem: 13912 +[2024-12-06 15:31:38 root] (utils.py 283): INFO Epoch: [7] [1540/2502] eta: 0:20:55 lr: 0.000013 loss_cls: 3.1611 (3.1170) grad_norm: 2.6246 (3.0389) time: 1.2972 data: 0.0003 max mem: 13912 +[2024-12-06 15:31:51 root] (utils.py 283): INFO Epoch: [7] [1550/2502] eta: 0:20:42 lr: 0.000013 loss_cls: 3.3738 (3.1177) grad_norm: 2.5020 (3.0381) time: 1.2967 data: 0.0003 max mem: 13912 +[2024-12-06 15:32:04 root] (utils.py 283): INFO Epoch: [7] [1560/2502] eta: 0:20:29 lr: 0.000013 loss_cls: 3.0989 (3.1180) grad_norm: 2.4433 (3.0354) time: 1.2964 data: 0.0002 max mem: 13912 +[2024-12-06 15:32:17 root] (utils.py 283): INFO Epoch: [7] [1570/2502] eta: 0:20:15 lr: 0.000013 loss_cls: 3.2889 (3.1195) grad_norm: 2.4433 (3.0337) time: 1.2950 data: 0.0002 max mem: 13912 +[2024-12-06 15:32:30 root] (utils.py 283): INFO Epoch: [7] [1580/2502] eta: 0:20:02 lr: 0.000013 loss_cls: 3.1669 (3.1184) grad_norm: 2.7314 (3.0379) time: 1.2958 data: 0.0002 max mem: 13912 +[2024-12-06 15:32:43 root] (utils.py 283): INFO Epoch: [7] [1590/2502] eta: 0:19:49 lr: 0.000013 loss_cls: 3.1444 (3.1201) grad_norm: 2.7314 (3.0352) time: 1.2971 data: 0.0003 max mem: 13912 +[2024-12-06 15:32:56 root] (utils.py 283): INFO Epoch: [7] [1600/2502] eta: 0:19:36 lr: 0.000013 loss_cls: 3.3173 (3.1215) grad_norm: 2.7435 (3.0353) time: 1.2963 data: 0.0003 max mem: 13912 +[2024-12-06 15:33:09 root] (utils.py 283): INFO Epoch: [7] [1610/2502] eta: 0:19:23 lr: 0.000013 loss_cls: 3.2710 (3.1207) grad_norm: 2.8458 (3.0350) time: 1.2973 data: 0.0003 max mem: 13912 +[2024-12-06 15:33:22 root] (utils.py 283): INFO Epoch: [7] [1620/2502] eta: 0:19:10 lr: 0.000013 loss_cls: 2.9951 (3.1183) grad_norm: 2.6646 (3.0347) time: 1.2955 data: 0.0003 max mem: 13912 +[2024-12-06 15:33:35 root] (utils.py 283): INFO Epoch: [7] [1630/2502] eta: 0:18:57 lr: 0.000013 loss_cls: 3.0276 (3.1184) grad_norm: 2.8198 (3.0339) time: 1.2943 data: 0.0002 max mem: 13912 +[2024-12-06 15:33:47 root] (utils.py 283): INFO Epoch: [7] [1640/2502] eta: 0:18:44 lr: 0.000013 loss_cls: 3.2618 (3.1184) grad_norm: 2.5958 (3.0318) time: 1.2951 data: 0.0002 max mem: 13912 +[2024-12-06 15:34:00 root] (utils.py 283): INFO Epoch: [7] [1650/2502] eta: 0:18:31 lr: 0.000013 loss_cls: 3.2548 (3.1177) grad_norm: 2.5958 (3.0298) time: 1.2956 data: 0.0002 max mem: 13912 +[2024-12-06 15:34:13 root] (utils.py 283): INFO Epoch: [7] [1660/2502] eta: 0:18:18 lr: 0.000013 loss_cls: 3.4287 (3.1194) grad_norm: 2.6788 (3.0296) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 15:34:26 root] (utils.py 283): INFO Epoch: [7] [1670/2502] eta: 0:18:05 lr: 0.000013 loss_cls: 3.4901 (3.1203) grad_norm: 2.7136 (3.0279) time: 1.2958 data: 0.0003 max mem: 13912 +[2024-12-06 15:34:39 root] (utils.py 283): INFO Epoch: [7] [1680/2502] eta: 0:17:51 lr: 0.000013 loss_cls: 3.2811 (3.1201) grad_norm: 2.6852 (3.0254) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 15:34:52 root] (utils.py 283): INFO Epoch: [7] [1690/2502] eta: 0:17:38 lr: 0.000013 loss_cls: 3.2007 (3.1201) grad_norm: 2.6379 (3.0237) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 15:35:05 root] (utils.py 283): INFO Epoch: [7] [1700/2502] eta: 0:17:25 lr: 0.000013 loss_cls: 3.2501 (3.1212) grad_norm: 2.5017 (3.0214) time: 1.2983 data: 0.0002 max mem: 13912 +[2024-12-06 15:35:18 root] (utils.py 283): INFO Epoch: [7] [1710/2502] eta: 0:17:12 lr: 0.000013 loss_cls: 3.3626 (3.1225) grad_norm: 2.5017 (3.0205) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 15:35:31 root] (utils.py 283): INFO Epoch: [7] [1720/2502] eta: 0:16:59 lr: 0.000013 loss_cls: 3.3296 (3.1236) grad_norm: 2.6690 (3.0238) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 15:35:44 root] (utils.py 283): INFO Epoch: [7] [1730/2502] eta: 0:16:46 lr: 0.000013 loss_cls: 3.2477 (3.1231) grad_norm: 2.8318 (3.0233) time: 1.2985 data: 0.0002 max mem: 13912 +[2024-12-06 15:35:57 root] (utils.py 283): INFO Epoch: [7] [1740/2502] eta: 0:16:33 lr: 0.000013 loss_cls: 3.2076 (3.1222) grad_norm: 2.7819 (3.0231) time: 1.2958 data: 0.0002 max mem: 13912 +[2024-12-06 15:36:10 root] (utils.py 283): INFO Epoch: [7] [1750/2502] eta: 0:16:20 lr: 0.000013 loss_cls: 3.0866 (3.1218) grad_norm: 2.6436 (3.0266) time: 1.2949 data: 0.0003 max mem: 13912 +[2024-12-06 15:36:23 root] (utils.py 283): INFO Epoch: [7] [1760/2502] eta: 0:16:07 lr: 0.000013 loss_cls: 3.1112 (3.1207) grad_norm: 2.7143 (3.0310) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 15:36:37 root] (utils.py 283): INFO Epoch: [7] [1770/2502] eta: 0:15:54 lr: 0.000013 loss_cls: 2.7633 (3.1193) grad_norm: 2.6938 (3.0288) time: 1.3285 data: 0.0003 max mem: 13912 +[2024-12-06 15:36:50 root] (utils.py 283): INFO Epoch: [7] [1780/2502] eta: 0:15:41 lr: 0.000013 loss_cls: 3.2434 (3.1198) grad_norm: 2.5934 (3.0281) time: 1.3241 data: 0.0003 max mem: 13912 +[2024-12-06 15:37:03 root] (utils.py 283): INFO Epoch: [7] [1790/2502] eta: 0:15:28 lr: 0.000013 loss_cls: 3.1943 (3.1199) grad_norm: 2.7506 (3.0265) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 15:37:16 root] (utils.py 283): INFO Epoch: [7] [1800/2502] eta: 0:15:15 lr: 0.000013 loss_cls: 3.1577 (3.1202) grad_norm: 2.6982 (3.0251) time: 1.2955 data: 0.0002 max mem: 13912 +[2024-12-06 15:37:29 root] (utils.py 283): INFO Epoch: [7] [1810/2502] eta: 0:15:02 lr: 0.000013 loss_cls: 3.1101 (3.1194) grad_norm: 2.6982 (3.0281) time: 1.2962 data: 0.0003 max mem: 13912 +[2024-12-06 15:37:42 root] (utils.py 283): INFO Epoch: [7] [1820/2502] eta: 0:14:49 lr: 0.000013 loss_cls: 3.0800 (3.1185) grad_norm: 2.6511 (3.0297) time: 1.2981 data: 0.0003 max mem: 13912 +[2024-12-06 15:37:55 root] (utils.py 283): INFO Epoch: [7] [1830/2502] eta: 0:14:36 lr: 0.000013 loss_cls: 3.1872 (3.1199) grad_norm: 2.7480 (3.0291) time: 1.2995 data: 0.0003 max mem: 13912 +[2024-12-06 15:38:08 root] (utils.py 283): INFO Epoch: [7] [1840/2502] eta: 0:14:23 lr: 0.000013 loss_cls: 3.3020 (3.1199) grad_norm: 2.7625 (3.0317) time: 1.2994 data: 0.0002 max mem: 13912 +[2024-12-06 15:38:21 root] (utils.py 283): INFO Epoch: [7] [1850/2502] eta: 0:14:10 lr: 0.000013 loss_cls: 3.2955 (3.1204) grad_norm: 2.7625 (3.0319) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 15:38:34 root] (utils.py 283): INFO Epoch: [7] [1860/2502] eta: 0:13:57 lr: 0.000013 loss_cls: 3.1634 (3.1191) grad_norm: 2.6703 (3.0301) time: 1.3062 data: 0.0003 max mem: 13912 +[2024-12-06 15:38:47 root] (utils.py 283): INFO Epoch: [7] [1870/2502] eta: 0:13:44 lr: 0.000013 loss_cls: 2.9384 (3.1185) grad_norm: 2.5969 (3.0279) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 15:39:00 root] (utils.py 283): INFO Epoch: [7] [1880/2502] eta: 0:13:31 lr: 0.000013 loss_cls: 2.9384 (3.1168) grad_norm: 2.5992 (3.0263) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 15:39:13 root] (utils.py 283): INFO Epoch: [7] [1890/2502] eta: 0:13:17 lr: 0.000013 loss_cls: 3.0160 (3.1171) grad_norm: 2.6489 (3.0248) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 15:39:26 root] (utils.py 283): INFO Epoch: [7] [1900/2502] eta: 0:13:04 lr: 0.000013 loss_cls: 3.3588 (3.1181) grad_norm: 2.7529 (3.0257) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 15:39:39 root] (utils.py 283): INFO Epoch: [7] [1910/2502] eta: 0:12:51 lr: 0.000013 loss_cls: 3.4685 (3.1201) grad_norm: 2.6261 (3.0251) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 15:39:52 root] (utils.py 283): INFO Epoch: [7] [1920/2502] eta: 0:12:38 lr: 0.000013 loss_cls: 3.2868 (3.1200) grad_norm: 2.5041 (3.0235) time: 1.3034 data: 0.0002 max mem: 13912 +[2024-12-06 15:40:05 root] (utils.py 283): INFO Epoch: [7] [1930/2502] eta: 0:12:25 lr: 0.000013 loss_cls: 3.2667 (3.1209) grad_norm: 2.7559 (3.0239) time: 1.2998 data: 0.0002 max mem: 13912 +[2024-12-06 15:40:18 root] (utils.py 283): INFO Epoch: [7] [1940/2502] eta: 0:12:12 lr: 0.000013 loss_cls: 3.3772 (3.1217) grad_norm: 2.9304 (3.0247) time: 1.2959 data: 0.0003 max mem: 13912 +[2024-12-06 15:40:31 root] (utils.py 283): INFO Epoch: [7] [1950/2502] eta: 0:11:59 lr: 0.000013 loss_cls: 3.2979 (3.1219) grad_norm: 2.6909 (3.0240) time: 1.2975 data: 0.0003 max mem: 13912 +[2024-12-06 15:40:44 root] (utils.py 283): INFO Epoch: [7] [1960/2502] eta: 0:11:46 lr: 0.000013 loss_cls: 3.2546 (3.1220) grad_norm: 2.7722 (3.0233) time: 1.2971 data: 0.0002 max mem: 13912 +[2024-12-06 15:40:57 root] (utils.py 283): INFO Epoch: [7] [1970/2502] eta: 0:11:33 lr: 0.000013 loss_cls: 3.3904 (3.1234) grad_norm: 2.8202 (3.0228) time: 1.2960 data: 0.0002 max mem: 13912 +[2024-12-06 15:41:10 root] (utils.py 283): INFO Epoch: [7] [1980/2502] eta: 0:11:20 lr: 0.000013 loss_cls: 3.4028 (3.1238) grad_norm: 2.4343 (3.0200) time: 1.2949 data: 0.0003 max mem: 13912 +[2024-12-06 15:41:23 root] (utils.py 283): INFO Epoch: [7] [1990/2502] eta: 0:11:07 lr: 0.000013 loss_cls: 3.0830 (3.1235) grad_norm: 2.5164 (3.0179) time: 1.2944 data: 0.0003 max mem: 13912 +[2024-12-06 15:41:36 root] (utils.py 283): INFO Epoch: [7] [2000/2502] eta: 0:10:54 lr: 0.000013 loss_cls: 3.0162 (3.1232) grad_norm: 2.7214 (3.0177) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 15:41:49 root] (utils.py 283): INFO Epoch: [7] [2010/2502] eta: 0:10:41 lr: 0.000013 loss_cls: 3.1633 (3.1235) grad_norm: 2.8179 (3.0186) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 15:42:02 root] (utils.py 283): INFO Epoch: [7] [2020/2502] eta: 0:10:28 lr: 0.000013 loss_cls: 3.2951 (3.1236) grad_norm: 2.8179 (3.0182) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 15:42:15 root] (utils.py 283): INFO Epoch: [7] [2030/2502] eta: 0:10:15 lr: 0.000013 loss_cls: 3.3867 (3.1244) grad_norm: 3.0312 (3.0185) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 15:42:28 root] (utils.py 283): INFO Epoch: [7] [2040/2502] eta: 0:10:02 lr: 0.000013 loss_cls: 3.3602 (3.1241) grad_norm: 2.6504 (3.0171) time: 1.2969 data: 0.0002 max mem: 13912 +[2024-12-06 15:42:41 root] (utils.py 283): INFO Epoch: [7] [2050/2502] eta: 0:09:49 lr: 0.000013 loss_cls: 3.2004 (3.1246) grad_norm: 2.6032 (3.0167) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 15:42:54 root] (utils.py 283): INFO Epoch: [7] [2060/2502] eta: 0:09:36 lr: 0.000013 loss_cls: 3.3266 (3.1261) grad_norm: 2.7870 (3.0189) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 15:43:07 root] (utils.py 283): INFO Epoch: [7] [2070/2502] eta: 0:09:23 lr: 0.000013 loss_cls: 3.3565 (3.1258) grad_norm: 2.7745 (3.0208) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 15:43:20 root] (utils.py 283): INFO Epoch: [7] [2080/2502] eta: 0:09:10 lr: 0.000013 loss_cls: 3.2515 (3.1261) grad_norm: 2.7745 (3.0204) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 15:43:33 root] (utils.py 283): INFO Epoch: [7] [2090/2502] eta: 0:08:57 lr: 0.000013 loss_cls: 3.2067 (3.1258) grad_norm: 2.7176 (3.0195) time: 1.3093 data: 0.0003 max mem: 13912 +[2024-12-06 15:43:46 root] (utils.py 283): INFO Epoch: [7] [2100/2502] eta: 0:08:44 lr: 0.000013 loss_cls: 3.1106 (3.1261) grad_norm: 2.6519 (3.0189) time: 1.3081 data: 0.0003 max mem: 13912 +[2024-12-06 15:43:59 root] (utils.py 283): INFO Epoch: [7] [2110/2502] eta: 0:08:30 lr: 0.000013 loss_cls: 3.1044 (3.1262) grad_norm: 2.6918 (3.0182) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 15:44:12 root] (utils.py 283): INFO Epoch: [7] [2120/2502] eta: 0:08:17 lr: 0.000013 loss_cls: 3.3916 (3.1267) grad_norm: 2.7865 (3.0187) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 15:44:25 root] (utils.py 283): INFO Epoch: [7] [2130/2502] eta: 0:08:04 lr: 0.000013 loss_cls: 3.3245 (3.1262) grad_norm: 2.8623 (3.0175) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 15:44:38 root] (utils.py 283): INFO Epoch: [7] [2140/2502] eta: 0:07:51 lr: 0.000013 loss_cls: 2.8679 (3.1252) grad_norm: 2.8039 (3.0193) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 15:44:51 root] (utils.py 283): INFO Epoch: [7] [2150/2502] eta: 0:07:38 lr: 0.000013 loss_cls: 2.8949 (3.1243) grad_norm: 2.6515 (3.0190) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 15:45:04 root] (utils.py 283): INFO Epoch: [7] [2160/2502] eta: 0:07:25 lr: 0.000013 loss_cls: 2.9982 (3.1243) grad_norm: 2.6622 (3.0182) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 15:45:17 root] (utils.py 283): INFO Epoch: [7] [2170/2502] eta: 0:07:12 lr: 0.000013 loss_cls: 3.0179 (3.1244) grad_norm: 2.9542 (3.0214) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 15:45:30 root] (utils.py 283): INFO Epoch: [7] [2180/2502] eta: 0:06:59 lr: 0.000013 loss_cls: 3.2342 (3.1240) grad_norm: 2.7409 (3.0213) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 15:45:43 root] (utils.py 283): INFO Epoch: [7] [2190/2502] eta: 0:06:46 lr: 0.000013 loss_cls: 3.3500 (3.1250) grad_norm: 2.6074 (3.0195) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 15:45:56 root] (utils.py 283): INFO Epoch: [7] [2200/2502] eta: 0:06:33 lr: 0.000013 loss_cls: 3.1618 (3.1238) grad_norm: 2.5870 (3.0180) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 15:46:09 root] (utils.py 283): INFO Epoch: [7] [2210/2502] eta: 0:06:20 lr: 0.000013 loss_cls: 2.9228 (3.1244) grad_norm: 2.7132 (3.0166) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 15:46:22 root] (utils.py 283): INFO Epoch: [7] [2220/2502] eta: 0:06:07 lr: 0.000013 loss_cls: 3.2219 (3.1248) grad_norm: 2.7311 (3.0162) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 15:46:35 root] (utils.py 283): INFO Epoch: [7] [2230/2502] eta: 0:05:54 lr: 0.000013 loss_cls: 3.2556 (3.1248) grad_norm: 2.6819 (3.0144) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 15:46:48 root] (utils.py 283): INFO Epoch: [7] [2240/2502] eta: 0:05:41 lr: 0.000013 loss_cls: 3.2712 (3.1244) grad_norm: 2.6106 (3.0138) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 15:47:02 root] (utils.py 283): INFO Epoch: [7] [2250/2502] eta: 0:05:28 lr: 0.000013 loss_cls: 3.2793 (3.1250) grad_norm: 2.5158 (3.0123) time: 1.3054 data: 0.0002 max mem: 13912 +[2024-12-06 15:47:15 root] (utils.py 283): INFO Epoch: [7] [2260/2502] eta: 0:05:15 lr: 0.000013 loss_cls: 3.2457 (3.1244) grad_norm: 2.5548 (3.0184) time: 1.3080 data: 0.0003 max mem: 13912 +[2024-12-06 15:47:28 root] (utils.py 283): INFO Epoch: [7] [2270/2502] eta: 0:05:02 lr: 0.000013 loss_cls: 3.0366 (3.1243) grad_norm: 2.6804 (3.0176) time: 1.3071 data: 0.0003 max mem: 13912 +[2024-12-06 15:47:41 root] (utils.py 283): INFO Epoch: [7] [2280/2502] eta: 0:04:49 lr: 0.000013 loss_cls: 3.0366 (3.1241) grad_norm: 2.7337 (3.0208) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 15:47:54 root] (utils.py 283): INFO Epoch: [7] [2290/2502] eta: 0:04:36 lr: 0.000013 loss_cls: 3.2521 (3.1245) grad_norm: 2.7337 (3.0202) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 15:48:07 root] (utils.py 283): INFO Epoch: [7] [2300/2502] eta: 0:04:23 lr: 0.000013 loss_cls: 3.0976 (3.1231) grad_norm: 2.7338 (3.0191) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 15:48:20 root] (utils.py 283): INFO Epoch: [7] [2310/2502] eta: 0:04:10 lr: 0.000013 loss_cls: 3.0472 (3.1228) grad_norm: 2.8007 (3.0212) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 15:48:33 root] (utils.py 283): INFO Epoch: [7] [2320/2502] eta: 0:03:57 lr: 0.000013 loss_cls: 3.2706 (3.1226) grad_norm: 2.8513 (3.0221) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 15:48:46 root] (utils.py 283): INFO Epoch: [7] [2330/2502] eta: 0:03:44 lr: 0.000013 loss_cls: 3.2456 (3.1232) grad_norm: 2.8513 (3.0215) time: 1.3052 data: 0.0002 max mem: 13912 +[2024-12-06 15:48:59 root] (utils.py 283): INFO Epoch: [7] [2340/2502] eta: 0:03:31 lr: 0.000013 loss_cls: 3.1397 (3.1221) grad_norm: 2.8852 (3.0272) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 15:49:12 root] (utils.py 283): INFO Epoch: [7] [2350/2502] eta: 0:03:18 lr: 0.000013 loss_cls: 3.1397 (3.1224) grad_norm: 2.6626 (3.0254) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 15:49:25 root] (utils.py 283): INFO Epoch: [7] [2360/2502] eta: 0:03:05 lr: 0.000013 loss_cls: 3.2499 (3.1231) grad_norm: 2.6626 (3.0263) time: 1.2989 data: 0.0002 max mem: 13912 +[2024-12-06 15:49:38 root] (utils.py 283): INFO Epoch: [7] [2370/2502] eta: 0:02:52 lr: 0.000013 loss_cls: 3.2270 (3.1231) grad_norm: 2.6800 (3.0253) time: 1.2991 data: 0.0002 max mem: 13912 +[2024-12-06 15:49:51 root] (utils.py 283): INFO Epoch: [7] [2380/2502] eta: 0:02:39 lr: 0.000013 loss_cls: 3.2491 (3.1238) grad_norm: 2.6800 (3.0281) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 15:50:04 root] (utils.py 283): INFO Epoch: [7] [2390/2502] eta: 0:02:25 lr: 0.000013 loss_cls: 3.2607 (3.1242) grad_norm: 2.7878 (3.0272) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 15:50:17 root] (utils.py 283): INFO Epoch: [7] [2400/2502] eta: 0:02:12 lr: 0.000013 loss_cls: 3.1615 (3.1232) grad_norm: 2.6074 (3.0262) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 15:50:30 root] (utils.py 283): INFO Epoch: [7] [2410/2502] eta: 0:01:59 lr: 0.000013 loss_cls: 3.1498 (3.1236) grad_norm: 2.6730 (3.0261) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 15:50:43 root] (utils.py 283): INFO Epoch: [7] [2420/2502] eta: 0:01:46 lr: 0.000013 loss_cls: 3.1811 (3.1239) grad_norm: 2.7814 (3.0255) time: 1.2988 data: 0.0003 max mem: 13912 +[2024-12-06 15:50:56 root] (utils.py 283): INFO Epoch: [7] [2430/2502] eta: 0:01:33 lr: 0.000013 loss_cls: 3.1811 (3.1235) grad_norm: 2.7071 (3.0261) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 15:51:09 root] (utils.py 283): INFO Epoch: [7] [2440/2502] eta: 0:01:20 lr: 0.000013 loss_cls: 3.1991 (3.1240) grad_norm: 2.4508 (3.0241) time: 1.3077 data: 0.0002 max mem: 13912 +[2024-12-06 15:51:22 root] (utils.py 283): INFO Epoch: [7] [2450/2502] eta: 0:01:07 lr: 0.000013 loss_cls: 3.2148 (3.1236) grad_norm: 2.5592 (3.0235) time: 1.3074 data: 0.0002 max mem: 13912 +[2024-12-06 15:51:35 root] (utils.py 283): INFO Epoch: [7] [2460/2502] eta: 0:00:54 lr: 0.000013 loss_cls: 3.0533 (3.1228) grad_norm: 2.6737 (3.0230) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 15:51:48 root] (utils.py 283): INFO Epoch: [7] [2470/2502] eta: 0:00:41 lr: 0.000013 loss_cls: 2.9586 (3.1224) grad_norm: 2.6445 (3.0225) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 15:52:01 root] (utils.py 283): INFO Epoch: [7] [2480/2502] eta: 0:00:28 lr: 0.000013 loss_cls: 2.9586 (3.1222) grad_norm: 2.6227 (3.0206) time: 1.3062 data: 0.0002 max mem: 13912 +[2024-12-06 15:52:15 root] (utils.py 283): INFO Epoch: [7] [2490/2502] eta: 0:00:15 lr: 0.000013 loss_cls: 3.0219 (3.1224) grad_norm: 2.4340 (3.0186) time: 1.3309 data: 0.0245 max mem: 13912 +[2024-12-06 15:52:28 root] (utils.py 283): INFO Epoch: [7] [2500/2502] eta: 0:00:02 lr: 0.000013 loss_cls: 3.0219 (3.1219) grad_norm: 2.4810 (3.0177) time: 1.3277 data: 0.0245 max mem: 13912 +[2024-12-06 15:52:29 root] (utils.py 283): INFO Epoch: [7] [2501/2502] eta: 0:00:01 lr: 0.000013 loss_cls: 3.1024 (3.1220) grad_norm: 2.4810 (3.0174) time: 1.3276 data: 0.0245 max mem: 13912 +[2024-12-06 15:52:29 root] (utils.py 297): INFO Epoch: [7] Total time: 0:54:22 (1.3038 s / it) +[2024-12-06 15:52:29 root] (engine.py 179): INFO Averaged stats:lr: 0.000013 loss_cls: 3.1024 (3.1189) grad_norm: 2.4810 (3.0174) +[2024-12-06 15:52:30 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4553 (0.4553) acc1: 91.4062 (91.4062) acc3: 97.6562 (97.6562) acc5: 98.4375 (98.4375) time: 0.2252 data: 0.0004 max mem: 13912 +[2024-12-06 15:52:33 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:19 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6596 (0.6667) acc1: 86.7188 (86.2216) acc3: 96.0938 (95.5966) acc5: 96.8750 (97.1591) time: 0.2271 data: 0.0004 max mem: 13912 +[2024-12-06 15:52:35 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7080 (0.7169) acc1: 85.1562 (85.2679) acc3: 95.3125 (94.9405) acc5: 96.8750 (96.6518) time: 0.2276 data: 0.0005 max mem: 13912 +[2024-12-06 15:52:37 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7948 (0.7364) acc1: 82.8125 (84.1986) acc3: 94.5312 (94.9093) acc5: 96.8750 (96.5474) time: 0.2279 data: 0.0005 max mem: 13912 +[2024-12-06 15:52:39 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7631 (0.7419) acc1: 82.8125 (83.9939) acc3: 94.5312 (94.8742) acc5: 96.8750 (96.5701) time: 0.2279 data: 0.0005 max mem: 13912 +[2024-12-06 15:52:42 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8362 (0.8197) acc1: 78.9062 (82.0312) acc3: 90.6250 (93.5509) acc5: 94.5312 (95.7567) time: 0.2278 data: 0.0005 max mem: 13912 +[2024-12-06 15:52:44 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0610 (0.8511) acc1: 75.0000 (81.6086) acc3: 88.2812 (92.8791) acc5: 92.9688 (95.2485) time: 0.2289 data: 0.0005 max mem: 13912 +[2024-12-06 15:52:46 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0495 (0.8801) acc1: 79.6875 (80.8759) acc3: 89.8438 (92.5176) acc5: 93.7500 (95.0264) time: 0.2290 data: 0.0005 max mem: 13912 +[2024-12-06 15:52:49 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0491 (0.9102) acc1: 75.7812 (80.2083) acc3: 89.0625 (92.0042) acc5: 92.1875 (94.6084) time: 0.2283 data: 0.0007 max mem: 13912 +[2024-12-06 15:52:51 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0702 (0.9344) acc1: 74.2188 (79.4128) acc3: 88.2812 (91.6724) acc5: 91.4062 (94.3595) time: 0.2284 data: 0.0006 max mem: 13912 +[2024-12-06 15:52:52 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0320 (0.9292) acc1: 74.2188 (79.4560) acc3: 89.8438 (91.7680) acc5: 92.9688 (94.4400) time: 0.2246 data: 0.0006 max mem: 13912 +[2024-12-06 15:52:52 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2276 s / it) +[2024-12-06 15:52:52 root] (engine.py 264): INFO * Acc@1 79.454 Acc@3 91.962 Acc@5 94.638 loss 0.924 flops 3.584 layer_flops 3.536 +[2024-12-06 15:52:52 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.5% +[2024-12-06 15:52:52 root] (main.py 551): INFO Max accuracy: 79.49% +[2024-12-06 15:52:54 root] (utils.py 283): INFO Epoch: [8] [ 0/2502] eta: 0:54:03 lr: 0.000011 loss_cls: 3.2744 (3.2744) grad_norm: 2.7744 (2.7744) time: 1.2965 data: 0.0003 max mem: 13912 +[2024-12-06 15:53:07 root] (utils.py 283): INFO Epoch: [8] [ 10/2502] eta: 0:54:24 lr: 0.000011 loss_cls: 3.3820 (3.1480) grad_norm: 2.7744 (3.2057) time: 1.3099 data: 0.0003 max mem: 13912 +[2024-12-06 15:53:20 root] (utils.py 283): INFO Epoch: [8] [ 20/2502] eta: 0:54:17 lr: 0.000011 loss_cls: 3.3870 (3.2762) grad_norm: 2.7102 (3.4801) time: 1.3134 data: 0.0002 max mem: 13912 +[2024-12-06 15:53:33 root] (utils.py 283): INFO Epoch: [8] [ 30/2502] eta: 0:53:54 lr: 0.000011 loss_cls: 3.5033 (3.2877) grad_norm: 2.7691 (3.2239) time: 1.3073 data: 0.0003 max mem: 13912 +[2024-12-06 15:53:46 root] (utils.py 283): INFO Epoch: [8] [ 40/2502] eta: 0:53:39 lr: 0.000011 loss_cls: 3.3754 (3.2573) grad_norm: 2.6736 (3.0728) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 15:53:59 root] (utils.py 283): INFO Epoch: [8] [ 50/2502] eta: 0:53:21 lr: 0.000011 loss_cls: 3.3406 (3.2417) grad_norm: 2.7034 (3.0896) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 15:54:12 root] (utils.py 283): INFO Epoch: [8] [ 60/2502] eta: 0:53:07 lr: 0.000011 loss_cls: 3.2208 (3.2552) grad_norm: 2.7034 (3.0275) time: 1.3004 data: 0.0002 max mem: 13912 +[2024-12-06 15:54:25 root] (utils.py 283): INFO Epoch: [8] [ 70/2502] eta: 0:52:52 lr: 0.000011 loss_cls: 3.1931 (3.2419) grad_norm: 2.4760 (2.9542) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 15:54:38 root] (utils.py 283): INFO Epoch: [8] [ 80/2502] eta: 0:52:37 lr: 0.000011 loss_cls: 3.2600 (3.2559) grad_norm: 2.4884 (2.9351) time: 1.2999 data: 0.0003 max mem: 13912 +[2024-12-06 15:54:51 root] (utils.py 283): INFO Epoch: [8] [ 90/2502] eta: 0:52:24 lr: 0.000011 loss_cls: 3.2626 (3.2322) grad_norm: 2.7283 (2.9137) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 15:55:04 root] (utils.py 283): INFO Epoch: [8] [ 100/2502] eta: 0:52:11 lr: 0.000011 loss_cls: 3.0247 (3.1980) grad_norm: 2.5377 (2.8725) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 15:55:17 root] (utils.py 283): INFO Epoch: [8] [ 110/2502] eta: 0:51:57 lr: 0.000011 loss_cls: 3.0565 (3.1764) grad_norm: 2.5118 (2.8900) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 15:55:30 root] (utils.py 283): INFO Epoch: [8] [ 120/2502] eta: 0:51:44 lr: 0.000011 loss_cls: 3.0762 (3.1608) grad_norm: 2.6078 (2.8919) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 15:55:43 root] (utils.py 283): INFO Epoch: [8] [ 130/2502] eta: 0:51:30 lr: 0.000011 loss_cls: 3.0763 (3.1568) grad_norm: 2.6426 (2.8828) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 15:55:56 root] (utils.py 283): INFO Epoch: [8] [ 140/2502] eta: 0:51:17 lr: 0.000011 loss_cls: 3.2087 (3.1650) grad_norm: 2.5373 (2.8689) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 15:56:09 root] (utils.py 283): INFO Epoch: [8] [ 150/2502] eta: 0:51:07 lr: 0.000011 loss_cls: 3.1577 (3.1536) grad_norm: 2.4607 (2.8418) time: 1.3106 data: 0.0003 max mem: 13912 +[2024-12-06 15:56:22 root] (utils.py 283): INFO Epoch: [8] [ 160/2502] eta: 0:50:54 lr: 0.000011 loss_cls: 3.0147 (3.1447) grad_norm: 2.5013 (2.8398) time: 1.3137 data: 0.0003 max mem: 13912 +[2024-12-06 15:56:35 root] (utils.py 283): INFO Epoch: [8] [ 170/2502] eta: 0:50:41 lr: 0.000011 loss_cls: 2.8195 (3.1248) grad_norm: 2.5770 (2.8350) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 15:56:48 root] (utils.py 283): INFO Epoch: [8] [ 180/2502] eta: 0:50:28 lr: 0.000011 loss_cls: 2.9260 (3.1204) grad_norm: 2.6317 (2.8671) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 15:57:01 root] (utils.py 283): INFO Epoch: [8] [ 190/2502] eta: 0:50:15 lr: 0.000011 loss_cls: 3.0652 (3.1158) grad_norm: 2.7093 (2.8645) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 15:57:14 root] (utils.py 283): INFO Epoch: [8] [ 200/2502] eta: 0:50:02 lr: 0.000011 loss_cls: 3.0958 (3.1118) grad_norm: 2.9302 (2.8827) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 15:57:27 root] (utils.py 283): INFO Epoch: [8] [ 210/2502] eta: 0:49:48 lr: 0.000011 loss_cls: 3.2173 (3.1173) grad_norm: 2.8370 (2.8679) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 15:57:41 root] (utils.py 283): INFO Epoch: [8] [ 220/2502] eta: 0:49:35 lr: 0.000011 loss_cls: 3.2173 (3.1159) grad_norm: 2.5873 (2.8678) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 15:57:54 root] (utils.py 283): INFO Epoch: [8] [ 230/2502] eta: 0:49:22 lr: 0.000011 loss_cls: 3.2960 (3.1198) grad_norm: 2.7703 (2.9153) time: 1.3034 data: 0.0002 max mem: 13912 +[2024-12-06 15:58:07 root] (utils.py 283): INFO Epoch: [8] [ 240/2502] eta: 0:49:09 lr: 0.000011 loss_cls: 3.0193 (3.1022) grad_norm: 2.8386 (2.9489) time: 1.3052 data: 0.0002 max mem: 13912 +[2024-12-06 15:58:20 root] (utils.py 283): INFO Epoch: [8] [ 250/2502] eta: 0:48:56 lr: 0.000011 loss_cls: 2.9868 (3.1026) grad_norm: 2.6949 (2.9407) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 15:58:33 root] (utils.py 283): INFO Epoch: [8] [ 260/2502] eta: 0:48:43 lr: 0.000011 loss_cls: 3.2977 (3.1013) grad_norm: 2.6815 (2.9837) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 15:58:46 root] (utils.py 283): INFO Epoch: [8] [ 270/2502] eta: 0:48:30 lr: 0.000011 loss_cls: 3.3382 (3.1099) grad_norm: 2.6815 (2.9847) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 15:58:59 root] (utils.py 283): INFO Epoch: [8] [ 280/2502] eta: 0:48:17 lr: 0.000011 loss_cls: 3.3980 (3.1165) grad_norm: 2.5920 (2.9735) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 15:59:12 root] (utils.py 283): INFO Epoch: [8] [ 290/2502] eta: 0:48:04 lr: 0.000011 loss_cls: 3.2497 (3.1031) grad_norm: 2.5072 (2.9617) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 15:59:25 root] (utils.py 283): INFO Epoch: [8] [ 300/2502] eta: 0:47:51 lr: 0.000011 loss_cls: 3.0954 (3.1075) grad_norm: 2.6110 (2.9561) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 15:59:38 root] (utils.py 283): INFO Epoch: [8] [ 310/2502] eta: 0:47:39 lr: 0.000011 loss_cls: 3.1514 (3.1074) grad_norm: 2.5769 (2.9440) time: 1.3103 data: 0.0002 max mem: 13912 +[2024-12-06 15:59:51 root] (utils.py 283): INFO Epoch: [8] [ 320/2502] eta: 0:47:25 lr: 0.000011 loss_cls: 3.2535 (3.1171) grad_norm: 2.5612 (2.9377) time: 1.3078 data: 0.0003 max mem: 13912 +[2024-12-06 16:00:04 root] (utils.py 283): INFO Epoch: [8] [ 330/2502] eta: 0:47:12 lr: 0.000011 loss_cls: 3.0899 (3.1077) grad_norm: 2.7079 (2.9320) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 16:00:17 root] (utils.py 283): INFO Epoch: [8] [ 340/2502] eta: 0:46:59 lr: 0.000011 loss_cls: 2.8865 (3.1055) grad_norm: 2.6904 (2.9256) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 16:00:30 root] (utils.py 283): INFO Epoch: [8] [ 350/2502] eta: 0:46:46 lr: 0.000011 loss_cls: 2.8883 (3.1048) grad_norm: 2.5980 (2.9276) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 16:00:43 root] (utils.py 283): INFO Epoch: [8] [ 360/2502] eta: 0:46:33 lr: 0.000011 loss_cls: 3.2322 (3.1086) grad_norm: 2.4864 (2.9165) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 16:00:56 root] (utils.py 283): INFO Epoch: [8] [ 370/2502] eta: 0:46:19 lr: 0.000011 loss_cls: 3.2117 (3.1047) grad_norm: 2.6365 (2.9191) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 16:01:09 root] (utils.py 283): INFO Epoch: [8] [ 380/2502] eta: 0:46:07 lr: 0.000011 loss_cls: 3.3204 (3.1086) grad_norm: 2.7696 (2.9214) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 16:01:22 root] (utils.py 283): INFO Epoch: [8] [ 390/2502] eta: 0:45:54 lr: 0.000011 loss_cls: 3.2754 (3.1051) grad_norm: 2.8201 (2.9253) time: 1.3078 data: 0.0003 max mem: 13912 +[2024-12-06 16:01:35 root] (utils.py 283): INFO Epoch: [8] [ 400/2502] eta: 0:45:41 lr: 0.000011 loss_cls: 2.9824 (3.1022) grad_norm: 2.7948 (2.9217) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 16:01:48 root] (utils.py 283): INFO Epoch: [8] [ 410/2502] eta: 0:45:28 lr: 0.000011 loss_cls: 3.3310 (3.1092) grad_norm: 2.6673 (2.9216) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 16:02:01 root] (utils.py 283): INFO Epoch: [8] [ 420/2502] eta: 0:45:14 lr: 0.000011 loss_cls: 3.3310 (3.1079) grad_norm: 2.6493 (2.9243) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 16:02:14 root] (utils.py 283): INFO Epoch: [8] [ 430/2502] eta: 0:45:01 lr: 0.000011 loss_cls: 3.0744 (3.1043) grad_norm: 2.6579 (2.9218) time: 1.2981 data: 0.0003 max mem: 13912 +[2024-12-06 16:02:27 root] (utils.py 283): INFO Epoch: [8] [ 440/2502] eta: 0:44:48 lr: 0.000011 loss_cls: 3.1218 (3.1065) grad_norm: 2.8588 (2.9268) time: 1.2992 data: 0.0002 max mem: 13912 +[2024-12-06 16:02:40 root] (utils.py 283): INFO Epoch: [8] [ 450/2502] eta: 0:44:35 lr: 0.000011 loss_cls: 3.3253 (3.1078) grad_norm: 2.6846 (2.9267) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 16:02:53 root] (utils.py 283): INFO Epoch: [8] [ 460/2502] eta: 0:44:22 lr: 0.000011 loss_cls: 3.2220 (3.1108) grad_norm: 2.6072 (2.9252) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 16:03:06 root] (utils.py 283): INFO Epoch: [8] [ 470/2502] eta: 0:44:08 lr: 0.000011 loss_cls: 3.2428 (3.1133) grad_norm: 2.6609 (2.9203) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 16:03:19 root] (utils.py 283): INFO Epoch: [8] [ 480/2502] eta: 0:43:55 lr: 0.000011 loss_cls: 3.1673 (3.1111) grad_norm: 2.7514 (2.9195) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 16:03:32 root] (utils.py 283): INFO Epoch: [8] [ 490/2502] eta: 0:43:42 lr: 0.000011 loss_cls: 3.0572 (3.1121) grad_norm: 2.7793 (2.9204) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 16:03:45 root] (utils.py 283): INFO Epoch: [8] [ 500/2502] eta: 0:43:29 lr: 0.000011 loss_cls: 3.2065 (3.1127) grad_norm: 2.5713 (2.9107) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 16:03:58 root] (utils.py 283): INFO Epoch: [8] [ 510/2502] eta: 0:43:16 lr: 0.000011 loss_cls: 3.2606 (3.1125) grad_norm: 2.4939 (2.9306) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 16:04:12 root] (utils.py 283): INFO Epoch: [8] [ 520/2502] eta: 0:43:03 lr: 0.000011 loss_cls: 3.2606 (3.1136) grad_norm: 2.6188 (2.9240) time: 1.3034 data: 0.0002 max mem: 13912 +[2024-12-06 16:04:25 root] (utils.py 283): INFO Epoch: [8] [ 530/2502] eta: 0:42:50 lr: 0.000011 loss_cls: 3.2619 (3.1165) grad_norm: 2.6188 (2.9225) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 16:04:38 root] (utils.py 283): INFO Epoch: [8] [ 540/2502] eta: 0:42:37 lr: 0.000011 loss_cls: 3.1079 (3.1126) grad_norm: 2.7832 (2.9366) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 16:04:51 root] (utils.py 283): INFO Epoch: [8] [ 550/2502] eta: 0:42:24 lr: 0.000011 loss_cls: 3.1001 (3.1161) grad_norm: 2.7262 (2.9338) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 16:05:04 root] (utils.py 283): INFO Epoch: [8] [ 560/2502] eta: 0:42:11 lr: 0.000011 loss_cls: 3.3573 (3.1192) grad_norm: 2.6524 (2.9381) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 16:05:17 root] (utils.py 283): INFO Epoch: [8] [ 570/2502] eta: 0:41:58 lr: 0.000011 loss_cls: 3.2144 (3.1214) grad_norm: 2.6394 (2.9421) time: 1.3040 data: 0.0002 max mem: 13912 +[2024-12-06 16:05:30 root] (utils.py 283): INFO Epoch: [8] [ 580/2502] eta: 0:41:45 lr: 0.000011 loss_cls: 3.1917 (3.1204) grad_norm: 2.6014 (2.9577) time: 1.3070 data: 0.0002 max mem: 13912 +[2024-12-06 16:05:43 root] (utils.py 283): INFO Epoch: [8] [ 590/2502] eta: 0:41:32 lr: 0.000011 loss_cls: 3.1415 (3.1215) grad_norm: 2.8767 (2.9607) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 16:05:56 root] (utils.py 283): INFO Epoch: [8] [ 600/2502] eta: 0:41:19 lr: 0.000011 loss_cls: 3.3305 (3.1270) grad_norm: 2.8767 (2.9604) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 16:06:09 root] (utils.py 283): INFO Epoch: [8] [ 610/2502] eta: 0:41:06 lr: 0.000011 loss_cls: 3.3305 (3.1260) grad_norm: 2.8254 (2.9865) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 16:06:22 root] (utils.py 283): INFO Epoch: [8] [ 620/2502] eta: 0:40:53 lr: 0.000011 loss_cls: 3.2359 (3.1245) grad_norm: 2.6303 (2.9961) time: 1.3039 data: 0.0002 max mem: 13912 +[2024-12-06 16:06:35 root] (utils.py 283): INFO Epoch: [8] [ 630/2502] eta: 0:40:40 lr: 0.000011 loss_cls: 3.2846 (3.1246) grad_norm: 2.6628 (2.9916) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 16:06:48 root] (utils.py 283): INFO Epoch: [8] [ 640/2502] eta: 0:40:27 lr: 0.000011 loss_cls: 3.3339 (3.1281) grad_norm: 2.6628 (3.0056) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 16:07:01 root] (utils.py 283): INFO Epoch: [8] [ 650/2502] eta: 0:40:13 lr: 0.000011 loss_cls: 3.2929 (3.1256) grad_norm: 3.0247 (3.0364) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 16:07:14 root] (utils.py 283): INFO Epoch: [8] [ 660/2502] eta: 0:40:00 lr: 0.000011 loss_cls: 3.0167 (3.1266) grad_norm: 3.0247 (3.0449) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 16:07:27 root] (utils.py 283): INFO Epoch: [8] [ 670/2502] eta: 0:39:48 lr: 0.000011 loss_cls: 3.1286 (3.1265) grad_norm: 2.7580 (3.0405) time: 1.3100 data: 0.0002 max mem: 13912 +[2024-12-06 16:07:40 root] (utils.py 283): INFO Epoch: [8] [ 680/2502] eta: 0:39:35 lr: 0.000011 loss_cls: 3.1286 (3.1267) grad_norm: 2.7218 (3.0351) time: 1.3093 data: 0.0002 max mem: 13912 +[2024-12-06 16:07:53 root] (utils.py 283): INFO Epoch: [8] [ 690/2502] eta: 0:39:22 lr: 0.000011 loss_cls: 3.1344 (3.1265) grad_norm: 2.5554 (3.0321) time: 1.3039 data: 0.0002 max mem: 13912 +[2024-12-06 16:08:06 root] (utils.py 283): INFO Epoch: [8] [ 700/2502] eta: 0:39:09 lr: 0.000011 loss_cls: 3.1746 (3.1274) grad_norm: 2.6218 (3.0330) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 16:08:19 root] (utils.py 283): INFO Epoch: [8] [ 710/2502] eta: 0:38:56 lr: 0.000011 loss_cls: 3.2555 (3.1297) grad_norm: 2.6408 (3.0290) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 16:08:32 root] (utils.py 283): INFO Epoch: [8] [ 720/2502] eta: 0:38:43 lr: 0.000011 loss_cls: 3.0777 (3.1268) grad_norm: 2.5697 (3.0244) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 16:08:45 root] (utils.py 283): INFO Epoch: [8] [ 730/2502] eta: 0:38:30 lr: 0.000011 loss_cls: 2.9811 (3.1279) grad_norm: 2.5697 (3.0312) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 16:08:58 root] (utils.py 283): INFO Epoch: [8] [ 740/2502] eta: 0:38:17 lr: 0.000011 loss_cls: 3.2810 (3.1274) grad_norm: 2.8816 (3.0309) time: 1.3059 data: 0.0002 max mem: 13912 +[2024-12-06 16:09:11 root] (utils.py 283): INFO Epoch: [8] [ 750/2502] eta: 0:38:04 lr: 0.000011 loss_cls: 3.2690 (3.1274) grad_norm: 2.6584 (3.0273) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 16:09:24 root] (utils.py 283): INFO Epoch: [8] [ 760/2502] eta: 0:37:50 lr: 0.000011 loss_cls: 3.2690 (3.1274) grad_norm: 2.6275 (3.0242) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 16:09:38 root] (utils.py 283): INFO Epoch: [8] [ 770/2502] eta: 0:37:39 lr: 0.000011 loss_cls: 3.3596 (3.1306) grad_norm: 2.7371 (3.0300) time: 1.3275 data: 0.0003 max mem: 13912 +[2024-12-06 16:09:51 root] (utils.py 283): INFO Epoch: [8] [ 780/2502] eta: 0:37:26 lr: 0.000011 loss_cls: 3.3596 (3.1272) grad_norm: 3.0461 (3.0309) time: 1.3373 data: 0.0003 max mem: 13912 +[2024-12-06 16:10:04 root] (utils.py 283): INFO Epoch: [8] [ 790/2502] eta: 0:37:13 lr: 0.000011 loss_cls: 2.7571 (3.1251) grad_norm: 2.7648 (3.0292) time: 1.3121 data: 0.0003 max mem: 13912 +[2024-12-06 16:10:17 root] (utils.py 283): INFO Epoch: [8] [ 800/2502] eta: 0:37:00 lr: 0.000011 loss_cls: 3.3578 (3.1271) grad_norm: 2.7648 (3.0246) time: 1.3067 data: 0.0002 max mem: 13912 +[2024-12-06 16:10:30 root] (utils.py 283): INFO Epoch: [8] [ 810/2502] eta: 0:36:47 lr: 0.000011 loss_cls: 3.3177 (3.1260) grad_norm: 2.8126 (3.0244) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 16:10:43 root] (utils.py 283): INFO Epoch: [8] [ 820/2502] eta: 0:36:34 lr: 0.000011 loss_cls: 3.3177 (3.1297) grad_norm: 2.7917 (3.0203) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 16:10:57 root] (utils.py 283): INFO Epoch: [8] [ 830/2502] eta: 0:36:21 lr: 0.000011 loss_cls: 3.4322 (3.1319) grad_norm: 2.5607 (3.0167) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 16:11:10 root] (utils.py 283): INFO Epoch: [8] [ 840/2502] eta: 0:36:08 lr: 0.000011 loss_cls: 3.2040 (3.1312) grad_norm: 2.5607 (3.0131) time: 1.3056 data: 0.0002 max mem: 13912 +[2024-12-06 16:11:23 root] (utils.py 283): INFO Epoch: [8] [ 850/2502] eta: 0:35:55 lr: 0.000011 loss_cls: 3.0625 (3.1275) grad_norm: 2.4636 (3.0090) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 16:11:36 root] (utils.py 283): INFO Epoch: [8] [ 860/2502] eta: 0:35:42 lr: 0.000011 loss_cls: 3.1861 (3.1280) grad_norm: 2.5213 (3.0117) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 16:11:49 root] (utils.py 283): INFO Epoch: [8] [ 870/2502] eta: 0:35:29 lr: 0.000011 loss_cls: 3.2163 (3.1249) grad_norm: 2.7591 (3.0106) time: 1.3060 data: 0.0002 max mem: 13912 +[2024-12-06 16:12:02 root] (utils.py 283): INFO Epoch: [8] [ 880/2502] eta: 0:35:16 lr: 0.000011 loss_cls: 3.1242 (3.1261) grad_norm: 2.5453 (3.0057) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 16:12:15 root] (utils.py 283): INFO Epoch: [8] [ 890/2502] eta: 0:35:03 lr: 0.000011 loss_cls: 3.1242 (3.1263) grad_norm: 2.6085 (3.0085) time: 1.3123 data: 0.0002 max mem: 13912 +[2024-12-06 16:12:28 root] (utils.py 283): INFO Epoch: [8] [ 900/2502] eta: 0:34:50 lr: 0.000011 loss_cls: 3.0920 (3.1260) grad_norm: 2.8220 (3.0107) time: 1.3092 data: 0.0003 max mem: 13912 +[2024-12-06 16:12:41 root] (utils.py 283): INFO Epoch: [8] [ 910/2502] eta: 0:34:37 lr: 0.000011 loss_cls: 3.2536 (3.1258) grad_norm: 2.7959 (3.0116) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 16:12:54 root] (utils.py 283): INFO Epoch: [8] [ 920/2502] eta: 0:34:24 lr: 0.000011 loss_cls: 3.2308 (3.1246) grad_norm: 2.6510 (3.0068) time: 1.3087 data: 0.0003 max mem: 13912 +[2024-12-06 16:13:07 root] (utils.py 283): INFO Epoch: [8] [ 930/2502] eta: 0:34:11 lr: 0.000011 loss_cls: 2.8868 (3.1196) grad_norm: 2.5706 (3.0026) time: 1.3064 data: 0.0002 max mem: 13912 +[2024-12-06 16:13:20 root] (utils.py 283): INFO Epoch: [8] [ 940/2502] eta: 0:33:58 lr: 0.000011 loss_cls: 3.0238 (3.1227) grad_norm: 2.7651 (3.0223) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 16:13:33 root] (utils.py 283): INFO Epoch: [8] [ 950/2502] eta: 0:33:44 lr: 0.000011 loss_cls: 3.2782 (3.1218) grad_norm: 2.8050 (3.0189) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 16:13:46 root] (utils.py 283): INFO Epoch: [8] [ 960/2502] eta: 0:33:31 lr: 0.000011 loss_cls: 3.2249 (3.1194) grad_norm: 2.6252 (3.0164) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 16:13:59 root] (utils.py 283): INFO Epoch: [8] [ 970/2502] eta: 0:33:18 lr: 0.000011 loss_cls: 2.9259 (3.1183) grad_norm: 2.6822 (3.0488) time: 1.2998 data: 0.0002 max mem: 13912 +[2024-12-06 16:14:12 root] (utils.py 283): INFO Epoch: [8] [ 980/2502] eta: 0:33:05 lr: 0.000011 loss_cls: 3.2158 (3.1199) grad_norm: 2.9042 (3.0524) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 16:14:25 root] (utils.py 283): INFO Epoch: [8] [ 990/2502] eta: 0:32:52 lr: 0.000011 loss_cls: 3.3636 (3.1222) grad_norm: 2.9088 (3.0504) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 16:14:38 root] (utils.py 283): INFO Epoch: [8] [1000/2502] eta: 0:32:39 lr: 0.000011 loss_cls: 3.2777 (3.1205) grad_norm: 2.7739 (3.0548) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 16:14:52 root] (utils.py 283): INFO Epoch: [8] [1010/2502] eta: 0:32:26 lr: 0.000011 loss_cls: 2.7313 (3.1197) grad_norm: 2.7451 (3.0542) time: 1.3135 data: 0.0003 max mem: 13912 +[2024-12-06 16:15:05 root] (utils.py 283): INFO Epoch: [8] [1020/2502] eta: 0:32:13 lr: 0.000011 loss_cls: 3.1989 (3.1200) grad_norm: 2.5495 (3.0499) time: 1.3146 data: 0.0003 max mem: 13912 +[2024-12-06 16:15:18 root] (utils.py 283): INFO Epoch: [8] [1030/2502] eta: 0:32:00 lr: 0.000011 loss_cls: 3.2456 (3.1203) grad_norm: 2.7139 (3.0486) time: 1.3042 data: 0.0002 max mem: 13912 +[2024-12-06 16:15:31 root] (utils.py 283): INFO Epoch: [8] [1040/2502] eta: 0:31:47 lr: 0.000011 loss_cls: 3.1797 (3.1190) grad_norm: 2.5831 (3.0451) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 16:15:44 root] (utils.py 283): INFO Epoch: [8] [1050/2502] eta: 0:31:34 lr: 0.000011 loss_cls: 2.9432 (3.1197) grad_norm: 2.4850 (3.0494) time: 1.3004 data: 0.0002 max mem: 13912 +[2024-12-06 16:15:57 root] (utils.py 283): INFO Epoch: [8] [1060/2502] eta: 0:31:21 lr: 0.000011 loss_cls: 3.0054 (3.1177) grad_norm: 2.7673 (3.0489) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 16:16:10 root] (utils.py 283): INFO Epoch: [8] [1070/2502] eta: 0:31:08 lr: 0.000011 loss_cls: 3.1187 (3.1174) grad_norm: 2.7099 (3.0466) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 16:16:23 root] (utils.py 283): INFO Epoch: [8] [1080/2502] eta: 0:30:55 lr: 0.000011 loss_cls: 3.2560 (3.1160) grad_norm: 2.6908 (3.0477) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 16:16:36 root] (utils.py 283): INFO Epoch: [8] [1090/2502] eta: 0:30:42 lr: 0.000011 loss_cls: 3.2560 (3.1147) grad_norm: 2.6908 (3.0480) time: 1.3072 data: 0.0003 max mem: 13912 +[2024-12-06 16:16:49 root] (utils.py 283): INFO Epoch: [8] [1100/2502] eta: 0:30:29 lr: 0.000011 loss_cls: 2.9653 (3.1141) grad_norm: 2.6849 (3.0469) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 16:17:02 root] (utils.py 283): INFO Epoch: [8] [1110/2502] eta: 0:30:16 lr: 0.000011 loss_cls: 2.9653 (3.1121) grad_norm: 2.6717 (3.0446) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 16:17:15 root] (utils.py 283): INFO Epoch: [8] [1120/2502] eta: 0:30:03 lr: 0.000011 loss_cls: 3.0821 (3.1104) grad_norm: 2.6586 (3.0415) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 16:17:28 root] (utils.py 283): INFO Epoch: [8] [1130/2502] eta: 0:29:49 lr: 0.000011 loss_cls: 3.1871 (3.1119) grad_norm: 2.4891 (3.0375) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 16:17:41 root] (utils.py 283): INFO Epoch: [8] [1140/2502] eta: 0:29:36 lr: 0.000011 loss_cls: 3.3100 (3.1131) grad_norm: 2.5950 (3.0402) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 16:17:54 root] (utils.py 283): INFO Epoch: [8] [1150/2502] eta: 0:29:23 lr: 0.000011 loss_cls: 3.2345 (3.1131) grad_norm: 2.7623 (3.0387) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 16:18:07 root] (utils.py 283): INFO Epoch: [8] [1160/2502] eta: 0:29:10 lr: 0.000011 loss_cls: 3.1671 (3.1123) grad_norm: 2.7623 (3.0371) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 16:18:20 root] (utils.py 283): INFO Epoch: [8] [1170/2502] eta: 0:28:57 lr: 0.000011 loss_cls: 3.3011 (3.1138) grad_norm: 2.7290 (3.0361) time: 1.3042 data: 0.0002 max mem: 13912 +[2024-12-06 16:18:33 root] (utils.py 283): INFO Epoch: [8] [1180/2502] eta: 0:28:44 lr: 0.000011 loss_cls: 3.4183 (3.1159) grad_norm: 2.6111 (3.0409) time: 1.3103 data: 0.0002 max mem: 13912 +[2024-12-06 16:18:46 root] (utils.py 283): INFO Epoch: [8] [1190/2502] eta: 0:28:31 lr: 0.000011 loss_cls: 3.2195 (3.1164) grad_norm: 2.6024 (3.0393) time: 1.3073 data: 0.0002 max mem: 13912 +[2024-12-06 16:18:59 root] (utils.py 283): INFO Epoch: [8] [1200/2502] eta: 0:28:18 lr: 0.000011 loss_cls: 3.3489 (3.1175) grad_norm: 2.6476 (3.0415) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 16:19:12 root] (utils.py 283): INFO Epoch: [8] [1210/2502] eta: 0:28:05 lr: 0.000011 loss_cls: 3.3489 (3.1168) grad_norm: 2.6343 (3.0438) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 16:19:25 root] (utils.py 283): INFO Epoch: [8] [1220/2502] eta: 0:27:52 lr: 0.000011 loss_cls: 3.2166 (3.1173) grad_norm: 2.7815 (3.0451) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 16:19:38 root] (utils.py 283): INFO Epoch: [8] [1230/2502] eta: 0:27:39 lr: 0.000011 loss_cls: 3.1569 (3.1178) grad_norm: 2.8879 (3.0430) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 16:19:52 root] (utils.py 283): INFO Epoch: [8] [1240/2502] eta: 0:27:26 lr: 0.000011 loss_cls: 3.1592 (3.1178) grad_norm: 2.6744 (3.0400) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 16:20:05 root] (utils.py 283): INFO Epoch: [8] [1250/2502] eta: 0:27:13 lr: 0.000011 loss_cls: 3.3014 (3.1180) grad_norm: 2.6652 (3.0414) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 16:20:18 root] (utils.py 283): INFO Epoch: [8] [1260/2502] eta: 0:27:00 lr: 0.000011 loss_cls: 3.3014 (3.1181) grad_norm: 2.6252 (3.0383) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 16:20:31 root] (utils.py 283): INFO Epoch: [8] [1270/2502] eta: 0:26:47 lr: 0.000011 loss_cls: 3.2298 (3.1191) grad_norm: 2.6255 (3.0364) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 16:20:44 root] (utils.py 283): INFO Epoch: [8] [1280/2502] eta: 0:26:34 lr: 0.000011 loss_cls: 3.0924 (3.1175) grad_norm: 2.5873 (3.0333) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 16:20:57 root] (utils.py 283): INFO Epoch: [8] [1290/2502] eta: 0:26:21 lr: 0.000011 loss_cls: 2.9510 (3.1153) grad_norm: 2.5767 (3.0322) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 16:21:10 root] (utils.py 283): INFO Epoch: [8] [1300/2502] eta: 0:26:08 lr: 0.000011 loss_cls: 2.7111 (3.1110) grad_norm: 2.6932 (3.0309) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 16:21:23 root] (utils.py 283): INFO Epoch: [8] [1310/2502] eta: 0:25:55 lr: 0.000011 loss_cls: 2.7854 (3.1121) grad_norm: 2.6497 (3.0291) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 16:21:36 root] (utils.py 283): INFO Epoch: [8] [1320/2502] eta: 0:25:42 lr: 0.000011 loss_cls: 3.3445 (3.1125) grad_norm: 2.6321 (3.0281) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 16:21:49 root] (utils.py 283): INFO Epoch: [8] [1330/2502] eta: 0:25:28 lr: 0.000011 loss_cls: 3.2978 (3.1132) grad_norm: 2.7986 (3.0297) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 16:22:02 root] (utils.py 283): INFO Epoch: [8] [1340/2502] eta: 0:25:16 lr: 0.000011 loss_cls: 3.2978 (3.1151) grad_norm: 2.6556 (3.0264) time: 1.3095 data: 0.0003 max mem: 13912 +[2024-12-06 16:22:15 root] (utils.py 283): INFO Epoch: [8] [1350/2502] eta: 0:25:02 lr: 0.000011 loss_cls: 3.3182 (3.1167) grad_norm: 2.6465 (3.0244) time: 1.3080 data: 0.0003 max mem: 13912 +[2024-12-06 16:22:28 root] (utils.py 283): INFO Epoch: [8] [1360/2502] eta: 0:24:49 lr: 0.000011 loss_cls: 3.3011 (3.1169) grad_norm: 2.6830 (3.0282) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 16:22:41 root] (utils.py 283): INFO Epoch: [8] [1370/2502] eta: 0:24:36 lr: 0.000011 loss_cls: 3.2190 (3.1155) grad_norm: 2.6789 (3.0270) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 16:22:54 root] (utils.py 283): INFO Epoch: [8] [1380/2502] eta: 0:24:23 lr: 0.000011 loss_cls: 3.2367 (3.1165) grad_norm: 2.8870 (3.0293) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 16:23:07 root] (utils.py 283): INFO Epoch: [8] [1390/2502] eta: 0:24:10 lr: 0.000011 loss_cls: 3.0146 (3.1154) grad_norm: 2.8251 (3.0279) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 16:23:20 root] (utils.py 283): INFO Epoch: [8] [1400/2502] eta: 0:23:57 lr: 0.000011 loss_cls: 2.8685 (3.1142) grad_norm: 2.5883 (3.0280) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 16:23:33 root] (utils.py 283): INFO Epoch: [8] [1410/2502] eta: 0:23:44 lr: 0.000011 loss_cls: 3.2433 (3.1140) grad_norm: 2.4711 (3.0263) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 16:23:46 root] (utils.py 283): INFO Epoch: [8] [1420/2502] eta: 0:23:31 lr: 0.000011 loss_cls: 3.1718 (3.1131) grad_norm: 2.4336 (3.0238) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 16:23:59 root] (utils.py 283): INFO Epoch: [8] [1430/2502] eta: 0:23:18 lr: 0.000011 loss_cls: 2.9933 (3.1123) grad_norm: 2.5871 (3.0224) time: 1.3005 data: 0.0003 max mem: 13912 +[2024-12-06 16:24:12 root] (utils.py 283): INFO Epoch: [8] [1440/2502] eta: 0:23:05 lr: 0.000011 loss_cls: 2.9933 (3.1120) grad_norm: 2.6299 (3.0218) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 16:24:25 root] (utils.py 283): INFO Epoch: [8] [1450/2502] eta: 0:22:52 lr: 0.000011 loss_cls: 3.1950 (3.1103) grad_norm: 2.8163 (3.0225) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 16:24:38 root] (utils.py 283): INFO Epoch: [8] [1460/2502] eta: 0:22:39 lr: 0.000011 loss_cls: 3.1977 (3.1113) grad_norm: 2.6712 (3.0201) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 16:24:51 root] (utils.py 283): INFO Epoch: [8] [1470/2502] eta: 0:22:26 lr: 0.000011 loss_cls: 3.2460 (3.1110) grad_norm: 2.7215 (3.0185) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 16:25:05 root] (utils.py 283): INFO Epoch: [8] [1480/2502] eta: 0:22:13 lr: 0.000011 loss_cls: 3.1377 (3.1097) grad_norm: 2.7536 (3.0181) time: 1.3073 data: 0.0003 max mem: 13912 +[2024-12-06 16:25:18 root] (utils.py 283): INFO Epoch: [8] [1490/2502] eta: 0:22:00 lr: 0.000011 loss_cls: 2.8455 (3.1088) grad_norm: 2.5742 (3.0157) time: 1.3086 data: 0.0002 max mem: 13912 +[2024-12-06 16:25:31 root] (utils.py 283): INFO Epoch: [8] [1500/2502] eta: 0:21:47 lr: 0.000011 loss_cls: 3.1757 (3.1080) grad_norm: 2.5742 (3.0130) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 16:25:44 root] (utils.py 283): INFO Epoch: [8] [1510/2502] eta: 0:21:34 lr: 0.000011 loss_cls: 3.2415 (3.1085) grad_norm: 2.7112 (3.0122) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 16:25:57 root] (utils.py 283): INFO Epoch: [8] [1520/2502] eta: 0:21:21 lr: 0.000011 loss_cls: 3.1002 (3.1064) grad_norm: 2.7668 (3.0118) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 16:26:10 root] (utils.py 283): INFO Epoch: [8] [1530/2502] eta: 0:21:08 lr: 0.000011 loss_cls: 2.6404 (3.1052) grad_norm: 2.7668 (3.0131) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 16:26:23 root] (utils.py 283): INFO Epoch: [8] [1540/2502] eta: 0:20:55 lr: 0.000011 loss_cls: 2.8732 (3.1039) grad_norm: 2.8794 (3.0646) time: 1.3125 data: 0.0003 max mem: 13912 +[2024-12-06 16:26:36 root] (utils.py 283): INFO Epoch: [8] [1550/2502] eta: 0:20:42 lr: 0.000011 loss_cls: 3.2314 (3.1049) grad_norm: 3.0683 (3.0643) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 16:26:49 root] (utils.py 283): INFO Epoch: [8] [1560/2502] eta: 0:20:28 lr: 0.000011 loss_cls: 3.3501 (3.1060) grad_norm: 2.9891 (3.0643) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 16:27:02 root] (utils.py 283): INFO Epoch: [8] [1570/2502] eta: 0:20:15 lr: 0.000011 loss_cls: 3.3501 (3.1066) grad_norm: 2.8279 (3.0628) time: 1.3071 data: 0.0002 max mem: 13912 +[2024-12-06 16:27:15 root] (utils.py 283): INFO Epoch: [8] [1580/2502] eta: 0:20:02 lr: 0.000011 loss_cls: 3.0892 (3.1044) grad_norm: 2.6465 (3.0600) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 16:27:28 root] (utils.py 283): INFO Epoch: [8] [1590/2502] eta: 0:19:49 lr: 0.000011 loss_cls: 2.8766 (3.1030) grad_norm: 2.8171 (3.0601) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 16:27:41 root] (utils.py 283): INFO Epoch: [8] [1600/2502] eta: 0:19:36 lr: 0.000011 loss_cls: 2.7727 (3.1008) grad_norm: 2.8872 (3.0597) time: 1.3080 data: 0.0002 max mem: 13912 +[2024-12-06 16:27:54 root] (utils.py 283): INFO Epoch: [8] [1610/2502] eta: 0:19:23 lr: 0.000011 loss_cls: 3.2704 (3.1028) grad_norm: 2.9213 (3.0650) time: 1.3069 data: 0.0002 max mem: 13912 +[2024-12-06 16:28:07 root] (utils.py 283): INFO Epoch: [8] [1620/2502] eta: 0:19:10 lr: 0.000011 loss_cls: 3.4213 (3.1029) grad_norm: 2.9213 (3.0636) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 16:28:20 root] (utils.py 283): INFO Epoch: [8] [1630/2502] eta: 0:18:57 lr: 0.000011 loss_cls: 3.4176 (3.1034) grad_norm: 2.5399 (3.0615) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 16:28:33 root] (utils.py 283): INFO Epoch: [8] [1640/2502] eta: 0:18:44 lr: 0.000011 loss_cls: 3.2131 (3.1037) grad_norm: 2.7687 (3.0695) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 16:28:46 root] (utils.py 283): INFO Epoch: [8] [1650/2502] eta: 0:18:31 lr: 0.000011 loss_cls: 3.0946 (3.1031) grad_norm: 2.8674 (3.0675) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 16:29:00 root] (utils.py 283): INFO Epoch: [8] [1660/2502] eta: 0:18:18 lr: 0.000011 loss_cls: 3.1053 (3.1039) grad_norm: 2.7478 (3.0703) time: 1.3047 data: 0.0002 max mem: 13912 +[2024-12-06 16:29:13 root] (utils.py 283): INFO Epoch: [8] [1670/2502] eta: 0:18:05 lr: 0.000011 loss_cls: 3.2211 (3.1044) grad_norm: 2.6914 (3.0808) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 16:29:26 root] (utils.py 283): INFO Epoch: [8] [1680/2502] eta: 0:17:52 lr: 0.000011 loss_cls: 3.2555 (3.1053) grad_norm: 2.6605 (3.0806) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 16:29:39 root] (utils.py 283): INFO Epoch: [8] [1690/2502] eta: 0:17:39 lr: 0.000011 loss_cls: 3.1765 (3.1049) grad_norm: 2.6605 (3.0791) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 16:29:52 root] (utils.py 283): INFO Epoch: [8] [1700/2502] eta: 0:17:26 lr: 0.000011 loss_cls: 3.1765 (3.1058) grad_norm: 2.9415 (3.0809) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 16:30:05 root] (utils.py 283): INFO Epoch: [8] [1710/2502] eta: 0:17:13 lr: 0.000011 loss_cls: 3.3023 (3.1067) grad_norm: 2.9394 (3.0789) time: 1.3001 data: 0.0002 max mem: 13912 +[2024-12-06 16:30:18 root] (utils.py 283): INFO Epoch: [8] [1720/2502] eta: 0:17:00 lr: 0.000011 loss_cls: 3.3236 (3.1074) grad_norm: 2.6003 (3.0807) time: 1.3004 data: 0.0002 max mem: 13912 +[2024-12-06 16:30:31 root] (utils.py 283): INFO Epoch: [8] [1730/2502] eta: 0:16:47 lr: 0.000011 loss_cls: 3.3399 (3.1070) grad_norm: 2.7420 (3.0796) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 16:30:44 root] (utils.py 283): INFO Epoch: [8] [1740/2502] eta: 0:16:34 lr: 0.000011 loss_cls: 3.1995 (3.1070) grad_norm: 2.7022 (3.0771) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 16:30:57 root] (utils.py 283): INFO Epoch: [8] [1750/2502] eta: 0:16:21 lr: 0.000011 loss_cls: 3.3888 (3.1089) grad_norm: 2.6591 (3.0768) time: 1.3087 data: 0.0003 max mem: 13912 +[2024-12-06 16:31:10 root] (utils.py 283): INFO Epoch: [8] [1760/2502] eta: 0:16:07 lr: 0.000011 loss_cls: 3.3888 (3.1083) grad_norm: 2.7078 (3.0747) time: 1.3098 data: 0.0002 max mem: 13912 +[2024-12-06 16:31:23 root] (utils.py 283): INFO Epoch: [8] [1770/2502] eta: 0:15:54 lr: 0.000011 loss_cls: 2.9881 (3.1086) grad_norm: 2.7545 (3.0740) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 16:31:36 root] (utils.py 283): INFO Epoch: [8] [1780/2502] eta: 0:15:41 lr: 0.000011 loss_cls: 2.9881 (3.1069) grad_norm: 2.5665 (3.0736) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 16:31:49 root] (utils.py 283): INFO Epoch: [8] [1790/2502] eta: 0:15:28 lr: 0.000011 loss_cls: 3.1550 (3.1076) grad_norm: 2.5665 (3.0739) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 16:32:02 root] (utils.py 283): INFO Epoch: [8] [1800/2502] eta: 0:15:15 lr: 0.000011 loss_cls: 3.3256 (3.1093) grad_norm: 2.7268 (3.0718) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 16:32:15 root] (utils.py 283): INFO Epoch: [8] [1810/2502] eta: 0:15:02 lr: 0.000011 loss_cls: 3.4003 (3.1096) grad_norm: 2.7268 (3.0755) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 16:32:28 root] (utils.py 283): INFO Epoch: [8] [1820/2502] eta: 0:14:49 lr: 0.000011 loss_cls: 3.2233 (3.1094) grad_norm: 2.8191 (3.0810) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 16:32:41 root] (utils.py 283): INFO Epoch: [8] [1830/2502] eta: 0:14:36 lr: 0.000011 loss_cls: 2.9075 (3.1094) grad_norm: 2.7593 (3.0801) time: 1.2959 data: 0.0003 max mem: 13912 +[2024-12-06 16:32:54 root] (utils.py 283): INFO Epoch: [8] [1840/2502] eta: 0:14:23 lr: 0.000011 loss_cls: 2.8930 (3.1076) grad_norm: 2.6105 (3.0786) time: 1.2962 data: 0.0003 max mem: 13912 +[2024-12-06 16:33:07 root] (utils.py 283): INFO Epoch: [8] [1850/2502] eta: 0:14:10 lr: 0.000011 loss_cls: 3.1806 (3.1082) grad_norm: 2.6320 (3.0802) time: 1.2955 data: 0.0002 max mem: 13912 +[2024-12-06 16:33:20 root] (utils.py 283): INFO Epoch: [8] [1860/2502] eta: 0:13:57 lr: 0.000011 loss_cls: 3.3649 (3.1094) grad_norm: 2.8005 (3.0799) time: 1.2947 data: 0.0002 max mem: 13912 +[2024-12-06 16:33:33 root] (utils.py 283): INFO Epoch: [8] [1870/2502] eta: 0:13:44 lr: 0.000011 loss_cls: 3.1244 (3.1072) grad_norm: 2.6699 (3.0856) time: 1.2967 data: 0.0002 max mem: 13912 +[2024-12-06 16:33:46 root] (utils.py 283): INFO Epoch: [8] [1880/2502] eta: 0:13:31 lr: 0.000011 loss_cls: 3.2005 (3.1090) grad_norm: 2.6466 (3.0840) time: 1.2961 data: 0.0002 max mem: 13912 +[2024-12-06 16:33:59 root] (utils.py 283): INFO Epoch: [8] [1890/2502] eta: 0:13:18 lr: 0.000011 loss_cls: 3.3833 (3.1097) grad_norm: 2.6459 (3.0833) time: 1.2945 data: 0.0002 max mem: 13912 +[2024-12-06 16:34:12 root] (utils.py 283): INFO Epoch: [8] [1900/2502] eta: 0:13:05 lr: 0.000011 loss_cls: 3.3391 (3.1097) grad_norm: 2.5665 (3.0807) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 16:34:25 root] (utils.py 283): INFO Epoch: [8] [1910/2502] eta: 0:12:52 lr: 0.000011 loss_cls: 3.2929 (3.1106) grad_norm: 2.7303 (3.0808) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 16:34:38 root] (utils.py 283): INFO Epoch: [8] [1920/2502] eta: 0:12:38 lr: 0.000011 loss_cls: 3.2285 (3.1106) grad_norm: 2.8454 (3.0858) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 16:34:51 root] (utils.py 283): INFO Epoch: [8] [1930/2502] eta: 0:12:25 lr: 0.000011 loss_cls: 3.1951 (3.1102) grad_norm: 2.6013 (3.0844) time: 1.2949 data: 0.0002 max mem: 13912 +[2024-12-06 16:35:04 root] (utils.py 283): INFO Epoch: [8] [1940/2502] eta: 0:12:12 lr: 0.000011 loss_cls: 3.1094 (3.1096) grad_norm: 2.5949 (3.0848) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 16:35:17 root] (utils.py 283): INFO Epoch: [8] [1950/2502] eta: 0:11:59 lr: 0.000011 loss_cls: 2.8704 (3.1090) grad_norm: 2.6706 (3.0851) time: 1.2979 data: 0.0003 max mem: 13912 +[2024-12-06 16:35:30 root] (utils.py 283): INFO Epoch: [8] [1960/2502] eta: 0:11:46 lr: 0.000011 loss_cls: 3.0294 (3.1105) grad_norm: 2.5649 (3.0835) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 16:35:43 root] (utils.py 283): INFO Epoch: [8] [1970/2502] eta: 0:11:33 lr: 0.000011 loss_cls: 3.4231 (3.1108) grad_norm: 2.5649 (3.0812) time: 1.2987 data: 0.0003 max mem: 13912 +[2024-12-06 16:35:56 root] (utils.py 283): INFO Epoch: [8] [1980/2502] eta: 0:11:20 lr: 0.000011 loss_cls: 3.2484 (3.1104) grad_norm: 2.6816 (3.0802) time: 1.2955 data: 0.0002 max mem: 13912 +[2024-12-06 16:36:08 root] (utils.py 283): INFO Epoch: [8] [1990/2502] eta: 0:11:07 lr: 0.000011 loss_cls: 3.1305 (3.1109) grad_norm: 2.7877 (3.0793) time: 1.2946 data: 0.0002 max mem: 13912 +[2024-12-06 16:36:21 root] (utils.py 283): INFO Epoch: [8] [2000/2502] eta: 0:10:54 lr: 0.000011 loss_cls: 3.3402 (3.1109) grad_norm: 2.7721 (3.0786) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 16:36:34 root] (utils.py 283): INFO Epoch: [8] [2010/2502] eta: 0:10:41 lr: 0.000011 loss_cls: 2.9524 (3.1096) grad_norm: 2.4459 (3.0756) time: 1.2970 data: 0.0003 max mem: 13912 +[2024-12-06 16:36:47 root] (utils.py 283): INFO Epoch: [8] [2020/2502] eta: 0:10:28 lr: 0.000011 loss_cls: 2.8612 (3.1078) grad_norm: 2.4490 (3.0733) time: 1.2995 data: 0.0003 max mem: 13912 +[2024-12-06 16:37:00 root] (utils.py 283): INFO Epoch: [8] [2030/2502] eta: 0:10:15 lr: 0.000011 loss_cls: 3.0670 (3.1082) grad_norm: 2.6488 (3.0746) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 16:37:14 root] (utils.py 283): INFO Epoch: [8] [2040/2502] eta: 0:10:02 lr: 0.000011 loss_cls: 3.1463 (3.1081) grad_norm: 2.6690 (3.0766) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 16:37:27 root] (utils.py 283): INFO Epoch: [8] [2050/2502] eta: 0:09:49 lr: 0.000011 loss_cls: 3.1458 (3.1090) grad_norm: 2.7532 (3.0748) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 16:37:40 root] (utils.py 283): INFO Epoch: [8] [2060/2502] eta: 0:09:36 lr: 0.000011 loss_cls: 3.1286 (3.1083) grad_norm: 2.6565 (3.0734) time: 1.3061 data: 0.0002 max mem: 13912 +[2024-12-06 16:37:53 root] (utils.py 283): INFO Epoch: [8] [2070/2502] eta: 0:09:23 lr: 0.000011 loss_cls: 3.1286 (3.1082) grad_norm: 2.6565 (3.0719) time: 1.3118 data: 0.0002 max mem: 13912 +[2024-12-06 16:38:06 root] (utils.py 283): INFO Epoch: [8] [2080/2502] eta: 0:09:10 lr: 0.000011 loss_cls: 3.0691 (3.1079) grad_norm: 2.7699 (3.0705) time: 1.3102 data: 0.0002 max mem: 13912 +[2024-12-06 16:38:19 root] (utils.py 283): INFO Epoch: [8] [2090/2502] eta: 0:08:57 lr: 0.000011 loss_cls: 3.1283 (3.1089) grad_norm: 2.7663 (3.0685) time: 1.3065 data: 0.0002 max mem: 13912 +[2024-12-06 16:38:32 root] (utils.py 283): INFO Epoch: [8] [2100/2502] eta: 0:08:44 lr: 0.000011 loss_cls: 3.0938 (3.1071) grad_norm: 2.5582 (3.0674) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 16:38:45 root] (utils.py 283): INFO Epoch: [8] [2110/2502] eta: 0:08:31 lr: 0.000011 loss_cls: 2.8208 (3.1067) grad_norm: 2.6862 (3.0709) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 16:38:58 root] (utils.py 283): INFO Epoch: [8] [2120/2502] eta: 0:08:18 lr: 0.000011 loss_cls: 3.0285 (3.1071) grad_norm: 2.8741 (3.0695) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 16:39:11 root] (utils.py 283): INFO Epoch: [8] [2130/2502] eta: 0:08:05 lr: 0.000011 loss_cls: 3.2088 (3.1068) grad_norm: 2.9391 (3.0694) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 16:39:24 root] (utils.py 283): INFO Epoch: [8] [2140/2502] eta: 0:07:51 lr: 0.000011 loss_cls: 3.1308 (3.1067) grad_norm: 3.0845 (3.0713) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 16:39:37 root] (utils.py 283): INFO Epoch: [8] [2150/2502] eta: 0:07:38 lr: 0.000011 loss_cls: 3.0019 (3.1061) grad_norm: 2.8708 (3.0706) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 16:39:50 root] (utils.py 283): INFO Epoch: [8] [2160/2502] eta: 0:07:25 lr: 0.000011 loss_cls: 3.2471 (3.1068) grad_norm: 2.8708 (3.0715) time: 1.3079 data: 0.0003 max mem: 13912 +[2024-12-06 16:40:03 root] (utils.py 283): INFO Epoch: [8] [2170/2502] eta: 0:07:12 lr: 0.000011 loss_cls: 3.5155 (3.1086) grad_norm: 2.8256 (3.0701) time: 1.3089 data: 0.0003 max mem: 13912 +[2024-12-06 16:40:16 root] (utils.py 283): INFO Epoch: [8] [2180/2502] eta: 0:06:59 lr: 0.000011 loss_cls: 3.4232 (3.1075) grad_norm: 2.7965 (3.0708) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 16:40:29 root] (utils.py 283): INFO Epoch: [8] [2190/2502] eta: 0:06:46 lr: 0.000011 loss_cls: 3.3786 (3.1077) grad_norm: 2.7657 (3.0706) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 16:40:42 root] (utils.py 283): INFO Epoch: [8] [2200/2502] eta: 0:06:33 lr: 0.000011 loss_cls: 3.3786 (3.1071) grad_norm: 2.5420 (3.0790) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 16:40:55 root] (utils.py 283): INFO Epoch: [8] [2210/2502] eta: 0:06:20 lr: 0.000011 loss_cls: 3.2221 (3.1077) grad_norm: 2.7616 (3.0781) time: 1.3054 data: 0.0002 max mem: 13912 +[2024-12-06 16:41:08 root] (utils.py 283): INFO Epoch: [8] [2220/2502] eta: 0:06:07 lr: 0.000011 loss_cls: 3.2336 (3.1072) grad_norm: 2.6828 (3.0761) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 16:41:21 root] (utils.py 283): INFO Epoch: [8] [2230/2502] eta: 0:05:54 lr: 0.000011 loss_cls: 3.1912 (3.1071) grad_norm: 2.7721 (3.0773) time: 1.2991 data: 0.0003 max mem: 13912 +[2024-12-06 16:41:35 root] (utils.py 283): INFO Epoch: [8] [2240/2502] eta: 0:05:41 lr: 0.000011 loss_cls: 3.3491 (3.1081) grad_norm: 2.8524 (3.0765) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 16:41:48 root] (utils.py 283): INFO Epoch: [8] [2250/2502] eta: 0:05:28 lr: 0.000011 loss_cls: 3.4787 (3.1094) grad_norm: 2.6850 (3.0783) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 16:42:01 root] (utils.py 283): INFO Epoch: [8] [2260/2502] eta: 0:05:15 lr: 0.000011 loss_cls: 3.4909 (3.1109) grad_norm: 2.8227 (3.0832) time: 1.3099 data: 0.0003 max mem: 13912 +[2024-12-06 16:42:14 root] (utils.py 283): INFO Epoch: [8] [2270/2502] eta: 0:05:02 lr: 0.000011 loss_cls: 3.3443 (3.1109) grad_norm: 2.7774 (3.0840) time: 1.3123 data: 0.0003 max mem: 13912 +[2024-12-06 16:42:27 root] (utils.py 283): INFO Epoch: [8] [2280/2502] eta: 0:04:49 lr: 0.000011 loss_cls: 2.9785 (3.1103) grad_norm: 2.7253 (3.0822) time: 1.3137 data: 0.0003 max mem: 13912 +[2024-12-06 16:42:41 root] (utils.py 283): INFO Epoch: [8] [2290/2502] eta: 0:04:36 lr: 0.000011 loss_cls: 2.9785 (3.1099) grad_norm: 2.7263 (3.0808) time: 1.3377 data: 0.0003 max mem: 13912 +[2024-12-06 16:42:54 root] (utils.py 283): INFO Epoch: [8] [2300/2502] eta: 0:04:23 lr: 0.000011 loss_cls: 3.2338 (3.1105) grad_norm: 2.7559 (3.0802) time: 1.3279 data: 0.0003 max mem: 13912 +[2024-12-06 16:43:07 root] (utils.py 283): INFO Epoch: [8] [2310/2502] eta: 0:04:10 lr: 0.000011 loss_cls: 3.2374 (3.1109) grad_norm: 2.6836 (3.0791) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 16:43:20 root] (utils.py 283): INFO Epoch: [8] [2320/2502] eta: 0:03:57 lr: 0.000011 loss_cls: 3.3791 (3.1125) grad_norm: 2.6691 (3.0777) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 16:43:33 root] (utils.py 283): INFO Epoch: [8] [2330/2502] eta: 0:03:44 lr: 0.000011 loss_cls: 3.4297 (3.1135) grad_norm: 2.6366 (3.0771) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 16:43:46 root] (utils.py 283): INFO Epoch: [8] [2340/2502] eta: 0:03:31 lr: 0.000011 loss_cls: 3.1636 (3.1129) grad_norm: 2.6336 (3.0759) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 16:43:59 root] (utils.py 283): INFO Epoch: [8] [2350/2502] eta: 0:03:18 lr: 0.000011 loss_cls: 3.1395 (3.1134) grad_norm: 2.6149 (3.0746) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 16:44:12 root] (utils.py 283): INFO Epoch: [8] [2360/2502] eta: 0:03:05 lr: 0.000011 loss_cls: 3.2251 (3.1130) grad_norm: 2.6485 (3.0735) time: 1.3005 data: 0.0003 max mem: 13912 +[2024-12-06 16:44:25 root] (utils.py 283): INFO Epoch: [8] [2370/2502] eta: 0:02:52 lr: 0.000011 loss_cls: 3.2002 (3.1135) grad_norm: 2.7030 (3.0725) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 16:44:38 root] (utils.py 283): INFO Epoch: [8] [2380/2502] eta: 0:02:39 lr: 0.000011 loss_cls: 3.3571 (3.1146) grad_norm: 2.8464 (3.0719) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 16:44:51 root] (utils.py 283): INFO Epoch: [8] [2390/2502] eta: 0:02:26 lr: 0.000011 loss_cls: 3.3602 (3.1145) grad_norm: 2.7197 (3.0704) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 16:45:04 root] (utils.py 283): INFO Epoch: [8] [2400/2502] eta: 0:02:13 lr: 0.000011 loss_cls: 3.3083 (3.1146) grad_norm: 2.7664 (3.0706) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 16:45:17 root] (utils.py 283): INFO Epoch: [8] [2410/2502] eta: 0:01:59 lr: 0.000011 loss_cls: 3.0717 (3.1141) grad_norm: 2.7664 (3.0688) time: 1.3050 data: 0.0002 max mem: 13912 +[2024-12-06 16:45:30 root] (utils.py 283): INFO Epoch: [8] [2420/2502] eta: 0:01:46 lr: 0.000011 loss_cls: 3.2381 (3.1148) grad_norm: 2.6857 (3.0743) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 16:45:43 root] (utils.py 283): INFO Epoch: [8] [2430/2502] eta: 0:01:33 lr: 0.000011 loss_cls: 3.3263 (3.1157) grad_norm: 2.7536 (3.0753) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 16:45:56 root] (utils.py 283): INFO Epoch: [8] [2440/2502] eta: 0:01:20 lr: 0.000011 loss_cls: 3.3118 (3.1155) grad_norm: 2.7211 (3.0738) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 16:46:09 root] (utils.py 283): INFO Epoch: [8] [2450/2502] eta: 0:01:07 lr: 0.000011 loss_cls: 3.2549 (3.1165) grad_norm: 2.6761 (3.0742) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 16:46:22 root] (utils.py 283): INFO Epoch: [8] [2460/2502] eta: 0:00:54 lr: 0.000011 loss_cls: 3.3861 (3.1159) grad_norm: 2.8265 (3.0731) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 16:46:35 root] (utils.py 283): INFO Epoch: [8] [2470/2502] eta: 0:00:41 lr: 0.000011 loss_cls: 3.1973 (3.1158) grad_norm: 2.7486 (3.0726) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 16:46:48 root] (utils.py 283): INFO Epoch: [8] [2480/2502] eta: 0:00:28 lr: 0.000011 loss_cls: 3.1455 (3.1154) grad_norm: 2.7989 (3.0722) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 16:47:02 root] (utils.py 283): INFO Epoch: [8] [2490/2502] eta: 0:00:15 lr: 0.000011 loss_cls: 3.1799 (3.1161) grad_norm: 2.6573 (3.0711) time: 1.3277 data: 0.0243 max mem: 13912 +[2024-12-06 16:47:15 root] (utils.py 283): INFO Epoch: [8] [2500/2502] eta: 0:00:02 lr: 0.000011 loss_cls: 3.4450 (3.1180) grad_norm: 2.7162 (3.0715) time: 1.3283 data: 0.0243 max mem: 13912 +[2024-12-06 16:47:16 root] (utils.py 283): INFO Epoch: [8] [2501/2502] eta: 0:00:01 lr: 0.000011 loss_cls: 3.4450 (3.1180) grad_norm: 2.7162 (3.0715) time: 1.3286 data: 0.0242 max mem: 13912 +[2024-12-06 16:47:16 root] (utils.py 297): INFO Epoch: [8] Total time: 0:54:23 (1.3044 s / it) +[2024-12-06 16:47:16 root] (engine.py 179): INFO Averaged stats:lr: 0.000011 loss_cls: 3.4450 (3.1184) grad_norm: 2.7162 (3.0715) +[2024-12-06 16:47:17 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:21 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4306 (0.4306) acc1: 91.4062 (91.4062) acc3: 97.6562 (97.6562) acc5: 99.2188 (99.2188) time: 0.2241 data: 0.0004 max mem: 13912 +[2024-12-06 16:47:19 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:19 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6639 (0.6564) acc1: 87.5000 (86.2926) acc3: 96.0938 (95.5256) acc5: 96.8750 (97.0881) time: 0.2271 data: 0.0004 max mem: 13912 +[2024-12-06 16:47:22 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6639 (0.6981) acc1: 84.3750 (85.3423) acc3: 95.3125 (94.9405) acc5: 96.8750 (96.6890) time: 0.2276 data: 0.0004 max mem: 13912 +[2024-12-06 16:47:24 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7347 (0.7204) acc1: 83.5938 (84.6018) acc3: 94.5312 (94.9849) acc5: 96.8750 (96.7238) time: 0.2278 data: 0.0004 max mem: 13912 +[2024-12-06 16:47:26 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7315 (0.7230) acc1: 84.3750 (84.4131) acc3: 94.5312 (94.8742) acc5: 97.6562 (96.8559) time: 0.2279 data: 0.0005 max mem: 13912 +[2024-12-06 16:47:28 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8193 (0.8034) acc1: 78.9062 (82.4602) acc3: 91.4062 (93.7347) acc5: 95.3125 (96.0018) time: 0.2286 data: 0.0004 max mem: 13912 +[2024-12-06 16:47:31 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0876 (0.8370) acc1: 75.0000 (81.9288) acc3: 89.0625 (93.0712) acc5: 92.1875 (95.3509) time: 0.2286 data: 0.0004 max mem: 13912 +[2024-12-06 16:47:33 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0314 (0.8668) acc1: 79.6875 (81.2280) acc3: 90.6250 (92.7707) acc5: 92.9688 (95.0814) time: 0.2282 data: 0.0004 max mem: 13912 +[2024-12-06 16:47:35 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0314 (0.8984) acc1: 75.7812 (80.5941) acc3: 90.6250 (92.1875) acc5: 92.9688 (94.7434) time: 0.2286 data: 0.0009 max mem: 13912 +[2024-12-06 16:47:38 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.1239 (0.9225) acc1: 75.7812 (79.8334) acc3: 88.2812 (91.8183) acc5: 92.1875 (94.5398) time: 0.2282 data: 0.0009 max mem: 13912 +[2024-12-06 16:47:39 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0105 (0.9179) acc1: 74.2188 (79.8320) acc3: 90.6250 (91.9200) acc5: 93.7500 (94.6160) time: 0.2241 data: 0.0007 max mem: 13912 +[2024-12-06 16:47:39 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2275 s / it) +[2024-12-06 16:47:39 root] (engine.py 264): INFO * Acc@1 79.618 Acc@3 91.998 Acc@5 94.686 loss 0.918 flops 3.584 layer_flops 3.536 +[2024-12-06 16:47:39 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.6% +[2024-12-06 16:47:40 root] (main.py 551): INFO Max accuracy: 79.62% +[2024-12-06 16:47:41 root] (utils.py 283): INFO Epoch: [9] [ 0/2502] eta: 0:53:43 lr: 0.000010 loss_cls: 3.0290 (3.0290) grad_norm: 2.4965 (2.4965) time: 1.2883 data: 0.0003 max mem: 13912 +[2024-12-06 16:47:54 root] (utils.py 283): INFO Epoch: [9] [ 10/2502] eta: 0:53:55 lr: 0.000010 loss_cls: 3.1707 (3.1813) grad_norm: 3.1072 (3.4707) time: 1.2982 data: 0.0002 max mem: 13912 +[2024-12-06 16:48:07 root] (utils.py 283): INFO Epoch: [9] [ 20/2502] eta: 0:53:48 lr: 0.000010 loss_cls: 3.1707 (3.0834) grad_norm: 2.9335 (3.4167) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 16:48:20 root] (utils.py 283): INFO Epoch: [9] [ 30/2502] eta: 0:53:34 lr: 0.000010 loss_cls: 3.0836 (3.0407) grad_norm: 2.8425 (3.2182) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 16:48:33 root] (utils.py 283): INFO Epoch: [9] [ 40/2502] eta: 0:53:26 lr: 0.000010 loss_cls: 3.0836 (3.0558) grad_norm: 2.9165 (3.1416) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 16:48:46 root] (utils.py 283): INFO Epoch: [9] [ 50/2502] eta: 0:53:13 lr: 0.000010 loss_cls: 2.7989 (2.9957) grad_norm: 2.7113 (3.0502) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 16:48:59 root] (utils.py 283): INFO Epoch: [9] [ 60/2502] eta: 0:53:01 lr: 0.000010 loss_cls: 3.3546 (3.0510) grad_norm: 2.7113 (3.0918) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 16:49:12 root] (utils.py 283): INFO Epoch: [9] [ 70/2502] eta: 0:52:48 lr: 0.000010 loss_cls: 3.3001 (3.0333) grad_norm: 2.8843 (3.0656) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 16:49:25 root] (utils.py 283): INFO Epoch: [9] [ 80/2502] eta: 0:52:39 lr: 0.000010 loss_cls: 2.9545 (3.0203) grad_norm: 2.7722 (3.3139) time: 1.3106 data: 0.0003 max mem: 13912 +[2024-12-06 16:49:38 root] (utils.py 283): INFO Epoch: [9] [ 90/2502] eta: 0:52:24 lr: 0.000010 loss_cls: 3.0974 (3.0319) grad_norm: 2.7518 (3.2736) time: 1.3073 data: 0.0002 max mem: 13912 +[2024-12-06 16:49:51 root] (utils.py 283): INFO Epoch: [9] [ 100/2502] eta: 0:52:11 lr: 0.000010 loss_cls: 3.0974 (3.0290) grad_norm: 2.6580 (3.2727) time: 1.3002 data: 0.0002 max mem: 13912 +[2024-12-06 16:50:04 root] (utils.py 283): INFO Epoch: [9] [ 110/2502] eta: 0:51:57 lr: 0.000010 loss_cls: 3.3763 (3.0631) grad_norm: 2.6663 (3.2328) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 16:50:17 root] (utils.py 283): INFO Epoch: [9] [ 120/2502] eta: 0:51:44 lr: 0.000010 loss_cls: 3.3016 (3.0793) grad_norm: 2.6567 (3.1821) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 16:50:30 root] (utils.py 283): INFO Epoch: [9] [ 130/2502] eta: 0:51:31 lr: 0.000010 loss_cls: 3.1793 (3.0749) grad_norm: 2.6071 (3.1458) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 16:50:43 root] (utils.py 283): INFO Epoch: [9] [ 140/2502] eta: 0:51:18 lr: 0.000010 loss_cls: 3.2563 (3.0928) grad_norm: 2.4810 (3.1165) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 16:50:56 root] (utils.py 283): INFO Epoch: [9] [ 150/2502] eta: 0:51:05 lr: 0.000010 loss_cls: 3.2716 (3.0967) grad_norm: 2.7960 (3.1068) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 16:51:09 root] (utils.py 283): INFO Epoch: [9] [ 160/2502] eta: 0:50:51 lr: 0.000010 loss_cls: 2.9269 (3.0856) grad_norm: 2.7960 (3.0902) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 16:51:23 root] (utils.py 283): INFO Epoch: [9] [ 170/2502] eta: 0:50:38 lr: 0.000010 loss_cls: 2.7491 (3.0653) grad_norm: 2.6553 (3.1311) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 16:51:36 root] (utils.py 283): INFO Epoch: [9] [ 180/2502] eta: 0:50:27 lr: 0.000010 loss_cls: 2.6745 (3.0516) grad_norm: 2.6104 (3.1125) time: 1.3089 data: 0.0003 max mem: 13912 +[2024-12-06 16:51:49 root] (utils.py 283): INFO Epoch: [9] [ 190/2502] eta: 0:50:13 lr: 0.000010 loss_cls: 2.8513 (3.0518) grad_norm: 2.6827 (3.1256) time: 1.3072 data: 0.0002 max mem: 13912 +[2024-12-06 16:52:02 root] (utils.py 283): INFO Epoch: [9] [ 200/2502] eta: 0:50:00 lr: 0.000010 loss_cls: 3.2080 (3.0606) grad_norm: 2.6827 (3.1230) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 16:52:15 root] (utils.py 283): INFO Epoch: [9] [ 210/2502] eta: 0:49:47 lr: 0.000010 loss_cls: 3.2807 (3.0589) grad_norm: 2.6943 (3.1113) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 16:52:28 root] (utils.py 283): INFO Epoch: [9] [ 220/2502] eta: 0:49:33 lr: 0.000010 loss_cls: 3.2684 (3.0667) grad_norm: 2.7571 (3.1521) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 16:52:41 root] (utils.py 283): INFO Epoch: [9] [ 230/2502] eta: 0:49:20 lr: 0.000010 loss_cls: 3.1946 (3.0608) grad_norm: 2.8488 (3.1443) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 16:52:54 root] (utils.py 283): INFO Epoch: [9] [ 240/2502] eta: 0:49:07 lr: 0.000010 loss_cls: 3.2262 (3.0673) grad_norm: 2.6377 (3.1487) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 16:53:07 root] (utils.py 283): INFO Epoch: [9] [ 250/2502] eta: 0:48:54 lr: 0.000010 loss_cls: 3.3215 (3.0760) grad_norm: 2.8038 (3.1873) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 16:53:20 root] (utils.py 283): INFO Epoch: [9] [ 260/2502] eta: 0:48:41 lr: 0.000010 loss_cls: 3.2618 (3.0751) grad_norm: 2.9489 (3.1838) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 16:53:33 root] (utils.py 283): INFO Epoch: [9] [ 270/2502] eta: 0:48:28 lr: 0.000010 loss_cls: 3.2372 (3.0780) grad_norm: 2.7484 (3.1754) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 16:53:46 root] (utils.py 283): INFO Epoch: [9] [ 280/2502] eta: 0:48:15 lr: 0.000010 loss_cls: 3.3055 (3.0806) grad_norm: 2.8618 (3.1949) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 16:53:59 root] (utils.py 283): INFO Epoch: [9] [ 290/2502] eta: 0:48:02 lr: 0.000010 loss_cls: 3.3763 (3.0913) grad_norm: 2.8219 (3.2685) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 16:54:12 root] (utils.py 283): INFO Epoch: [9] [ 300/2502] eta: 0:47:48 lr: 0.000010 loss_cls: 3.2781 (3.0872) grad_norm: 2.6653 (3.2586) time: 1.2995 data: 0.0003 max mem: 13912 +[2024-12-06 16:54:25 root] (utils.py 283): INFO Epoch: [9] [ 310/2502] eta: 0:47:35 lr: 0.000010 loss_cls: 3.0578 (3.0792) grad_norm: 2.6477 (3.2447) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 16:54:38 root] (utils.py 283): INFO Epoch: [9] [ 320/2502] eta: 0:47:22 lr: 0.000010 loss_cls: 2.9542 (3.0792) grad_norm: 2.6112 (3.2519) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 16:54:51 root] (utils.py 283): INFO Epoch: [9] [ 330/2502] eta: 0:47:09 lr: 0.000010 loss_cls: 3.2182 (3.0849) grad_norm: 2.5542 (3.2509) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 16:55:04 root] (utils.py 283): INFO Epoch: [9] [ 340/2502] eta: 0:46:57 lr: 0.000010 loss_cls: 3.2785 (3.0849) grad_norm: 2.7182 (3.2438) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 16:55:17 root] (utils.py 283): INFO Epoch: [9] [ 350/2502] eta: 0:46:43 lr: 0.000010 loss_cls: 3.2501 (3.0860) grad_norm: 2.7182 (3.2535) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 16:55:30 root] (utils.py 283): INFO Epoch: [9] [ 360/2502] eta: 0:46:30 lr: 0.000010 loss_cls: 2.8041 (3.0748) grad_norm: 2.9161 (3.2560) time: 1.2980 data: 0.0003 max mem: 13912 +[2024-12-06 16:55:43 root] (utils.py 283): INFO Epoch: [9] [ 370/2502] eta: 0:46:17 lr: 0.000010 loss_cls: 3.0809 (3.0749) grad_norm: 2.6755 (3.2413) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 16:55:56 root] (utils.py 283): INFO Epoch: [9] [ 380/2502] eta: 0:46:04 lr: 0.000010 loss_cls: 3.1705 (3.0752) grad_norm: 2.5272 (3.2310) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 16:56:09 root] (utils.py 283): INFO Epoch: [9] [ 390/2502] eta: 0:45:51 lr: 0.000010 loss_cls: 3.2699 (3.0780) grad_norm: 2.5272 (3.2148) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 16:56:22 root] (utils.py 283): INFO Epoch: [9] [ 400/2502] eta: 0:45:38 lr: 0.000010 loss_cls: 3.2390 (3.0814) grad_norm: 2.4882 (3.2384) time: 1.3072 data: 0.0002 max mem: 13912 +[2024-12-06 16:56:35 root] (utils.py 283): INFO Epoch: [9] [ 410/2502] eta: 0:45:25 lr: 0.000010 loss_cls: 3.1640 (3.0807) grad_norm: 2.5491 (3.2616) time: 1.3067 data: 0.0002 max mem: 13912 +[2024-12-06 16:56:48 root] (utils.py 283): INFO Epoch: [9] [ 420/2502] eta: 0:45:12 lr: 0.000010 loss_cls: 3.2539 (3.0829) grad_norm: 2.5542 (3.2509) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 16:57:01 root] (utils.py 283): INFO Epoch: [9] [ 430/2502] eta: 0:44:59 lr: 0.000010 loss_cls: 3.2539 (3.0810) grad_norm: 2.6056 (3.2405) time: 1.3040 data: 0.0002 max mem: 13912 +[2024-12-06 16:57:14 root] (utils.py 283): INFO Epoch: [9] [ 440/2502] eta: 0:44:46 lr: 0.000010 loss_cls: 3.1856 (3.0816) grad_norm: 2.5729 (3.2278) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 16:57:27 root] (utils.py 283): INFO Epoch: [9] [ 450/2502] eta: 0:44:33 lr: 0.000010 loss_cls: 3.2619 (3.0822) grad_norm: 2.6939 (3.2256) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 16:57:40 root] (utils.py 283): INFO Epoch: [9] [ 460/2502] eta: 0:44:20 lr: 0.000010 loss_cls: 3.1869 (3.0814) grad_norm: 2.9548 (3.2203) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 16:57:53 root] (utils.py 283): INFO Epoch: [9] [ 470/2502] eta: 0:44:07 lr: 0.000010 loss_cls: 3.1258 (3.0829) grad_norm: 2.9406 (3.2154) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 16:58:07 root] (utils.py 283): INFO Epoch: [9] [ 480/2502] eta: 0:43:54 lr: 0.000010 loss_cls: 3.1258 (3.0808) grad_norm: 2.7237 (3.2067) time: 1.3074 data: 0.0003 max mem: 13912 +[2024-12-06 16:58:20 root] (utils.py 283): INFO Epoch: [9] [ 490/2502] eta: 0:43:41 lr: 0.000010 loss_cls: 3.2022 (3.0854) grad_norm: 2.7421 (3.2007) time: 1.3108 data: 0.0003 max mem: 13912 +[2024-12-06 16:58:33 root] (utils.py 283): INFO Epoch: [9] [ 500/2502] eta: 0:43:29 lr: 0.000010 loss_cls: 3.2427 (3.0885) grad_norm: 2.7616 (3.2133) time: 1.3101 data: 0.0003 max mem: 13912 +[2024-12-06 16:58:46 root] (utils.py 283): INFO Epoch: [9] [ 510/2502] eta: 0:43:16 lr: 0.000010 loss_cls: 3.2247 (3.0858) grad_norm: 2.5585 (3.2016) time: 1.3099 data: 0.0003 max mem: 13912 +[2024-12-06 16:58:59 root] (utils.py 283): INFO Epoch: [9] [ 520/2502] eta: 0:43:03 lr: 0.000010 loss_cls: 2.8721 (3.0834) grad_norm: 2.6441 (3.1941) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 16:59:12 root] (utils.py 283): INFO Epoch: [9] [ 530/2502] eta: 0:42:50 lr: 0.000010 loss_cls: 2.7962 (3.0774) grad_norm: 2.7743 (3.1892) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 16:59:25 root] (utils.py 283): INFO Epoch: [9] [ 540/2502] eta: 0:42:37 lr: 0.000010 loss_cls: 2.7962 (3.0750) grad_norm: 2.6457 (3.1816) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 16:59:38 root] (utils.py 283): INFO Epoch: [9] [ 550/2502] eta: 0:42:24 lr: 0.000010 loss_cls: 3.1387 (3.0756) grad_norm: 2.8278 (3.1780) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 16:59:51 root] (utils.py 283): INFO Epoch: [9] [ 560/2502] eta: 0:42:11 lr: 0.000010 loss_cls: 3.2219 (3.0751) grad_norm: 2.8188 (3.1703) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 17:00:04 root] (utils.py 283): INFO Epoch: [9] [ 570/2502] eta: 0:41:58 lr: 0.000010 loss_cls: 3.2838 (3.0767) grad_norm: 2.7804 (3.1658) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 17:00:17 root] (utils.py 283): INFO Epoch: [9] [ 580/2502] eta: 0:41:45 lr: 0.000010 loss_cls: 3.2838 (3.0778) grad_norm: 2.8044 (3.1830) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 17:00:30 root] (utils.py 283): INFO Epoch: [9] [ 590/2502] eta: 0:41:31 lr: 0.000010 loss_cls: 3.3152 (3.0796) grad_norm: 3.2979 (3.1821) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 17:00:43 root] (utils.py 283): INFO Epoch: [9] [ 600/2502] eta: 0:41:18 lr: 0.000010 loss_cls: 3.2282 (3.0751) grad_norm: 2.7939 (3.2769) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 17:00:56 root] (utils.py 283): INFO Epoch: [9] [ 610/2502] eta: 0:41:05 lr: 0.000010 loss_cls: 3.0391 (3.0770) grad_norm: 2.6475 (3.2722) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 17:01:09 root] (utils.py 283): INFO Epoch: [9] [ 620/2502] eta: 0:40:52 lr: 0.000010 loss_cls: 3.0508 (3.0754) grad_norm: 2.7752 (3.2657) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 17:01:22 root] (utils.py 283): INFO Epoch: [9] [ 630/2502] eta: 0:40:39 lr: 0.000010 loss_cls: 3.2580 (3.0757) grad_norm: 2.7752 (3.2588) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 17:01:35 root] (utils.py 283): INFO Epoch: [9] [ 640/2502] eta: 0:40:26 lr: 0.000010 loss_cls: 3.1149 (3.0753) grad_norm: 2.6490 (3.2527) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 17:01:48 root] (utils.py 283): INFO Epoch: [9] [ 650/2502] eta: 0:40:13 lr: 0.000010 loss_cls: 3.0545 (3.0731) grad_norm: 2.6860 (3.2535) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 17:02:01 root] (utils.py 283): INFO Epoch: [9] [ 660/2502] eta: 0:40:00 lr: 0.000010 loss_cls: 3.0243 (3.0732) grad_norm: 2.5956 (3.2451) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 17:02:14 root] (utils.py 283): INFO Epoch: [9] [ 670/2502] eta: 0:39:47 lr: 0.000010 loss_cls: 3.0243 (3.0710) grad_norm: 2.6648 (3.2387) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 17:02:27 root] (utils.py 283): INFO Epoch: [9] [ 680/2502] eta: 0:39:34 lr: 0.000010 loss_cls: 3.2648 (3.0740) grad_norm: 2.6648 (3.2332) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 17:02:40 root] (utils.py 283): INFO Epoch: [9] [ 690/2502] eta: 0:39:21 lr: 0.000010 loss_cls: 3.4027 (3.0727) grad_norm: 2.5799 (3.2264) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 17:02:53 root] (utils.py 283): INFO Epoch: [9] [ 700/2502] eta: 0:39:08 lr: 0.000010 loss_cls: 3.1293 (3.0733) grad_norm: 2.6031 (3.2223) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 17:03:06 root] (utils.py 283): INFO Epoch: [9] [ 710/2502] eta: 0:38:55 lr: 0.000010 loss_cls: 3.2204 (3.0763) grad_norm: 2.6980 (3.2151) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 17:03:19 root] (utils.py 283): INFO Epoch: [9] [ 720/2502] eta: 0:38:42 lr: 0.000010 loss_cls: 3.2204 (3.0782) grad_norm: 2.5918 (3.2066) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 17:03:32 root] (utils.py 283): INFO Epoch: [9] [ 730/2502] eta: 0:38:29 lr: 0.000010 loss_cls: 3.1519 (3.0772) grad_norm: 2.5860 (3.2018) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 17:03:45 root] (utils.py 283): INFO Epoch: [9] [ 740/2502] eta: 0:38:15 lr: 0.000010 loss_cls: 3.1704 (3.0788) grad_norm: 2.7651 (3.1955) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 17:03:58 root] (utils.py 283): INFO Epoch: [9] [ 750/2502] eta: 0:38:02 lr: 0.000010 loss_cls: 3.2435 (3.0815) grad_norm: 2.8121 (3.1950) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 17:04:11 root] (utils.py 283): INFO Epoch: [9] [ 760/2502] eta: 0:37:49 lr: 0.000010 loss_cls: 3.2640 (3.0830) grad_norm: 2.6099 (3.1909) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 17:04:24 root] (utils.py 283): INFO Epoch: [9] [ 770/2502] eta: 0:37:36 lr: 0.000010 loss_cls: 2.8521 (3.0786) grad_norm: 2.6240 (3.1978) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 17:04:37 root] (utils.py 283): INFO Epoch: [9] [ 780/2502] eta: 0:37:23 lr: 0.000010 loss_cls: 3.1576 (3.0788) grad_norm: 2.8330 (3.1925) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 17:04:50 root] (utils.py 283): INFO Epoch: [9] [ 790/2502] eta: 0:37:10 lr: 0.000010 loss_cls: 3.3437 (3.0811) grad_norm: 2.6573 (3.1913) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 17:05:04 root] (utils.py 283): INFO Epoch: [9] [ 800/2502] eta: 0:36:57 lr: 0.000010 loss_cls: 3.4172 (3.0835) grad_norm: 2.6608 (3.1847) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 17:05:17 root] (utils.py 283): INFO Epoch: [9] [ 810/2502] eta: 0:36:44 lr: 0.000010 loss_cls: 3.3595 (3.0845) grad_norm: 2.5847 (3.1786) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 17:05:30 root] (utils.py 283): INFO Epoch: [9] [ 820/2502] eta: 0:36:31 lr: 0.000010 loss_cls: 3.1868 (3.0838) grad_norm: 2.5727 (3.1767) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 17:05:43 root] (utils.py 283): INFO Epoch: [9] [ 830/2502] eta: 0:36:18 lr: 0.000010 loss_cls: 3.3849 (3.0861) grad_norm: 2.7006 (3.1757) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 17:05:56 root] (utils.py 283): INFO Epoch: [9] [ 840/2502] eta: 0:36:05 lr: 0.000010 loss_cls: 3.3946 (3.0847) grad_norm: 2.5730 (3.1672) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 17:06:09 root] (utils.py 283): INFO Epoch: [9] [ 850/2502] eta: 0:35:52 lr: 0.000010 loss_cls: 3.1684 (3.0858) grad_norm: 2.4917 (3.1719) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 17:06:22 root] (utils.py 283): INFO Epoch: [9] [ 860/2502] eta: 0:35:39 lr: 0.000010 loss_cls: 3.2827 (3.0845) grad_norm: 2.4520 (3.1646) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 17:06:35 root] (utils.py 283): INFO Epoch: [9] [ 870/2502] eta: 0:35:26 lr: 0.000010 loss_cls: 3.0505 (3.0835) grad_norm: 2.4211 (3.1602) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 17:06:48 root] (utils.py 283): INFO Epoch: [9] [ 880/2502] eta: 0:35:13 lr: 0.000010 loss_cls: 3.0686 (3.0830) grad_norm: 2.5825 (3.1592) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 17:07:01 root] (utils.py 283): INFO Epoch: [9] [ 890/2502] eta: 0:35:00 lr: 0.000010 loss_cls: 2.9517 (3.0801) grad_norm: 2.7759 (3.1567) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 17:07:14 root] (utils.py 283): INFO Epoch: [9] [ 900/2502] eta: 0:34:47 lr: 0.000010 loss_cls: 2.9517 (3.0781) grad_norm: 2.7759 (3.1551) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 17:07:27 root] (utils.py 283): INFO Epoch: [9] [ 910/2502] eta: 0:34:34 lr: 0.000010 loss_cls: 3.0871 (3.0762) grad_norm: 2.6532 (3.1496) time: 1.2971 data: 0.0003 max mem: 13912 +[2024-12-06 17:07:40 root] (utils.py 283): INFO Epoch: [9] [ 920/2502] eta: 0:34:20 lr: 0.000010 loss_cls: 3.2163 (3.0771) grad_norm: 2.7436 (3.1569) time: 1.2923 data: 0.0003 max mem: 13912 +[2024-12-06 17:07:52 root] (utils.py 283): INFO Epoch: [9] [ 930/2502] eta: 0:34:07 lr: 0.000010 loss_cls: 3.1653 (3.0780) grad_norm: 2.7436 (3.1512) time: 1.2904 data: 0.0003 max mem: 13912 +[2024-12-06 17:08:05 root] (utils.py 283): INFO Epoch: [9] [ 940/2502] eta: 0:33:54 lr: 0.000010 loss_cls: 3.1689 (3.0791) grad_norm: 2.5212 (3.1528) time: 1.2941 data: 0.0003 max mem: 13912 +[2024-12-06 17:08:19 root] (utils.py 283): INFO Epoch: [9] [ 950/2502] eta: 0:33:41 lr: 0.000010 loss_cls: 3.4154 (3.0796) grad_norm: 2.8313 (3.1503) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 17:08:31 root] (utils.py 283): INFO Epoch: [9] [ 960/2502] eta: 0:33:28 lr: 0.000010 loss_cls: 3.0996 (3.0804) grad_norm: 2.8313 (3.1527) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 17:08:44 root] (utils.py 283): INFO Epoch: [9] [ 970/2502] eta: 0:33:15 lr: 0.000010 loss_cls: 3.0890 (3.0796) grad_norm: 2.6083 (3.1470) time: 1.2932 data: 0.0003 max mem: 13912 +[2024-12-06 17:08:57 root] (utils.py 283): INFO Epoch: [9] [ 980/2502] eta: 0:33:02 lr: 0.000010 loss_cls: 3.0093 (3.0770) grad_norm: 2.6393 (3.1424) time: 1.2955 data: 0.0003 max mem: 13912 +[2024-12-06 17:09:10 root] (utils.py 283): INFO Epoch: [9] [ 990/2502] eta: 0:32:48 lr: 0.000010 loss_cls: 2.9484 (3.0762) grad_norm: 2.7055 (3.1424) time: 1.2955 data: 0.0003 max mem: 13912 +[2024-12-06 17:09:23 root] (utils.py 283): INFO Epoch: [9] [1000/2502] eta: 0:32:35 lr: 0.000010 loss_cls: 3.2321 (3.0778) grad_norm: 2.6802 (3.1392) time: 1.2942 data: 0.0003 max mem: 13912 +[2024-12-06 17:09:36 root] (utils.py 283): INFO Epoch: [9] [1010/2502] eta: 0:32:22 lr: 0.000010 loss_cls: 3.1579 (3.0764) grad_norm: 2.6598 (3.1404) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 17:09:49 root] (utils.py 283): INFO Epoch: [9] [1020/2502] eta: 0:32:09 lr: 0.000010 loss_cls: 3.2134 (3.0773) grad_norm: 2.7251 (3.1366) time: 1.2980 data: 0.0003 max mem: 13912 +[2024-12-06 17:10:02 root] (utils.py 283): INFO Epoch: [9] [1030/2502] eta: 0:31:56 lr: 0.000010 loss_cls: 3.2264 (3.0774) grad_norm: 2.7036 (3.1349) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 17:10:15 root] (utils.py 283): INFO Epoch: [9] [1040/2502] eta: 0:31:43 lr: 0.000010 loss_cls: 3.0121 (3.0781) grad_norm: 2.6460 (3.1312) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 17:10:28 root] (utils.py 283): INFO Epoch: [9] [1050/2502] eta: 0:31:30 lr: 0.000010 loss_cls: 3.2193 (3.0796) grad_norm: 2.7586 (3.1277) time: 1.3091 data: 0.0003 max mem: 13912 +[2024-12-06 17:10:41 root] (utils.py 283): INFO Epoch: [9] [1060/2502] eta: 0:31:17 lr: 0.000010 loss_cls: 3.2342 (3.0812) grad_norm: 2.7253 (3.1234) time: 1.3077 data: 0.0003 max mem: 13912 +[2024-12-06 17:10:54 root] (utils.py 283): INFO Epoch: [9] [1070/2502] eta: 0:31:04 lr: 0.000010 loss_cls: 3.2342 (3.0813) grad_norm: 2.7253 (3.1223) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 17:11:07 root] (utils.py 283): INFO Epoch: [9] [1080/2502] eta: 0:30:51 lr: 0.000010 loss_cls: 3.2171 (3.0824) grad_norm: 2.8622 (3.1204) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 17:11:20 root] (utils.py 283): INFO Epoch: [9] [1090/2502] eta: 0:30:38 lr: 0.000010 loss_cls: 3.3937 (3.0849) grad_norm: 2.9795 (3.1233) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 17:11:34 root] (utils.py 283): INFO Epoch: [9] [1100/2502] eta: 0:30:25 lr: 0.000010 loss_cls: 3.4743 (3.0865) grad_norm: 3.0179 (3.1275) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 17:11:47 root] (utils.py 283): INFO Epoch: [9] [1110/2502] eta: 0:30:12 lr: 0.000010 loss_cls: 3.2386 (3.0875) grad_norm: 2.8973 (3.1304) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 17:12:00 root] (utils.py 283): INFO Epoch: [9] [1120/2502] eta: 0:29:59 lr: 0.000010 loss_cls: 3.0363 (3.0873) grad_norm: 2.6603 (3.1260) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 17:12:13 root] (utils.py 283): INFO Epoch: [9] [1130/2502] eta: 0:29:46 lr: 0.000010 loss_cls: 3.0572 (3.0862) grad_norm: 2.6112 (3.1239) time: 1.3092 data: 0.0003 max mem: 13912 +[2024-12-06 17:12:26 root] (utils.py 283): INFO Epoch: [9] [1140/2502] eta: 0:29:33 lr: 0.000010 loss_cls: 3.1315 (3.0859) grad_norm: 2.7255 (3.1212) time: 1.3113 data: 0.0002 max mem: 13912 +[2024-12-06 17:12:39 root] (utils.py 283): INFO Epoch: [9] [1150/2502] eta: 0:29:20 lr: 0.000010 loss_cls: 3.2051 (3.0860) grad_norm: 2.7255 (3.1172) time: 1.3071 data: 0.0002 max mem: 13912 +[2024-12-06 17:12:52 root] (utils.py 283): INFO Epoch: [9] [1160/2502] eta: 0:29:07 lr: 0.000010 loss_cls: 3.3634 (3.0886) grad_norm: 2.6984 (3.1153) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 17:13:05 root] (utils.py 283): INFO Epoch: [9] [1170/2502] eta: 0:28:54 lr: 0.000010 loss_cls: 3.3308 (3.0879) grad_norm: 2.6984 (3.1192) time: 1.3011 data: 0.0002 max mem: 13912 +[2024-12-06 17:13:18 root] (utils.py 283): INFO Epoch: [9] [1180/2502] eta: 0:28:41 lr: 0.000010 loss_cls: 3.2220 (3.0892) grad_norm: 2.7288 (3.1214) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 17:13:31 root] (utils.py 283): INFO Epoch: [9] [1190/2502] eta: 0:28:28 lr: 0.000010 loss_cls: 3.2384 (3.0891) grad_norm: 2.7288 (3.1226) time: 1.2996 data: 0.0002 max mem: 13912 +[2024-12-06 17:13:44 root] (utils.py 283): INFO Epoch: [9] [1200/2502] eta: 0:28:15 lr: 0.000010 loss_cls: 3.2511 (3.0897) grad_norm: 2.6206 (3.1230) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 17:13:57 root] (utils.py 283): INFO Epoch: [9] [1210/2502] eta: 0:28:02 lr: 0.000010 loss_cls: 3.2013 (3.0888) grad_norm: 2.6487 (3.1189) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 17:14:10 root] (utils.py 283): INFO Epoch: [9] [1220/2502] eta: 0:27:49 lr: 0.000010 loss_cls: 3.0437 (3.0893) grad_norm: 2.5896 (3.1154) time: 1.3062 data: 0.0002 max mem: 13912 +[2024-12-06 17:14:23 root] (utils.py 283): INFO Epoch: [9] [1230/2502] eta: 0:27:36 lr: 0.000010 loss_cls: 3.1463 (3.0885) grad_norm: 2.5193 (3.1133) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 17:14:36 root] (utils.py 283): INFO Epoch: [9] [1240/2502] eta: 0:27:23 lr: 0.000010 loss_cls: 3.1176 (3.0874) grad_norm: 2.7657 (3.1134) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 17:14:49 root] (utils.py 283): INFO Epoch: [9] [1250/2502] eta: 0:27:10 lr: 0.000010 loss_cls: 3.1941 (3.0887) grad_norm: 2.7657 (3.1110) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 17:15:02 root] (utils.py 283): INFO Epoch: [9] [1260/2502] eta: 0:26:57 lr: 0.000010 loss_cls: 3.2483 (3.0879) grad_norm: 2.6688 (3.1085) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 17:15:15 root] (utils.py 283): INFO Epoch: [9] [1270/2502] eta: 0:26:44 lr: 0.000010 loss_cls: 2.7567 (3.0852) grad_norm: 2.6524 (3.1076) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 17:15:29 root] (utils.py 283): INFO Epoch: [9] [1280/2502] eta: 0:26:31 lr: 0.000010 loss_cls: 2.9784 (3.0849) grad_norm: 2.7973 (3.1084) time: 1.3152 data: 0.0002 max mem: 13912 +[2024-12-06 17:15:42 root] (utils.py 283): INFO Epoch: [9] [1290/2502] eta: 0:26:19 lr: 0.000010 loss_cls: 3.0737 (3.0849) grad_norm: 2.6551 (3.1040) time: 1.3379 data: 0.0003 max mem: 13912 +[2024-12-06 17:15:55 root] (utils.py 283): INFO Epoch: [9] [1300/2502] eta: 0:26:06 lr: 0.000010 loss_cls: 3.0737 (3.0844) grad_norm: 2.4821 (3.1041) time: 1.3248 data: 0.0003 max mem: 13912 +[2024-12-06 17:16:08 root] (utils.py 283): INFO Epoch: [9] [1310/2502] eta: 0:25:53 lr: 0.000010 loss_cls: 3.2702 (3.0852) grad_norm: 2.8346 (3.1034) time: 1.3002 data: 0.0002 max mem: 13912 +[2024-12-06 17:16:21 root] (utils.py 283): INFO Epoch: [9] [1320/2502] eta: 0:25:40 lr: 0.000010 loss_cls: 3.3399 (3.0867) grad_norm: 2.7640 (3.1009) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 17:16:34 root] (utils.py 283): INFO Epoch: [9] [1330/2502] eta: 0:25:27 lr: 0.000010 loss_cls: 3.1065 (3.0845) grad_norm: 2.7126 (3.0992) time: 1.3057 data: 0.0002 max mem: 13912 +[2024-12-06 17:16:47 root] (utils.py 283): INFO Epoch: [9] [1340/2502] eta: 0:25:14 lr: 0.000010 loss_cls: 3.0746 (3.0847) grad_norm: 2.8427 (3.0982) time: 1.3096 data: 0.0002 max mem: 13912 +[2024-12-06 17:17:00 root] (utils.py 283): INFO Epoch: [9] [1350/2502] eta: 0:25:01 lr: 0.000010 loss_cls: 3.3966 (3.0875) grad_norm: 2.7310 (3.1004) time: 1.3064 data: 0.0002 max mem: 13912 +[2024-12-06 17:17:13 root] (utils.py 283): INFO Epoch: [9] [1360/2502] eta: 0:24:48 lr: 0.000010 loss_cls: 3.3966 (3.0867) grad_norm: 2.5928 (3.0988) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 17:17:26 root] (utils.py 283): INFO Epoch: [9] [1370/2502] eta: 0:24:35 lr: 0.000010 loss_cls: 3.2756 (3.0874) grad_norm: 2.5046 (3.0960) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 17:17:39 root] (utils.py 283): INFO Epoch: [9] [1380/2502] eta: 0:24:22 lr: 0.000010 loss_cls: 3.3255 (3.0876) grad_norm: 2.5046 (3.0932) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 17:17:52 root] (utils.py 283): INFO Epoch: [9] [1390/2502] eta: 0:24:09 lr: 0.000010 loss_cls: 3.2048 (3.0879) grad_norm: 2.7336 (3.0932) time: 1.3047 data: 0.0002 max mem: 13912 +[2024-12-06 17:18:05 root] (utils.py 283): INFO Epoch: [9] [1400/2502] eta: 0:23:56 lr: 0.000010 loss_cls: 3.1834 (3.0880) grad_norm: 2.7425 (3.0915) time: 1.3054 data: 0.0002 max mem: 13912 +[2024-12-06 17:18:19 root] (utils.py 283): INFO Epoch: [9] [1410/2502] eta: 0:23:42 lr: 0.000010 loss_cls: 3.1769 (3.0879) grad_norm: 2.7039 (3.0891) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 17:18:32 root] (utils.py 283): INFO Epoch: [9] [1420/2502] eta: 0:23:29 lr: 0.000010 loss_cls: 3.1769 (3.0890) grad_norm: 2.6513 (3.0862) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 17:18:45 root] (utils.py 283): INFO Epoch: [9] [1430/2502] eta: 0:23:16 lr: 0.000010 loss_cls: 3.1537 (3.0883) grad_norm: 2.6453 (3.0869) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 17:18:58 root] (utils.py 283): INFO Epoch: [9] [1440/2502] eta: 0:23:03 lr: 0.000010 loss_cls: 3.0798 (3.0889) grad_norm: 2.7353 (3.0852) time: 1.2988 data: 0.0002 max mem: 13912 +[2024-12-06 17:19:11 root] (utils.py 283): INFO Epoch: [9] [1450/2502] eta: 0:22:50 lr: 0.000010 loss_cls: 3.2439 (3.0900) grad_norm: 2.8093 (3.0872) time: 1.2992 data: 0.0002 max mem: 13912 +[2024-12-06 17:19:24 root] (utils.py 283): INFO Epoch: [9] [1460/2502] eta: 0:22:37 lr: 0.000010 loss_cls: 3.3206 (3.0890) grad_norm: 2.9855 (3.0928) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 17:19:37 root] (utils.py 283): INFO Epoch: [9] [1470/2502] eta: 0:22:24 lr: 0.000010 loss_cls: 3.1423 (3.0897) grad_norm: 2.7452 (3.1037) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 17:19:50 root] (utils.py 283): INFO Epoch: [9] [1480/2502] eta: 0:22:11 lr: 0.000010 loss_cls: 2.9285 (3.0886) grad_norm: 2.7194 (3.1014) time: 1.2986 data: 0.0002 max mem: 13912 +[2024-12-06 17:20:03 root] (utils.py 283): INFO Epoch: [9] [1490/2502] eta: 0:21:58 lr: 0.000010 loss_cls: 3.3659 (3.0907) grad_norm: 2.6435 (3.0982) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 17:20:16 root] (utils.py 283): INFO Epoch: [9] [1500/2502] eta: 0:21:45 lr: 0.000010 loss_cls: 3.3659 (3.0915) grad_norm: 2.7018 (3.0967) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 17:20:29 root] (utils.py 283): INFO Epoch: [9] [1510/2502] eta: 0:21:32 lr: 0.000010 loss_cls: 3.2260 (3.0905) grad_norm: 2.9228 (3.0994) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 17:20:42 root] (utils.py 283): INFO Epoch: [9] [1520/2502] eta: 0:21:19 lr: 0.000010 loss_cls: 2.6668 (3.0891) grad_norm: 2.9834 (3.1000) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 17:20:55 root] (utils.py 283): INFO Epoch: [9] [1530/2502] eta: 0:21:06 lr: 0.000010 loss_cls: 3.0240 (3.0895) grad_norm: 2.6052 (3.0974) time: 1.3071 data: 0.0002 max mem: 13912 +[2024-12-06 17:21:08 root] (utils.py 283): INFO Epoch: [9] [1540/2502] eta: 0:20:53 lr: 0.000010 loss_cls: 3.3105 (3.0897) grad_norm: 2.6052 (3.0944) time: 1.3065 data: 0.0002 max mem: 13912 +[2024-12-06 17:21:21 root] (utils.py 283): INFO Epoch: [9] [1550/2502] eta: 0:20:40 lr: 0.000010 loss_cls: 3.2038 (3.0894) grad_norm: 2.6107 (3.0951) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 17:21:34 root] (utils.py 283): INFO Epoch: [9] [1560/2502] eta: 0:20:27 lr: 0.000010 loss_cls: 3.0345 (3.0892) grad_norm: 2.5584 (3.0926) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 17:21:47 root] (utils.py 283): INFO Epoch: [9] [1570/2502] eta: 0:20:14 lr: 0.000010 loss_cls: 3.1723 (3.0891) grad_norm: 2.8830 (3.0926) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 17:22:00 root] (utils.py 283): INFO Epoch: [9] [1580/2502] eta: 0:20:01 lr: 0.000010 loss_cls: 3.2500 (3.0908) grad_norm: 3.1133 (3.1009) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 17:22:13 root] (utils.py 283): INFO Epoch: [9] [1590/2502] eta: 0:19:48 lr: 0.000010 loss_cls: 3.2797 (3.0906) grad_norm: 2.9264 (3.0997) time: 1.3039 data: 0.0002 max mem: 13912 +[2024-12-06 17:22:26 root] (utils.py 283): INFO Epoch: [9] [1600/2502] eta: 0:19:35 lr: 0.000010 loss_cls: 3.2239 (3.0901) grad_norm: 2.6097 (3.0967) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 17:22:39 root] (utils.py 283): INFO Epoch: [9] [1610/2502] eta: 0:19:22 lr: 0.000010 loss_cls: 3.1637 (3.0892) grad_norm: 2.4626 (3.0928) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 17:22:52 root] (utils.py 283): INFO Epoch: [9] [1620/2502] eta: 0:19:09 lr: 0.000010 loss_cls: 3.2211 (3.0905) grad_norm: 2.6057 (3.0949) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 17:23:05 root] (utils.py 283): INFO Epoch: [9] [1630/2502] eta: 0:18:56 lr: 0.000010 loss_cls: 3.3024 (3.0912) grad_norm: 2.7722 (3.0938) time: 1.3040 data: 0.0002 max mem: 13912 +[2024-12-06 17:23:18 root] (utils.py 283): INFO Epoch: [9] [1640/2502] eta: 0:18:43 lr: 0.000010 loss_cls: 3.2842 (3.0919) grad_norm: 2.6700 (3.0918) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 17:23:31 root] (utils.py 283): INFO Epoch: [9] [1650/2502] eta: 0:18:30 lr: 0.000010 loss_cls: 3.2767 (3.0927) grad_norm: 2.7110 (3.0908) time: 1.3046 data: 0.0002 max mem: 13912 +[2024-12-06 17:23:44 root] (utils.py 283): INFO Epoch: [9] [1660/2502] eta: 0:18:17 lr: 0.000010 loss_cls: 3.0638 (3.0916) grad_norm: 2.7110 (3.0882) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 17:23:57 root] (utils.py 283): INFO Epoch: [9] [1670/2502] eta: 0:18:04 lr: 0.000010 loss_cls: 3.0638 (3.0918) grad_norm: 2.6706 (3.0864) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 17:24:11 root] (utils.py 283): INFO Epoch: [9] [1680/2502] eta: 0:17:51 lr: 0.000010 loss_cls: 3.1869 (3.0923) grad_norm: 2.7260 (3.0849) time: 1.3103 data: 0.0002 max mem: 13912 +[2024-12-06 17:24:24 root] (utils.py 283): INFO Epoch: [9] [1690/2502] eta: 0:17:38 lr: 0.000010 loss_cls: 3.1984 (3.0922) grad_norm: 2.5840 (3.0813) time: 1.3124 data: 0.0002 max mem: 13912 +[2024-12-06 17:24:37 root] (utils.py 283): INFO Epoch: [9] [1700/2502] eta: 0:17:25 lr: 0.000010 loss_cls: 3.1836 (3.0917) grad_norm: 2.5667 (3.0834) time: 1.3047 data: 0.0002 max mem: 13912 +[2024-12-06 17:24:50 root] (utils.py 283): INFO Epoch: [9] [1710/2502] eta: 0:17:12 lr: 0.000010 loss_cls: 3.2310 (3.0921) grad_norm: 2.6150 (3.0818) time: 1.3062 data: 0.0002 max mem: 13912 +[2024-12-06 17:25:03 root] (utils.py 283): INFO Epoch: [9] [1720/2502] eta: 0:16:59 lr: 0.000010 loss_cls: 3.2310 (3.0910) grad_norm: 2.7091 (3.0837) time: 1.3083 data: 0.0002 max mem: 13912 +[2024-12-06 17:25:16 root] (utils.py 283): INFO Epoch: [9] [1730/2502] eta: 0:16:46 lr: 0.000010 loss_cls: 3.2294 (3.0918) grad_norm: 2.7977 (3.0817) time: 1.3074 data: 0.0002 max mem: 13912 +[2024-12-06 17:25:29 root] (utils.py 283): INFO Epoch: [9] [1740/2502] eta: 0:16:33 lr: 0.000010 loss_cls: 3.1210 (3.0909) grad_norm: 2.7977 (3.0808) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 17:25:42 root] (utils.py 283): INFO Epoch: [9] [1750/2502] eta: 0:16:20 lr: 0.000010 loss_cls: 3.2122 (3.0917) grad_norm: 2.8204 (3.0797) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 17:25:55 root] (utils.py 283): INFO Epoch: [9] [1760/2502] eta: 0:16:06 lr: 0.000010 loss_cls: 3.2872 (3.0927) grad_norm: 2.7196 (3.0810) time: 1.2985 data: 0.0002 max mem: 13912 +[2024-12-06 17:26:08 root] (utils.py 283): INFO Epoch: [9] [1770/2502] eta: 0:15:53 lr: 0.000010 loss_cls: 3.1360 (3.0915) grad_norm: 2.7269 (3.0833) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 17:26:21 root] (utils.py 283): INFO Epoch: [9] [1780/2502] eta: 0:15:40 lr: 0.000010 loss_cls: 3.0058 (3.0913) grad_norm: 2.8230 (3.0835) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 17:26:34 root] (utils.py 283): INFO Epoch: [9] [1790/2502] eta: 0:15:27 lr: 0.000010 loss_cls: 3.3073 (3.0939) grad_norm: 2.6862 (3.0825) time: 1.2942 data: 0.0002 max mem: 13912 +[2024-12-06 17:26:47 root] (utils.py 283): INFO Epoch: [9] [1800/2502] eta: 0:15:14 lr: 0.000010 loss_cls: 3.4202 (3.0946) grad_norm: 2.6380 (3.0815) time: 1.2953 data: 0.0002 max mem: 13912 +[2024-12-06 17:27:00 root] (utils.py 283): INFO Epoch: [9] [1810/2502] eta: 0:15:01 lr: 0.000010 loss_cls: 3.0480 (3.0941) grad_norm: 2.6094 (3.0794) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 17:27:13 root] (utils.py 283): INFO Epoch: [9] [1820/2502] eta: 0:14:48 lr: 0.000010 loss_cls: 2.9715 (3.0939) grad_norm: 2.6129 (3.0796) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 17:27:26 root] (utils.py 283): INFO Epoch: [9] [1830/2502] eta: 0:14:35 lr: 0.000010 loss_cls: 3.2021 (3.0938) grad_norm: 2.8348 (3.0785) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 17:27:39 root] (utils.py 283): INFO Epoch: [9] [1840/2502] eta: 0:14:22 lr: 0.000010 loss_cls: 3.1476 (3.0930) grad_norm: 2.7149 (3.0825) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 17:27:52 root] (utils.py 283): INFO Epoch: [9] [1850/2502] eta: 0:14:09 lr: 0.000010 loss_cls: 3.0013 (3.0922) grad_norm: 2.7673 (3.0841) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 17:28:05 root] (utils.py 283): INFO Epoch: [9] [1860/2502] eta: 0:13:56 lr: 0.000010 loss_cls: 2.9141 (3.0919) grad_norm: 2.8469 (3.0860) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 17:28:18 root] (utils.py 283): INFO Epoch: [9] [1870/2502] eta: 0:13:43 lr: 0.000010 loss_cls: 2.9141 (3.0907) grad_norm: 2.9184 (3.0862) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 17:28:31 root] (utils.py 283): INFO Epoch: [9] [1880/2502] eta: 0:13:30 lr: 0.000010 loss_cls: 2.9942 (3.0916) grad_norm: 3.0501 (3.0887) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 17:28:44 root] (utils.py 283): INFO Epoch: [9] [1890/2502] eta: 0:13:17 lr: 0.000010 loss_cls: 3.3857 (3.0924) grad_norm: 2.9340 (3.0875) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 17:28:57 root] (utils.py 283): INFO Epoch: [9] [1900/2502] eta: 0:13:04 lr: 0.000010 loss_cls: 3.2188 (3.0921) grad_norm: 2.8225 (3.0890) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 17:29:10 root] (utils.py 283): INFO Epoch: [9] [1910/2502] eta: 0:12:51 lr: 0.000010 loss_cls: 3.0979 (3.0907) grad_norm: 2.7246 (3.0872) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 17:29:23 root] (utils.py 283): INFO Epoch: [9] [1920/2502] eta: 0:12:38 lr: 0.000010 loss_cls: 3.1777 (3.0913) grad_norm: 2.6350 (3.0853) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 17:29:36 root] (utils.py 283): INFO Epoch: [9] [1930/2502] eta: 0:12:25 lr: 0.000010 loss_cls: 2.9788 (3.0886) grad_norm: 2.5996 (3.0829) time: 1.2996 data: 0.0002 max mem: 13912 +[2024-12-06 17:29:49 root] (utils.py 283): INFO Epoch: [9] [1940/2502] eta: 0:12:12 lr: 0.000010 loss_cls: 2.8142 (3.0882) grad_norm: 2.5996 (3.0814) time: 1.2994 data: 0.0002 max mem: 13912 +[2024-12-06 17:30:02 root] (utils.py 283): INFO Epoch: [9] [1950/2502] eta: 0:11:59 lr: 0.000010 loss_cls: 3.1115 (3.0886) grad_norm: 2.5877 (3.0825) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 17:30:15 root] (utils.py 283): INFO Epoch: [9] [1960/2502] eta: 0:11:46 lr: 0.000010 loss_cls: 3.1872 (3.0896) grad_norm: 2.6345 (3.0834) time: 1.2995 data: 0.0002 max mem: 13912 +[2024-12-06 17:30:28 root] (utils.py 283): INFO Epoch: [9] [1970/2502] eta: 0:11:33 lr: 0.000010 loss_cls: 3.3088 (3.0900) grad_norm: 2.6578 (3.0808) time: 1.2956 data: 0.0002 max mem: 13912 +[2024-12-06 17:30:41 root] (utils.py 283): INFO Epoch: [9] [1980/2502] eta: 0:11:20 lr: 0.000010 loss_cls: 3.2563 (3.0899) grad_norm: 2.5980 (3.0802) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 17:30:54 root] (utils.py 283): INFO Epoch: [9] [1990/2502] eta: 0:11:07 lr: 0.000010 loss_cls: 3.2363 (3.0904) grad_norm: 2.5980 (3.0776) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 17:31:07 root] (utils.py 283): INFO Epoch: [9] [2000/2502] eta: 0:10:54 lr: 0.000010 loss_cls: 3.1885 (3.0907) grad_norm: 2.6028 (3.0765) time: 1.2997 data: 0.0002 max mem: 13912 +[2024-12-06 17:31:20 root] (utils.py 283): INFO Epoch: [9] [2010/2502] eta: 0:10:41 lr: 0.000010 loss_cls: 3.1885 (3.0905) grad_norm: 2.8005 (3.0754) time: 1.2922 data: 0.0002 max mem: 13912 +[2024-12-06 17:31:33 root] (utils.py 283): INFO Epoch: [9] [2020/2502] eta: 0:10:27 lr: 0.000010 loss_cls: 3.2434 (3.0916) grad_norm: 2.7265 (3.0737) time: 1.2945 data: 0.0002 max mem: 13912 +[2024-12-06 17:31:46 root] (utils.py 283): INFO Epoch: [9] [2030/2502] eta: 0:10:14 lr: 0.000010 loss_cls: 3.3471 (3.0926) grad_norm: 2.6665 (3.0737) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 17:31:59 root] (utils.py 283): INFO Epoch: [9] [2040/2502] eta: 0:10:01 lr: 0.000010 loss_cls: 3.1649 (3.0922) grad_norm: 2.5486 (3.0712) time: 1.2964 data: 0.0002 max mem: 13912 +[2024-12-06 17:32:12 root] (utils.py 283): INFO Epoch: [9] [2050/2502] eta: 0:09:48 lr: 0.000010 loss_cls: 2.8931 (3.0918) grad_norm: 2.4831 (3.0685) time: 1.2993 data: 0.0002 max mem: 13912 +[2024-12-06 17:32:25 root] (utils.py 283): INFO Epoch: [9] [2060/2502] eta: 0:09:35 lr: 0.000010 loss_cls: 3.2746 (3.0927) grad_norm: 2.5153 (3.0665) time: 1.2987 data: 0.0002 max mem: 13912 +[2024-12-06 17:32:38 root] (utils.py 283): INFO Epoch: [9] [2070/2502] eta: 0:09:22 lr: 0.000010 loss_cls: 3.2746 (3.0926) grad_norm: 2.5813 (3.0654) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 17:32:51 root] (utils.py 283): INFO Epoch: [9] [2080/2502] eta: 0:09:09 lr: 0.000010 loss_cls: 3.1462 (3.0926) grad_norm: 2.6757 (3.0653) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 17:33:04 root] (utils.py 283): INFO Epoch: [9] [2090/2502] eta: 0:08:56 lr: 0.000010 loss_cls: 3.0425 (3.0920) grad_norm: 2.6588 (3.0662) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 17:33:17 root] (utils.py 283): INFO Epoch: [9] [2100/2502] eta: 0:08:43 lr: 0.000010 loss_cls: 2.9470 (3.0913) grad_norm: 2.6159 (3.0647) time: 1.3092 data: 0.0002 max mem: 13912 +[2024-12-06 17:33:30 root] (utils.py 283): INFO Epoch: [9] [2110/2502] eta: 0:08:30 lr: 0.000010 loss_cls: 3.1949 (3.0928) grad_norm: 2.7546 (3.0647) time: 1.3089 data: 0.0002 max mem: 13912 +[2024-12-06 17:33:43 root] (utils.py 283): INFO Epoch: [9] [2120/2502] eta: 0:08:17 lr: 0.000010 loss_cls: 3.2827 (3.0924) grad_norm: 2.8028 (3.0636) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 17:33:56 root] (utils.py 283): INFO Epoch: [9] [2130/2502] eta: 0:08:04 lr: 0.000010 loss_cls: 3.1763 (3.0928) grad_norm: 2.6246 (3.0657) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 17:34:09 root] (utils.py 283): INFO Epoch: [9] [2140/2502] eta: 0:07:51 lr: 0.000010 loss_cls: 3.3891 (3.0933) grad_norm: 2.9228 (3.0731) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 17:34:22 root] (utils.py 283): INFO Epoch: [9] [2150/2502] eta: 0:07:38 lr: 0.000010 loss_cls: 3.3428 (3.0933) grad_norm: 2.7030 (3.0718) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 17:34:35 root] (utils.py 283): INFO Epoch: [9] [2160/2502] eta: 0:07:25 lr: 0.000010 loss_cls: 3.2034 (3.0927) grad_norm: 2.7030 (3.0711) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 17:34:48 root] (utils.py 283): INFO Epoch: [9] [2170/2502] eta: 0:07:12 lr: 0.000010 loss_cls: 3.0023 (3.0927) grad_norm: 2.7405 (3.0697) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 17:35:01 root] (utils.py 283): INFO Epoch: [9] [2180/2502] eta: 0:06:59 lr: 0.000010 loss_cls: 3.1292 (3.0930) grad_norm: 2.6606 (3.0685) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 17:35:14 root] (utils.py 283): INFO Epoch: [9] [2190/2502] eta: 0:06:46 lr: 0.000010 loss_cls: 3.1292 (3.0930) grad_norm: 2.8057 (3.0675) time: 1.2988 data: 0.0002 max mem: 13912 +[2024-12-06 17:35:27 root] (utils.py 283): INFO Epoch: [9] [2200/2502] eta: 0:06:33 lr: 0.000010 loss_cls: 3.2636 (3.0934) grad_norm: 2.9438 (3.0681) time: 1.2986 data: 0.0002 max mem: 13912 +[2024-12-06 17:35:40 root] (utils.py 283): INFO Epoch: [9] [2210/2502] eta: 0:06:20 lr: 0.000010 loss_cls: 3.2997 (3.0937) grad_norm: 2.7975 (3.0686) time: 1.2996 data: 0.0002 max mem: 13912 +[2024-12-06 17:35:53 root] (utils.py 283): INFO Epoch: [9] [2220/2502] eta: 0:06:07 lr: 0.000010 loss_cls: 3.4273 (3.0958) grad_norm: 2.8462 (3.0697) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 17:36:07 root] (utils.py 283): INFO Epoch: [9] [2230/2502] eta: 0:05:54 lr: 0.000010 loss_cls: 3.4151 (3.0970) grad_norm: 2.8202 (3.0685) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 17:36:20 root] (utils.py 283): INFO Epoch: [9] [2240/2502] eta: 0:05:41 lr: 0.000010 loss_cls: 3.3563 (3.0970) grad_norm: 2.6557 (3.0666) time: 1.3073 data: 0.0002 max mem: 13912 +[2024-12-06 17:36:33 root] (utils.py 283): INFO Epoch: [9] [2250/2502] eta: 0:05:28 lr: 0.000010 loss_cls: 3.1506 (3.0972) grad_norm: 2.6557 (3.0651) time: 1.3077 data: 0.0002 max mem: 13912 +[2024-12-06 17:36:46 root] (utils.py 283): INFO Epoch: [9] [2260/2502] eta: 0:05:15 lr: 0.000010 loss_cls: 3.3078 (3.0983) grad_norm: 2.7332 (3.0656) time: 1.3063 data: 0.0002 max mem: 13912 +[2024-12-06 17:36:59 root] (utils.py 283): INFO Epoch: [9] [2270/2502] eta: 0:05:02 lr: 0.000010 loss_cls: 2.8535 (3.0955) grad_norm: 2.5859 (3.0631) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 17:37:12 root] (utils.py 283): INFO Epoch: [9] [2280/2502] eta: 0:04:49 lr: 0.000010 loss_cls: 2.6250 (3.0954) grad_norm: 2.6886 (3.0635) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 17:37:25 root] (utils.py 283): INFO Epoch: [9] [2290/2502] eta: 0:04:36 lr: 0.000010 loss_cls: 3.1722 (3.0948) grad_norm: 2.8055 (3.0662) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 17:37:38 root] (utils.py 283): INFO Epoch: [9] [2300/2502] eta: 0:04:23 lr: 0.000010 loss_cls: 3.2180 (3.0953) grad_norm: 2.6475 (3.0648) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 17:37:51 root] (utils.py 283): INFO Epoch: [9] [2310/2502] eta: 0:04:10 lr: 0.000010 loss_cls: 3.2881 (3.0948) grad_norm: 2.6475 (3.0636) time: 1.3050 data: 0.0002 max mem: 13912 +[2024-12-06 17:38:04 root] (utils.py 283): INFO Epoch: [9] [2320/2502] eta: 0:03:57 lr: 0.000010 loss_cls: 3.0685 (3.0948) grad_norm: 2.6638 (3.0636) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 17:38:17 root] (utils.py 283): INFO Epoch: [9] [2330/2502] eta: 0:03:44 lr: 0.000010 loss_cls: 3.0699 (3.0946) grad_norm: 2.6548 (3.0623) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 17:38:30 root] (utils.py 283): INFO Epoch: [9] [2340/2502] eta: 0:03:31 lr: 0.000010 loss_cls: 3.1514 (3.0951) grad_norm: 2.5766 (3.0606) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 17:38:43 root] (utils.py 283): INFO Epoch: [9] [2350/2502] eta: 0:03:18 lr: 0.000010 loss_cls: 3.3596 (3.0958) grad_norm: 2.7069 (3.0610) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 17:38:56 root] (utils.py 283): INFO Epoch: [9] [2360/2502] eta: 0:03:05 lr: 0.000010 loss_cls: 3.1773 (3.0959) grad_norm: 2.7699 (3.0597) time: 1.3040 data: 0.0002 max mem: 13912 +[2024-12-06 17:39:09 root] (utils.py 283): INFO Epoch: [9] [2370/2502] eta: 0:02:51 lr: 0.000010 loss_cls: 3.2124 (3.0967) grad_norm: 2.7699 (3.0604) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 17:39:22 root] (utils.py 283): INFO Epoch: [9] [2380/2502] eta: 0:02:38 lr: 0.000010 loss_cls: 3.2390 (3.0959) grad_norm: 2.7641 (3.0589) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 17:39:35 root] (utils.py 283): INFO Epoch: [9] [2390/2502] eta: 0:02:25 lr: 0.000010 loss_cls: 3.1334 (3.0954) grad_norm: 2.6446 (3.0580) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 17:39:48 root] (utils.py 283): INFO Epoch: [9] [2400/2502] eta: 0:02:12 lr: 0.000010 loss_cls: 3.0112 (3.0948) grad_norm: 2.6191 (3.0574) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 17:40:01 root] (utils.py 283): INFO Epoch: [9] [2410/2502] eta: 0:01:59 lr: 0.000010 loss_cls: 3.1080 (3.0949) grad_norm: 2.6353 (3.0569) time: 1.2998 data: 0.0002 max mem: 13912 +[2024-12-06 17:40:14 root] (utils.py 283): INFO Epoch: [9] [2420/2502] eta: 0:01:46 lr: 0.000010 loss_cls: 3.2901 (3.0946) grad_norm: 2.7116 (3.0599) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 17:40:27 root] (utils.py 283): INFO Epoch: [9] [2430/2502] eta: 0:01:33 lr: 0.000010 loss_cls: 3.3123 (3.0950) grad_norm: 2.7116 (3.0587) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 17:40:40 root] (utils.py 283): INFO Epoch: [9] [2440/2502] eta: 0:01:20 lr: 0.000010 loss_cls: 3.3849 (3.0964) grad_norm: 2.6508 (3.0588) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 17:40:53 root] (utils.py 283): INFO Epoch: [9] [2450/2502] eta: 0:01:07 lr: 0.000010 loss_cls: 3.3209 (3.0964) grad_norm: 2.8031 (3.0580) time: 1.3003 data: 0.0002 max mem: 13912 +[2024-12-06 17:41:06 root] (utils.py 283): INFO Epoch: [9] [2460/2502] eta: 0:00:54 lr: 0.000010 loss_cls: 3.3639 (3.0980) grad_norm: 2.9192 (3.0585) time: 1.2981 data: 0.0002 max mem: 13912 +[2024-12-06 17:41:19 root] (utils.py 283): INFO Epoch: [9] [2470/2502] eta: 0:00:41 lr: 0.000010 loss_cls: 3.3648 (3.0973) grad_norm: 2.6856 (3.0567) time: 1.2992 data: 0.0002 max mem: 13912 +[2024-12-06 17:41:32 root] (utils.py 283): INFO Epoch: [9] [2480/2502] eta: 0:00:28 lr: 0.000010 loss_cls: 3.0672 (3.0973) grad_norm: 2.5079 (3.0557) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 17:41:46 root] (utils.py 283): INFO Epoch: [9] [2490/2502] eta: 0:00:15 lr: 0.000010 loss_cls: 3.1217 (3.0977) grad_norm: 2.6559 (3.0564) time: 1.3282 data: 0.0245 max mem: 13912 +[2024-12-06 17:41:59 root] (utils.py 283): INFO Epoch: [9] [2500/2502] eta: 0:00:02 lr: 0.000010 loss_cls: 3.2432 (3.0973) grad_norm: 2.8276 (3.0654) time: 1.3279 data: 0.0245 max mem: 13912 +[2024-12-06 17:42:00 root] (utils.py 283): INFO Epoch: [9] [2501/2502] eta: 0:00:01 lr: 0.000010 loss_cls: 3.2432 (3.0976) grad_norm: 2.8276 (3.0653) time: 1.3282 data: 0.0244 max mem: 13912 +[2024-12-06 17:42:00 root] (utils.py 297): INFO Epoch: [9] Total time: 0:54:20 (1.3031 s / it) +[2024-12-06 17:42:00 root] (engine.py 179): INFO Averaged stats:lr: 0.000010 loss_cls: 3.2432 (3.1001) grad_norm: 2.8276 (3.0653) +[2024-12-06 17:42:01 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:21 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4423 (0.4423) acc1: 92.1875 (92.1875) acc3: 97.6562 (97.6562) acc5: 98.4375 (98.4375) time: 0.2245 data: 0.0004 max mem: 13912 +[2024-12-06 17:42:04 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:19 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6815 (0.6691) acc1: 85.1562 (85.9375) acc3: 96.0938 (95.1705) acc5: 97.6562 (97.0881) time: 0.2269 data: 0.0004 max mem: 13912 +[2024-12-06 17:42:06 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6815 (0.7036) acc1: 83.5938 (85.0818) acc3: 95.3125 (94.9405) acc5: 97.6562 (96.7634) time: 0.2276 data: 0.0004 max mem: 13912 +[2024-12-06 17:42:08 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7485 (0.7258) acc1: 83.5938 (84.3498) acc3: 95.3125 (94.9597) acc5: 96.8750 (96.7490) time: 0.2280 data: 0.0005 max mem: 13912 +[2024-12-06 17:42:10 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7485 (0.7291) acc1: 83.5938 (84.2607) acc3: 94.5312 (94.8742) acc5: 96.8750 (96.8369) time: 0.2280 data: 0.0005 max mem: 13912 +[2024-12-06 17:42:13 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8147 (0.8145) acc1: 78.9062 (82.1232) acc3: 91.4062 (93.5202) acc5: 94.5312 (95.8180) time: 0.2285 data: 0.0004 max mem: 13912 +[2024-12-06 17:42:15 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0929 (0.8468) acc1: 74.2188 (81.7111) acc3: 88.2812 (92.8791) acc5: 91.4062 (95.1844) time: 0.2288 data: 0.0005 max mem: 13912 +[2024-12-06 17:42:17 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0062 (0.8722) acc1: 79.6875 (81.1620) acc3: 91.4062 (92.6717) acc5: 92.9688 (94.9714) time: 0.2294 data: 0.0005 max mem: 13912 +[2024-12-06 17:42:20 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0298 (0.9019) acc1: 75.7812 (80.4977) acc3: 89.8438 (92.0910) acc5: 92.9688 (94.5216) time: 0.2298 data: 0.0008 max mem: 13912 +[2024-12-06 17:42:22 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0942 (0.9287) acc1: 74.2188 (79.7304) acc3: 88.2812 (91.7153) acc5: 91.4062 (94.2909) time: 0.2287 data: 0.0008 max mem: 13912 +[2024-12-06 17:42:23 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0020 (0.9245) acc1: 75.0000 (79.7360) acc3: 89.0625 (91.8160) acc5: 92.9688 (94.4080) time: 0.2245 data: 0.0007 max mem: 13912 +[2024-12-06 17:42:23 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2279 s / it) +[2024-12-06 17:42:23 root] (engine.py 264): INFO * Acc@1 79.690 Acc@3 92.086 Acc@5 94.680 loss 0.918 flops 3.584 layer_flops 3.536 +[2024-12-06 17:42:23 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.7% +[2024-12-06 17:42:24 root] (main.py 551): INFO Max accuracy: 79.69% +[2024-12-06 17:42:25 root] (utils.py 283): INFO Epoch: [10] [ 0/2502] eta: 0:53:38 lr: 0.000008 loss_cls: 2.7521 (2.7521) grad_norm: 3.0184 (3.0184) time: 1.2865 data: 0.0005 max mem: 13912 +[2024-12-06 17:42:38 root] (utils.py 283): INFO Epoch: [10] [ 10/2502] eta: 0:53:56 lr: 0.000008 loss_cls: 3.1306 (3.0914) grad_norm: 2.9434 (2.9232) time: 1.2988 data: 0.0003 max mem: 13912 +[2024-12-06 17:42:51 root] (utils.py 283): INFO Epoch: [10] [ 20/2502] eta: 0:53:48 lr: 0.000008 loss_cls: 3.2939 (3.1304) grad_norm: 2.5619 (2.9220) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 17:43:04 root] (utils.py 283): INFO Epoch: [10] [ 30/2502] eta: 0:53:52 lr: 0.000008 loss_cls: 3.1497 (3.0935) grad_norm: 2.5609 (2.8504) time: 1.3122 data: 0.0003 max mem: 13912 +[2024-12-06 17:43:18 root] (utils.py 283): INFO Epoch: [10] [ 40/2502] eta: 0:53:37 lr: 0.000008 loss_cls: 2.9701 (3.0218) grad_norm: 2.8550 (3.0818) time: 1.3131 data: 0.0003 max mem: 13912 +[2024-12-06 17:43:31 root] (utils.py 283): INFO Epoch: [10] [ 50/2502] eta: 0:53:21 lr: 0.000008 loss_cls: 2.8697 (3.0127) grad_norm: 2.8550 (3.0181) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 17:43:44 root] (utils.py 283): INFO Epoch: [10] [ 60/2502] eta: 0:53:08 lr: 0.000008 loss_cls: 3.1413 (3.0287) grad_norm: 2.5449 (2.9657) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 17:43:57 root] (utils.py 283): INFO Epoch: [10] [ 70/2502] eta: 0:52:54 lr: 0.000008 loss_cls: 3.2579 (3.0483) grad_norm: 2.5449 (2.9384) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 17:44:10 root] (utils.py 283): INFO Epoch: [10] [ 80/2502] eta: 0:52:41 lr: 0.000008 loss_cls: 3.2301 (3.0610) grad_norm: 2.7101 (2.9540) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 17:44:23 root] (utils.py 283): INFO Epoch: [10] [ 90/2502] eta: 0:52:29 lr: 0.000008 loss_cls: 3.2193 (3.0464) grad_norm: 2.7622 (2.9849) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 17:44:36 root] (utils.py 283): INFO Epoch: [10] [ 100/2502] eta: 0:52:17 lr: 0.000008 loss_cls: 2.6726 (3.0163) grad_norm: 2.7622 (2.9534) time: 1.3109 data: 0.0003 max mem: 13912 +[2024-12-06 17:44:49 root] (utils.py 283): INFO Epoch: [10] [ 110/2502] eta: 0:52:04 lr: 0.000008 loss_cls: 2.8949 (3.0181) grad_norm: 2.6649 (2.9814) time: 1.3083 data: 0.0003 max mem: 13912 +[2024-12-06 17:45:02 root] (utils.py 283): INFO Epoch: [10] [ 120/2502] eta: 0:51:51 lr: 0.000008 loss_cls: 3.0955 (3.0202) grad_norm: 2.8243 (2.9674) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 17:45:15 root] (utils.py 283): INFO Epoch: [10] [ 130/2502] eta: 0:51:37 lr: 0.000008 loss_cls: 2.9945 (3.0171) grad_norm: 2.6105 (2.9459) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 17:45:28 root] (utils.py 283): INFO Epoch: [10] [ 140/2502] eta: 0:51:24 lr: 0.000008 loss_cls: 2.9945 (3.0200) grad_norm: 2.7536 (2.9666) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 17:45:41 root] (utils.py 283): INFO Epoch: [10] [ 150/2502] eta: 0:51:12 lr: 0.000008 loss_cls: 3.2273 (3.0341) grad_norm: 2.7756 (2.9643) time: 1.3086 data: 0.0003 max mem: 13912 +[2024-12-06 17:45:54 root] (utils.py 283): INFO Epoch: [10] [ 160/2502] eta: 0:50:58 lr: 0.000008 loss_cls: 3.3750 (3.0427) grad_norm: 2.4549 (2.9425) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 17:46:07 root] (utils.py 283): INFO Epoch: [10] [ 170/2502] eta: 0:50:45 lr: 0.000008 loss_cls: 3.0884 (3.0282) grad_norm: 2.6131 (2.9456) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 17:46:20 root] (utils.py 283): INFO Epoch: [10] [ 180/2502] eta: 0:50:31 lr: 0.000008 loss_cls: 3.1225 (3.0420) grad_norm: 2.7035 (3.0092) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 17:46:33 root] (utils.py 283): INFO Epoch: [10] [ 190/2502] eta: 0:50:18 lr: 0.000008 loss_cls: 3.3562 (3.0626) grad_norm: 2.7478 (2.9924) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 17:46:46 root] (utils.py 283): INFO Epoch: [10] [ 200/2502] eta: 0:50:05 lr: 0.000008 loss_cls: 3.4386 (3.0791) grad_norm: 2.7644 (2.9839) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 17:46:59 root] (utils.py 283): INFO Epoch: [10] [ 210/2502] eta: 0:49:52 lr: 0.000008 loss_cls: 3.5289 (3.0921) grad_norm: 2.7864 (2.9733) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 17:47:12 root] (utils.py 283): INFO Epoch: [10] [ 220/2502] eta: 0:49:38 lr: 0.000008 loss_cls: 3.2679 (3.0933) grad_norm: 2.6600 (2.9584) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 17:47:25 root] (utils.py 283): INFO Epoch: [10] [ 230/2502] eta: 0:49:25 lr: 0.000008 loss_cls: 3.2078 (3.0945) grad_norm: 2.6600 (2.9701) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 17:47:39 root] (utils.py 283): INFO Epoch: [10] [ 240/2502] eta: 0:49:12 lr: 0.000008 loss_cls: 3.2078 (3.0895) grad_norm: 2.8660 (2.9899) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 17:47:52 root] (utils.py 283): INFO Epoch: [10] [ 250/2502] eta: 0:48:59 lr: 0.000008 loss_cls: 3.0680 (3.0869) grad_norm: 2.6013 (2.9805) time: 1.3065 data: 0.0002 max mem: 13912 +[2024-12-06 17:48:05 root] (utils.py 283): INFO Epoch: [10] [ 260/2502] eta: 0:48:46 lr: 0.000008 loss_cls: 3.1801 (3.0861) grad_norm: 2.6251 (2.9722) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 17:48:18 root] (utils.py 283): INFO Epoch: [10] [ 270/2502] eta: 0:48:33 lr: 0.000008 loss_cls: 3.1801 (3.0861) grad_norm: 2.7496 (2.9638) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 17:48:31 root] (utils.py 283): INFO Epoch: [10] [ 280/2502] eta: 0:48:25 lr: 0.000008 loss_cls: 3.3407 (3.0927) grad_norm: 2.7646 (2.9593) time: 1.3347 data: 0.0003 max mem: 13912 +[2024-12-06 17:48:44 root] (utils.py 283): INFO Epoch: [10] [ 290/2502] eta: 0:48:12 lr: 0.000008 loss_cls: 3.2670 (3.0895) grad_norm: 2.6583 (2.9515) time: 1.3352 data: 0.0003 max mem: 13912 +[2024-12-06 17:48:57 root] (utils.py 283): INFO Epoch: [10] [ 300/2502] eta: 0:47:58 lr: 0.000008 loss_cls: 3.0735 (3.0890) grad_norm: 2.8376 (2.9549) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 17:49:10 root] (utils.py 283): INFO Epoch: [10] [ 310/2502] eta: 0:47:45 lr: 0.000008 loss_cls: 3.0573 (3.0876) grad_norm: 2.7763 (2.9464) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 17:49:24 root] (utils.py 283): INFO Epoch: [10] [ 320/2502] eta: 0:47:31 lr: 0.000008 loss_cls: 3.2262 (3.0938) grad_norm: 2.6032 (2.9379) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 17:49:37 root] (utils.py 283): INFO Epoch: [10] [ 330/2502] eta: 0:47:18 lr: 0.000008 loss_cls: 3.2760 (3.0925) grad_norm: 2.6560 (2.9311) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 17:49:50 root] (utils.py 283): INFO Epoch: [10] [ 340/2502] eta: 0:47:04 lr: 0.000008 loss_cls: 3.2387 (3.0972) grad_norm: 2.6607 (2.9362) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 17:50:03 root] (utils.py 283): INFO Epoch: [10] [ 350/2502] eta: 0:46:51 lr: 0.000008 loss_cls: 3.2042 (3.0954) grad_norm: 2.7065 (2.9355) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 17:50:16 root] (utils.py 283): INFO Epoch: [10] [ 360/2502] eta: 0:46:38 lr: 0.000008 loss_cls: 3.1309 (3.0881) grad_norm: 2.7065 (2.9368) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 17:50:29 root] (utils.py 283): INFO Epoch: [10] [ 370/2502] eta: 0:46:25 lr: 0.000008 loss_cls: 3.1309 (3.0895) grad_norm: 2.7185 (2.9319) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 17:50:42 root] (utils.py 283): INFO Epoch: [10] [ 380/2502] eta: 0:46:11 lr: 0.000008 loss_cls: 3.2403 (3.0882) grad_norm: 2.7052 (2.9495) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 17:50:55 root] (utils.py 283): INFO Epoch: [10] [ 390/2502] eta: 0:45:58 lr: 0.000008 loss_cls: 3.2632 (3.0868) grad_norm: 2.6386 (2.9583) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 17:51:08 root] (utils.py 283): INFO Epoch: [10] [ 400/2502] eta: 0:45:45 lr: 0.000008 loss_cls: 3.1084 (3.0864) grad_norm: 2.6178 (2.9549) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 17:51:21 root] (utils.py 283): INFO Epoch: [10] [ 410/2502] eta: 0:45:32 lr: 0.000008 loss_cls: 3.0371 (3.0839) grad_norm: 2.6246 (2.9558) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 17:51:34 root] (utils.py 283): INFO Epoch: [10] [ 420/2502] eta: 0:45:18 lr: 0.000008 loss_cls: 3.2715 (3.0928) grad_norm: 2.7253 (2.9673) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 17:51:47 root] (utils.py 283): INFO Epoch: [10] [ 430/2502] eta: 0:45:05 lr: 0.000008 loss_cls: 3.4590 (3.0990) grad_norm: 2.9840 (2.9720) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 17:52:00 root] (utils.py 283): INFO Epoch: [10] [ 440/2502] eta: 0:44:52 lr: 0.000008 loss_cls: 3.2138 (3.0905) grad_norm: 2.7922 (2.9661) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 17:52:13 root] (utils.py 283): INFO Epoch: [10] [ 450/2502] eta: 0:44:39 lr: 0.000008 loss_cls: 2.6055 (3.0848) grad_norm: 2.7172 (3.0410) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 17:52:26 root] (utils.py 283): INFO Epoch: [10] [ 460/2502] eta: 0:44:26 lr: 0.000008 loss_cls: 3.1782 (3.0879) grad_norm: 2.7555 (3.0368) time: 1.3062 data: 0.0003 max mem: 13912 +[2024-12-06 17:52:39 root] (utils.py 283): INFO Epoch: [10] [ 470/2502] eta: 0:44:13 lr: 0.000008 loss_cls: 3.3769 (3.0894) grad_norm: 2.6058 (3.0270) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 17:52:52 root] (utils.py 283): INFO Epoch: [10] [ 480/2502] eta: 0:43:59 lr: 0.000008 loss_cls: 3.2621 (3.0847) grad_norm: 2.5421 (3.0234) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 17:53:05 root] (utils.py 283): INFO Epoch: [10] [ 490/2502] eta: 0:43:46 lr: 0.000008 loss_cls: 2.7587 (3.0803) grad_norm: 2.6232 (3.0182) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 17:53:18 root] (utils.py 283): INFO Epoch: [10] [ 500/2502] eta: 0:43:33 lr: 0.000008 loss_cls: 2.9577 (3.0755) grad_norm: 2.7722 (3.0175) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 17:53:31 root] (utils.py 283): INFO Epoch: [10] [ 510/2502] eta: 0:43:20 lr: 0.000008 loss_cls: 2.8696 (3.0677) grad_norm: 2.6223 (3.0104) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 17:53:44 root] (utils.py 283): INFO Epoch: [10] [ 520/2502] eta: 0:43:07 lr: 0.000008 loss_cls: 2.5840 (3.0647) grad_norm: 2.6476 (3.0073) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 17:53:57 root] (utils.py 283): INFO Epoch: [10] [ 530/2502] eta: 0:42:54 lr: 0.000008 loss_cls: 3.2120 (3.0660) grad_norm: 2.6987 (3.0060) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 17:54:10 root] (utils.py 283): INFO Epoch: [10] [ 540/2502] eta: 0:42:41 lr: 0.000008 loss_cls: 3.2120 (3.0623) grad_norm: 2.6223 (3.0004) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 17:54:23 root] (utils.py 283): INFO Epoch: [10] [ 550/2502] eta: 0:42:27 lr: 0.000008 loss_cls: 2.9744 (3.0641) grad_norm: 2.6540 (3.0042) time: 1.2992 data: 0.0002 max mem: 13912 +[2024-12-06 17:54:36 root] (utils.py 283): INFO Epoch: [10] [ 560/2502] eta: 0:42:14 lr: 0.000008 loss_cls: 3.1168 (3.0638) grad_norm: 2.6349 (2.9981) time: 1.2988 data: 0.0003 max mem: 13912 +[2024-12-06 17:54:49 root] (utils.py 283): INFO Epoch: [10] [ 570/2502] eta: 0:42:01 lr: 0.000008 loss_cls: 3.1168 (3.0673) grad_norm: 2.6095 (2.9934) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 17:55:02 root] (utils.py 283): INFO Epoch: [10] [ 580/2502] eta: 0:41:48 lr: 0.000008 loss_cls: 3.0621 (3.0676) grad_norm: 2.5365 (2.9857) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 17:55:15 root] (utils.py 283): INFO Epoch: [10] [ 590/2502] eta: 0:41:34 lr: 0.000008 loss_cls: 3.2386 (3.0698) grad_norm: 2.6723 (2.9838) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 17:55:28 root] (utils.py 283): INFO Epoch: [10] [ 600/2502] eta: 0:41:21 lr: 0.000008 loss_cls: 3.3140 (3.0733) grad_norm: 2.7816 (2.9799) time: 1.3002 data: 0.0002 max mem: 13912 +[2024-12-06 17:55:41 root] (utils.py 283): INFO Epoch: [10] [ 610/2502] eta: 0:41:08 lr: 0.000008 loss_cls: 3.2894 (3.0733) grad_norm: 2.6104 (2.9748) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 17:55:54 root] (utils.py 283): INFO Epoch: [10] [ 620/2502] eta: 0:40:55 lr: 0.000008 loss_cls: 3.1687 (3.0725) grad_norm: 2.6394 (2.9702) time: 1.3072 data: 0.0003 max mem: 13912 +[2024-12-06 17:56:07 root] (utils.py 283): INFO Epoch: [10] [ 630/2502] eta: 0:40:42 lr: 0.000008 loss_cls: 3.1574 (3.0708) grad_norm: 2.6732 (2.9779) time: 1.3069 data: 0.0003 max mem: 13912 +[2024-12-06 17:56:20 root] (utils.py 283): INFO Epoch: [10] [ 640/2502] eta: 0:40:29 lr: 0.000008 loss_cls: 3.1653 (3.0712) grad_norm: 2.7195 (2.9782) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 17:56:33 root] (utils.py 283): INFO Epoch: [10] [ 650/2502] eta: 0:40:16 lr: 0.000008 loss_cls: 3.1653 (3.0696) grad_norm: 2.7436 (2.9830) time: 1.2989 data: 0.0003 max mem: 13912 +[2024-12-06 17:56:46 root] (utils.py 283): INFO Epoch: [10] [ 660/2502] eta: 0:40:03 lr: 0.000008 loss_cls: 3.0511 (3.0675) grad_norm: 2.7297 (2.9856) time: 1.2976 data: 0.0003 max mem: 13912 +[2024-12-06 17:56:59 root] (utils.py 283): INFO Epoch: [10] [ 670/2502] eta: 0:39:50 lr: 0.000008 loss_cls: 2.8520 (3.0649) grad_norm: 2.8668 (3.0126) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 17:57:12 root] (utils.py 283): INFO Epoch: [10] [ 680/2502] eta: 0:39:36 lr: 0.000008 loss_cls: 2.8124 (3.0627) grad_norm: 2.8246 (3.0116) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 17:57:26 root] (utils.py 283): INFO Epoch: [10] [ 690/2502] eta: 0:39:24 lr: 0.000008 loss_cls: 3.2238 (3.0661) grad_norm: 2.6589 (3.0073) time: 1.3088 data: 0.0002 max mem: 13912 +[2024-12-06 17:57:39 root] (utils.py 283): INFO Epoch: [10] [ 700/2502] eta: 0:39:10 lr: 0.000008 loss_cls: 3.4619 (3.0705) grad_norm: 2.6519 (3.0042) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 17:57:52 root] (utils.py 283): INFO Epoch: [10] [ 710/2502] eta: 0:38:57 lr: 0.000008 loss_cls: 3.3280 (3.0702) grad_norm: 2.6776 (3.0244) time: 1.2996 data: 0.0003 max mem: 13912 +[2024-12-06 17:58:05 root] (utils.py 283): INFO Epoch: [10] [ 720/2502] eta: 0:38:44 lr: 0.000008 loss_cls: 3.1311 (3.0721) grad_norm: 2.5530 (3.0199) time: 1.3065 data: 0.0003 max mem: 13912 +[2024-12-06 17:58:18 root] (utils.py 283): INFO Epoch: [10] [ 730/2502] eta: 0:38:31 lr: 0.000008 loss_cls: 3.1045 (3.0701) grad_norm: 2.5504 (3.0210) time: 1.3074 data: 0.0003 max mem: 13912 +[2024-12-06 17:58:31 root] (utils.py 283): INFO Epoch: [10] [ 740/2502] eta: 0:38:18 lr: 0.000008 loss_cls: 3.2544 (3.0717) grad_norm: 2.6154 (3.0247) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 17:58:44 root] (utils.py 283): INFO Epoch: [10] [ 750/2502] eta: 0:38:05 lr: 0.000008 loss_cls: 2.9785 (3.0671) grad_norm: 2.7952 (3.0239) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 17:58:57 root] (utils.py 283): INFO Epoch: [10] [ 760/2502] eta: 0:37:52 lr: 0.000008 loss_cls: 2.7851 (3.0649) grad_norm: 2.7338 (3.0203) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 17:59:10 root] (utils.py 283): INFO Epoch: [10] [ 770/2502] eta: 0:37:39 lr: 0.000008 loss_cls: 3.0479 (3.0657) grad_norm: 2.6680 (3.0169) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 17:59:23 root] (utils.py 283): INFO Epoch: [10] [ 780/2502] eta: 0:37:26 lr: 0.000008 loss_cls: 3.0479 (3.0669) grad_norm: 2.6680 (3.0248) time: 1.3019 data: 0.0004 max mem: 13912 +[2024-12-06 17:59:36 root] (utils.py 283): INFO Epoch: [10] [ 790/2502] eta: 0:37:13 lr: 0.000008 loss_cls: 3.0445 (3.0686) grad_norm: 2.6008 (3.0216) time: 1.3043 data: 0.0004 max mem: 13912 +[2024-12-06 17:59:49 root] (utils.py 283): INFO Epoch: [10] [ 800/2502] eta: 0:37:00 lr: 0.000008 loss_cls: 3.0960 (3.0696) grad_norm: 2.5663 (3.0250) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 18:00:02 root] (utils.py 283): INFO Epoch: [10] [ 810/2502] eta: 0:36:47 lr: 0.000008 loss_cls: 3.0686 (3.0699) grad_norm: 2.6599 (3.0224) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 18:00:15 root] (utils.py 283): INFO Epoch: [10] [ 820/2502] eta: 0:36:34 lr: 0.000008 loss_cls: 3.0033 (3.0704) grad_norm: 2.7079 (3.0193) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 18:00:28 root] (utils.py 283): INFO Epoch: [10] [ 830/2502] eta: 0:36:20 lr: 0.000008 loss_cls: 3.2572 (3.0733) grad_norm: 2.6929 (3.0263) time: 1.2972 data: 0.0002 max mem: 13912 +[2024-12-06 18:00:41 root] (utils.py 283): INFO Epoch: [10] [ 840/2502] eta: 0:36:07 lr: 0.000008 loss_cls: 3.3743 (3.0745) grad_norm: 2.7105 (3.0224) time: 1.2961 data: 0.0003 max mem: 13912 +[2024-12-06 18:00:54 root] (utils.py 283): INFO Epoch: [10] [ 850/2502] eta: 0:35:54 lr: 0.000008 loss_cls: 3.3743 (3.0767) grad_norm: 2.7389 (3.0242) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 18:01:07 root] (utils.py 283): INFO Epoch: [10] [ 860/2502] eta: 0:35:41 lr: 0.000008 loss_cls: 3.1809 (3.0739) grad_norm: 2.7389 (3.0230) time: 1.2974 data: 0.0003 max mem: 13912 +[2024-12-06 18:01:20 root] (utils.py 283): INFO Epoch: [10] [ 870/2502] eta: 0:35:28 lr: 0.000008 loss_cls: 2.7187 (3.0695) grad_norm: 2.5638 (3.0195) time: 1.2961 data: 0.0002 max mem: 13912 +[2024-12-06 18:01:33 root] (utils.py 283): INFO Epoch: [10] [ 880/2502] eta: 0:35:14 lr: 0.000008 loss_cls: 2.8069 (3.0693) grad_norm: 2.6010 (3.0152) time: 1.2945 data: 0.0002 max mem: 13912 +[2024-12-06 18:01:46 root] (utils.py 283): INFO Epoch: [10] [ 890/2502] eta: 0:35:01 lr: 0.000008 loss_cls: 2.9512 (3.0664) grad_norm: 2.5395 (3.0096) time: 1.2990 data: 0.0003 max mem: 13912 +[2024-12-06 18:01:59 root] (utils.py 283): INFO Epoch: [10] [ 900/2502] eta: 0:34:48 lr: 0.000008 loss_cls: 3.1127 (3.0673) grad_norm: 2.5834 (3.0077) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 18:02:12 root] (utils.py 283): INFO Epoch: [10] [ 910/2502] eta: 0:34:35 lr: 0.000008 loss_cls: 3.1832 (3.0684) grad_norm: 2.6463 (3.0047) time: 1.2961 data: 0.0003 max mem: 13912 +[2024-12-06 18:02:25 root] (utils.py 283): INFO Epoch: [10] [ 920/2502] eta: 0:34:22 lr: 0.000008 loss_cls: 3.1389 (3.0680) grad_norm: 2.6750 (3.0235) time: 1.2989 data: 0.0003 max mem: 13912 +[2024-12-06 18:02:38 root] (utils.py 283): INFO Epoch: [10] [ 930/2502] eta: 0:34:09 lr: 0.000008 loss_cls: 2.8468 (3.0656) grad_norm: 2.7631 (3.0205) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 18:02:51 root] (utils.py 283): INFO Epoch: [10] [ 940/2502] eta: 0:33:56 lr: 0.000008 loss_cls: 2.8468 (3.0656) grad_norm: 2.5395 (3.0142) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 18:03:04 root] (utils.py 283): INFO Epoch: [10] [ 950/2502] eta: 0:33:43 lr: 0.000008 loss_cls: 3.1314 (3.0660) grad_norm: 2.5161 (3.0149) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 18:03:17 root] (utils.py 283): INFO Epoch: [10] [ 960/2502] eta: 0:33:30 lr: 0.000008 loss_cls: 3.0425 (3.0647) grad_norm: 2.7646 (3.0122) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 18:03:30 root] (utils.py 283): INFO Epoch: [10] [ 970/2502] eta: 0:33:17 lr: 0.000008 loss_cls: 3.0032 (3.0630) grad_norm: 2.5988 (3.0128) time: 1.3108 data: 0.0003 max mem: 13912 +[2024-12-06 18:03:43 root] (utils.py 283): INFO Epoch: [10] [ 980/2502] eta: 0:33:04 lr: 0.000008 loss_cls: 2.9538 (3.0616) grad_norm: 2.5870 (3.0098) time: 1.3078 data: 0.0003 max mem: 13912 +[2024-12-06 18:03:56 root] (utils.py 283): INFO Epoch: [10] [ 990/2502] eta: 0:32:51 lr: 0.000008 loss_cls: 3.0690 (3.0624) grad_norm: 2.8394 (3.0213) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 18:04:09 root] (utils.py 283): INFO Epoch: [10] [1000/2502] eta: 0:32:38 lr: 0.000008 loss_cls: 3.1570 (3.0622) grad_norm: 2.8254 (3.0188) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 18:04:22 root] (utils.py 283): INFO Epoch: [10] [1010/2502] eta: 0:32:25 lr: 0.000008 loss_cls: 3.2216 (3.0627) grad_norm: 2.7784 (3.0214) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 18:04:35 root] (utils.py 283): INFO Epoch: [10] [1020/2502] eta: 0:32:12 lr: 0.000008 loss_cls: 3.1884 (3.0625) grad_norm: 2.8096 (3.0245) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 18:04:48 root] (utils.py 283): INFO Epoch: [10] [1030/2502] eta: 0:31:59 lr: 0.000008 loss_cls: 3.0203 (3.0614) grad_norm: 2.7637 (3.0226) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 18:05:01 root] (utils.py 283): INFO Epoch: [10] [1040/2502] eta: 0:31:46 lr: 0.000008 loss_cls: 3.0203 (3.0611) grad_norm: 2.6679 (3.0190) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 18:05:14 root] (utils.py 283): INFO Epoch: [10] [1050/2502] eta: 0:31:33 lr: 0.000008 loss_cls: 2.9891 (3.0599) grad_norm: 2.6080 (3.0197) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 18:05:27 root] (utils.py 283): INFO Epoch: [10] [1060/2502] eta: 0:31:20 lr: 0.000008 loss_cls: 2.9589 (3.0586) grad_norm: 2.6614 (3.0156) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 18:05:40 root] (utils.py 283): INFO Epoch: [10] [1070/2502] eta: 0:31:06 lr: 0.000008 loss_cls: 3.1359 (3.0608) grad_norm: 2.6956 (3.0160) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 18:05:53 root] (utils.py 283): INFO Epoch: [10] [1080/2502] eta: 0:30:53 lr: 0.000008 loss_cls: 3.1359 (3.0609) grad_norm: 2.7960 (3.0160) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 18:06:07 root] (utils.py 283): INFO Epoch: [10] [1090/2502] eta: 0:30:40 lr: 0.000008 loss_cls: 3.1103 (3.0614) grad_norm: 2.9536 (3.0272) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 18:06:20 root] (utils.py 283): INFO Epoch: [10] [1100/2502] eta: 0:30:27 lr: 0.000008 loss_cls: 3.1124 (3.0589) grad_norm: 2.7476 (3.0245) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 18:06:33 root] (utils.py 283): INFO Epoch: [10] [1110/2502] eta: 0:30:14 lr: 0.000008 loss_cls: 2.8392 (3.0591) grad_norm: 2.5825 (3.0222) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 18:06:46 root] (utils.py 283): INFO Epoch: [10] [1120/2502] eta: 0:30:01 lr: 0.000008 loss_cls: 3.2967 (3.0598) grad_norm: 2.7113 (3.0203) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 18:06:59 root] (utils.py 283): INFO Epoch: [10] [1130/2502] eta: 0:29:48 lr: 0.000008 loss_cls: 2.9452 (3.0587) grad_norm: 2.6850 (3.0174) time: 1.3078 data: 0.0002 max mem: 13912 +[2024-12-06 18:07:12 root] (utils.py 283): INFO Epoch: [10] [1140/2502] eta: 0:29:35 lr: 0.000008 loss_cls: 2.8635 (3.0576) grad_norm: 2.5771 (3.0149) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 18:07:25 root] (utils.py 283): INFO Epoch: [10] [1150/2502] eta: 0:29:22 lr: 0.000008 loss_cls: 3.1661 (3.0586) grad_norm: 2.6549 (3.0178) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 18:07:38 root] (utils.py 283): INFO Epoch: [10] [1160/2502] eta: 0:29:09 lr: 0.000008 loss_cls: 3.3261 (3.0604) grad_norm: 2.6713 (3.0160) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 18:07:51 root] (utils.py 283): INFO Epoch: [10] [1170/2502] eta: 0:28:56 lr: 0.000008 loss_cls: 3.2136 (3.0596) grad_norm: 2.7127 (3.0140) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 18:08:04 root] (utils.py 283): INFO Epoch: [10] [1180/2502] eta: 0:28:43 lr: 0.000008 loss_cls: 3.2420 (3.0605) grad_norm: 2.7285 (3.0148) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 18:08:17 root] (utils.py 283): INFO Epoch: [10] [1190/2502] eta: 0:28:30 lr: 0.000008 loss_cls: 3.0917 (3.0593) grad_norm: 2.6088 (3.0134) time: 1.3000 data: 0.0003 max mem: 13912 +[2024-12-06 18:08:30 root] (utils.py 283): INFO Epoch: [10] [1200/2502] eta: 0:28:17 lr: 0.000008 loss_cls: 3.1099 (3.0607) grad_norm: 2.6559 (3.0135) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 18:08:43 root] (utils.py 283): INFO Epoch: [10] [1210/2502] eta: 0:28:04 lr: 0.000008 loss_cls: 3.3320 (3.0599) grad_norm: 2.6354 (3.0121) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 18:08:56 root] (utils.py 283): INFO Epoch: [10] [1220/2502] eta: 0:27:51 lr: 0.000008 loss_cls: 3.3320 (3.0598) grad_norm: 2.5420 (3.0086) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 18:09:09 root] (utils.py 283): INFO Epoch: [10] [1230/2502] eta: 0:27:38 lr: 0.000008 loss_cls: 3.3497 (3.0614) grad_norm: 2.5083 (3.0072) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 18:09:22 root] (utils.py 283): INFO Epoch: [10] [1240/2502] eta: 0:27:25 lr: 0.000008 loss_cls: 3.4251 (3.0632) grad_norm: 2.6241 (3.0069) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 18:09:35 root] (utils.py 283): INFO Epoch: [10] [1250/2502] eta: 0:27:12 lr: 0.000008 loss_cls: 3.2914 (3.0637) grad_norm: 2.5945 (3.0065) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 18:09:48 root] (utils.py 283): INFO Epoch: [10] [1260/2502] eta: 0:26:59 lr: 0.000008 loss_cls: 3.2667 (3.0652) grad_norm: 2.5804 (3.0062) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 18:10:01 root] (utils.py 283): INFO Epoch: [10] [1270/2502] eta: 0:26:46 lr: 0.000008 loss_cls: 3.2667 (3.0663) grad_norm: 2.5572 (3.0048) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 18:10:14 root] (utils.py 283): INFO Epoch: [10] [1280/2502] eta: 0:26:33 lr: 0.000008 loss_cls: 3.2693 (3.0664) grad_norm: 2.5626 (3.0027) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 18:10:27 root] (utils.py 283): INFO Epoch: [10] [1290/2502] eta: 0:26:20 lr: 0.000008 loss_cls: 3.1995 (3.0658) grad_norm: 2.6978 (3.0007) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 18:10:40 root] (utils.py 283): INFO Epoch: [10] [1300/2502] eta: 0:26:07 lr: 0.000008 loss_cls: 3.0458 (3.0664) grad_norm: 2.6502 (2.9984) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 18:10:53 root] (utils.py 283): INFO Epoch: [10] [1310/2502] eta: 0:25:53 lr: 0.000008 loss_cls: 3.2348 (3.0663) grad_norm: 2.6529 (2.9973) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 18:11:06 root] (utils.py 283): INFO Epoch: [10] [1320/2502] eta: 0:25:40 lr: 0.000008 loss_cls: 3.2348 (3.0672) grad_norm: 2.6734 (2.9959) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 18:11:19 root] (utils.py 283): INFO Epoch: [10] [1330/2502] eta: 0:25:27 lr: 0.000008 loss_cls: 3.0670 (3.0663) grad_norm: 2.6526 (2.9952) time: 1.3063 data: 0.0002 max mem: 13912 +[2024-12-06 18:11:32 root] (utils.py 283): INFO Epoch: [10] [1340/2502] eta: 0:25:14 lr: 0.000008 loss_cls: 3.1299 (3.0676) grad_norm: 2.5873 (2.9921) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 18:11:45 root] (utils.py 283): INFO Epoch: [10] [1350/2502] eta: 0:25:01 lr: 0.000008 loss_cls: 3.2090 (3.0674) grad_norm: 2.6218 (2.9929) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 18:11:58 root] (utils.py 283): INFO Epoch: [10] [1360/2502] eta: 0:24:48 lr: 0.000008 loss_cls: 3.2778 (3.0686) grad_norm: 2.9081 (3.0007) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 18:12:11 root] (utils.py 283): INFO Epoch: [10] [1370/2502] eta: 0:24:35 lr: 0.000008 loss_cls: 3.2759 (3.0696) grad_norm: 2.8106 (2.9995) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 18:12:25 root] (utils.py 283): INFO Epoch: [10] [1380/2502] eta: 0:24:22 lr: 0.000008 loss_cls: 3.0938 (3.0706) grad_norm: 2.6090 (2.9969) time: 1.3062 data: 0.0003 max mem: 13912 +[2024-12-06 18:12:38 root] (utils.py 283): INFO Epoch: [10] [1390/2502] eta: 0:24:09 lr: 0.000008 loss_cls: 3.0594 (3.0695) grad_norm: 2.5358 (2.9995) time: 1.3080 data: 0.0003 max mem: 13912 +[2024-12-06 18:12:51 root] (utils.py 283): INFO Epoch: [10] [1400/2502] eta: 0:23:56 lr: 0.000008 loss_cls: 3.2189 (3.0707) grad_norm: 2.6071 (2.9962) time: 1.3129 data: 0.0003 max mem: 13912 +[2024-12-06 18:13:04 root] (utils.py 283): INFO Epoch: [10] [1410/2502] eta: 0:23:43 lr: 0.000008 loss_cls: 3.2709 (3.0710) grad_norm: 2.6218 (2.9957) time: 1.3069 data: 0.0003 max mem: 13912 +[2024-12-06 18:13:17 root] (utils.py 283): INFO Epoch: [10] [1420/2502] eta: 0:23:30 lr: 0.000008 loss_cls: 3.1806 (3.0713) grad_norm: 2.6410 (2.9936) time: 1.2964 data: 0.0003 max mem: 13912 +[2024-12-06 18:13:30 root] (utils.py 283): INFO Epoch: [10] [1430/2502] eta: 0:23:17 lr: 0.000008 loss_cls: 3.1619 (3.0716) grad_norm: 2.6410 (2.9936) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 18:13:43 root] (utils.py 283): INFO Epoch: [10] [1440/2502] eta: 0:23:04 lr: 0.000008 loss_cls: 3.1822 (3.0722) grad_norm: 2.7996 (2.9971) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 18:13:56 root] (utils.py 283): INFO Epoch: [10] [1450/2502] eta: 0:22:51 lr: 0.000008 loss_cls: 3.2074 (3.0724) grad_norm: 2.7996 (2.9961) time: 1.2961 data: 0.0002 max mem: 13912 +[2024-12-06 18:14:09 root] (utils.py 283): INFO Epoch: [10] [1460/2502] eta: 0:22:38 lr: 0.000008 loss_cls: 2.9268 (3.0704) grad_norm: 2.8233 (2.9956) time: 1.2961 data: 0.0002 max mem: 13912 +[2024-12-06 18:14:22 root] (utils.py 283): INFO Epoch: [10] [1470/2502] eta: 0:22:25 lr: 0.000008 loss_cls: 2.9658 (3.0704) grad_norm: 2.6520 (2.9924) time: 1.2996 data: 0.0002 max mem: 13912 +[2024-12-06 18:14:35 root] (utils.py 283): INFO Epoch: [10] [1480/2502] eta: 0:22:12 lr: 0.000008 loss_cls: 3.1777 (3.0714) grad_norm: 2.7396 (2.9948) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 18:14:48 root] (utils.py 283): INFO Epoch: [10] [1490/2502] eta: 0:21:59 lr: 0.000008 loss_cls: 3.2979 (3.0715) grad_norm: 2.7762 (2.9948) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 18:15:01 root] (utils.py 283): INFO Epoch: [10] [1500/2502] eta: 0:21:46 lr: 0.000008 loss_cls: 3.2223 (3.0715) grad_norm: 2.6202 (2.9930) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 18:15:14 root] (utils.py 283): INFO Epoch: [10] [1510/2502] eta: 0:21:33 lr: 0.000008 loss_cls: 3.1569 (3.0719) grad_norm: 2.6994 (2.9951) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 18:15:27 root] (utils.py 283): INFO Epoch: [10] [1520/2502] eta: 0:21:20 lr: 0.000008 loss_cls: 3.3271 (3.0747) grad_norm: 2.7237 (2.9929) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 18:15:40 root] (utils.py 283): INFO Epoch: [10] [1530/2502] eta: 0:21:07 lr: 0.000008 loss_cls: 3.4443 (3.0755) grad_norm: 2.6767 (2.9945) time: 1.2986 data: 0.0003 max mem: 13912 +[2024-12-06 18:15:53 root] (utils.py 283): INFO Epoch: [10] [1540/2502] eta: 0:20:53 lr: 0.000008 loss_cls: 3.2240 (3.0760) grad_norm: 2.7021 (2.9939) time: 1.2957 data: 0.0003 max mem: 13912 +[2024-12-06 18:16:06 root] (utils.py 283): INFO Epoch: [10] [1550/2502] eta: 0:20:40 lr: 0.000008 loss_cls: 3.2439 (3.0769) grad_norm: 2.5564 (2.9910) time: 1.2928 data: 0.0002 max mem: 13912 +[2024-12-06 18:16:19 root] (utils.py 283): INFO Epoch: [10] [1560/2502] eta: 0:20:27 lr: 0.000008 loss_cls: 3.2402 (3.0771) grad_norm: 2.4586 (2.9899) time: 1.2926 data: 0.0003 max mem: 13912 +[2024-12-06 18:16:32 root] (utils.py 283): INFO Epoch: [10] [1570/2502] eta: 0:20:14 lr: 0.000008 loss_cls: 3.3808 (3.0779) grad_norm: 2.5701 (2.9881) time: 1.2934 data: 0.0003 max mem: 13912 +[2024-12-06 18:16:45 root] (utils.py 283): INFO Epoch: [10] [1580/2502] eta: 0:20:01 lr: 0.000008 loss_cls: 3.2645 (3.0770) grad_norm: 2.6869 (2.9864) time: 1.3001 data: 0.0002 max mem: 13912 +[2024-12-06 18:16:58 root] (utils.py 283): INFO Epoch: [10] [1590/2502] eta: 0:19:48 lr: 0.000008 loss_cls: 3.1831 (3.0770) grad_norm: 2.5512 (2.9857) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 18:17:11 root] (utils.py 283): INFO Epoch: [10] [1600/2502] eta: 0:19:35 lr: 0.000008 loss_cls: 3.0879 (3.0765) grad_norm: 2.5512 (2.9838) time: 1.2922 data: 0.0002 max mem: 13912 +[2024-12-06 18:17:23 root] (utils.py 283): INFO Epoch: [10] [1610/2502] eta: 0:19:22 lr: 0.000008 loss_cls: 3.0623 (3.0757) grad_norm: 2.7632 (2.9831) time: 1.2914 data: 0.0002 max mem: 13912 +[2024-12-06 18:17:37 root] (utils.py 283): INFO Epoch: [10] [1620/2502] eta: 0:19:09 lr: 0.000008 loss_cls: 2.8169 (3.0739) grad_norm: 2.6936 (2.9810) time: 1.2994 data: 0.0002 max mem: 13912 +[2024-12-06 18:17:49 root] (utils.py 283): INFO Epoch: [10] [1630/2502] eta: 0:18:56 lr: 0.000008 loss_cls: 3.1014 (3.0745) grad_norm: 2.6573 (2.9800) time: 1.2996 data: 0.0002 max mem: 13912 +[2024-12-06 18:18:02 root] (utils.py 283): INFO Epoch: [10] [1640/2502] eta: 0:18:43 lr: 0.000008 loss_cls: 3.3841 (3.0766) grad_norm: 2.8534 (2.9948) time: 1.2931 data: 0.0002 max mem: 13912 +[2024-12-06 18:18:15 root] (utils.py 283): INFO Epoch: [10] [1650/2502] eta: 0:18:30 lr: 0.000008 loss_cls: 3.3977 (3.0777) grad_norm: 2.8767 (2.9953) time: 1.2922 data: 0.0002 max mem: 13912 +[2024-12-06 18:18:28 root] (utils.py 283): INFO Epoch: [10] [1660/2502] eta: 0:18:17 lr: 0.000008 loss_cls: 3.3364 (3.0784) grad_norm: 2.8469 (2.9949) time: 1.2911 data: 0.0002 max mem: 13912 +[2024-12-06 18:18:41 root] (utils.py 283): INFO Epoch: [10] [1670/2502] eta: 0:18:03 lr: 0.000008 loss_cls: 3.1778 (3.0785) grad_norm: 2.8494 (3.0019) time: 1.2909 data: 0.0002 max mem: 13912 +[2024-12-06 18:18:54 root] (utils.py 283): INFO Epoch: [10] [1680/2502] eta: 0:17:50 lr: 0.000008 loss_cls: 3.2145 (3.0786) grad_norm: 2.7634 (3.0004) time: 1.2905 data: 0.0002 max mem: 13912 +[2024-12-06 18:19:07 root] (utils.py 283): INFO Epoch: [10] [1690/2502] eta: 0:17:37 lr: 0.000008 loss_cls: 3.2185 (3.0792) grad_norm: 2.5613 (2.9982) time: 1.2914 data: 0.0002 max mem: 13912 +[2024-12-06 18:19:20 root] (utils.py 283): INFO Epoch: [10] [1700/2502] eta: 0:17:24 lr: 0.000008 loss_cls: 3.2278 (3.0797) grad_norm: 2.4675 (2.9981) time: 1.2930 data: 0.0002 max mem: 13912 +[2024-12-06 18:19:33 root] (utils.py 283): INFO Epoch: [10] [1710/2502] eta: 0:17:11 lr: 0.000008 loss_cls: 3.2746 (3.0804) grad_norm: 2.7437 (2.9995) time: 1.2937 data: 0.0002 max mem: 13912 +[2024-12-06 18:19:46 root] (utils.py 283): INFO Epoch: [10] [1720/2502] eta: 0:16:58 lr: 0.000008 loss_cls: 3.1288 (3.0802) grad_norm: 2.7377 (3.0027) time: 1.2931 data: 0.0002 max mem: 13912 +[2024-12-06 18:19:59 root] (utils.py 283): INFO Epoch: [10] [1730/2502] eta: 0:16:45 lr: 0.000008 loss_cls: 2.9613 (3.0804) grad_norm: 2.8736 (3.0033) time: 1.2968 data: 0.0002 max mem: 13912 +[2024-12-06 18:20:12 root] (utils.py 283): INFO Epoch: [10] [1740/2502] eta: 0:16:32 lr: 0.000008 loss_cls: 3.0864 (3.0791) grad_norm: 2.9083 (3.0099) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 18:20:25 root] (utils.py 283): INFO Epoch: [10] [1750/2502] eta: 0:16:19 lr: 0.000008 loss_cls: 2.9733 (3.0795) grad_norm: 2.6135 (3.0080) time: 1.3053 data: 0.0002 max mem: 13912 +[2024-12-06 18:20:38 root] (utils.py 283): INFO Epoch: [10] [1760/2502] eta: 0:16:06 lr: 0.000008 loss_cls: 3.1295 (3.0798) grad_norm: 2.5788 (3.0092) time: 1.3040 data: 0.0002 max mem: 13912 +[2024-12-06 18:20:51 root] (utils.py 283): INFO Epoch: [10] [1770/2502] eta: 0:15:53 lr: 0.000008 loss_cls: 3.3185 (3.0810) grad_norm: 2.5788 (3.0097) time: 1.3002 data: 0.0002 max mem: 13912 +[2024-12-06 18:21:04 root] (utils.py 283): INFO Epoch: [10] [1780/2502] eta: 0:15:40 lr: 0.000008 loss_cls: 3.2800 (3.0805) grad_norm: 2.6797 (3.0078) time: 1.2983 data: 0.0002 max mem: 13912 +[2024-12-06 18:21:17 root] (utils.py 283): INFO Epoch: [10] [1790/2502] eta: 0:15:27 lr: 0.000008 loss_cls: 3.2259 (3.0813) grad_norm: 2.8429 (3.0108) time: 1.2996 data: 0.0003 max mem: 13912 +[2024-12-06 18:21:30 root] (utils.py 283): INFO Epoch: [10] [1800/2502] eta: 0:15:14 lr: 0.000008 loss_cls: 3.2690 (3.0813) grad_norm: 2.9241 (3.0102) time: 1.3313 data: 0.0003 max mem: 13912 +[2024-12-06 18:21:44 root] (utils.py 283): INFO Epoch: [10] [1810/2502] eta: 0:15:01 lr: 0.000008 loss_cls: 3.2465 (3.0822) grad_norm: 2.8805 (3.0148) time: 1.3364 data: 0.0003 max mem: 13912 +[2024-12-06 18:21:57 root] (utils.py 283): INFO Epoch: [10] [1820/2502] eta: 0:14:48 lr: 0.000008 loss_cls: 3.2080 (3.0804) grad_norm: 2.9505 (3.0153) time: 1.3088 data: 0.0003 max mem: 13912 +[2024-12-06 18:22:10 root] (utils.py 283): INFO Epoch: [10] [1830/2502] eta: 0:14:35 lr: 0.000008 loss_cls: 3.1533 (3.0807) grad_norm: 2.6960 (3.0136) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 18:22:23 root] (utils.py 283): INFO Epoch: [10] [1840/2502] eta: 0:14:22 lr: 0.000008 loss_cls: 3.2304 (3.0814) grad_norm: 2.6622 (3.0125) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 18:22:36 root] (utils.py 283): INFO Epoch: [10] [1850/2502] eta: 0:14:09 lr: 0.000008 loss_cls: 3.2321 (3.0814) grad_norm: 2.7764 (3.0118) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 18:22:49 root] (utils.py 283): INFO Epoch: [10] [1860/2502] eta: 0:13:56 lr: 0.000008 loss_cls: 3.2476 (3.0824) grad_norm: 2.7764 (3.0104) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 18:23:02 root] (utils.py 283): INFO Epoch: [10] [1870/2502] eta: 0:13:43 lr: 0.000008 loss_cls: 3.2155 (3.0813) grad_norm: 2.5480 (3.0093) time: 1.3042 data: 0.0002 max mem: 13912 +[2024-12-06 18:23:15 root] (utils.py 283): INFO Epoch: [10] [1880/2502] eta: 0:13:30 lr: 0.000008 loss_cls: 3.2155 (3.0820) grad_norm: 2.5764 (3.0096) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 18:23:28 root] (utils.py 283): INFO Epoch: [10] [1890/2502] eta: 0:13:17 lr: 0.000008 loss_cls: 3.2268 (3.0821) grad_norm: 3.0519 (3.0101) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 18:23:41 root] (utils.py 283): INFO Epoch: [10] [1900/2502] eta: 0:13:04 lr: 0.000008 loss_cls: 3.1140 (3.0823) grad_norm: 2.9497 (3.0095) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 18:23:54 root] (utils.py 283): INFO Epoch: [10] [1910/2502] eta: 0:12:51 lr: 0.000008 loss_cls: 3.1265 (3.0816) grad_norm: 2.7456 (3.0079) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 18:24:07 root] (utils.py 283): INFO Epoch: [10] [1920/2502] eta: 0:12:38 lr: 0.000008 loss_cls: 3.2018 (3.0824) grad_norm: 2.6900 (3.0099) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 18:24:20 root] (utils.py 283): INFO Epoch: [10] [1930/2502] eta: 0:12:25 lr: 0.000008 loss_cls: 3.3268 (3.0829) grad_norm: 2.6678 (3.0108) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 18:24:33 root] (utils.py 283): INFO Epoch: [10] [1940/2502] eta: 0:12:12 lr: 0.000008 loss_cls: 3.3268 (3.0836) grad_norm: 2.7234 (3.0101) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 18:24:46 root] (utils.py 283): INFO Epoch: [10] [1950/2502] eta: 0:11:59 lr: 0.000008 loss_cls: 3.1961 (3.0836) grad_norm: 2.6844 (3.0085) time: 1.2990 data: 0.0003 max mem: 13912 +[2024-12-06 18:24:59 root] (utils.py 283): INFO Epoch: [10] [1960/2502] eta: 0:11:46 lr: 0.000008 loss_cls: 3.0841 (3.0832) grad_norm: 2.5686 (3.0086) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 18:25:12 root] (utils.py 283): INFO Epoch: [10] [1970/2502] eta: 0:11:33 lr: 0.000008 loss_cls: 2.8906 (3.0827) grad_norm: 2.5686 (3.0066) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 18:25:25 root] (utils.py 283): INFO Epoch: [10] [1980/2502] eta: 0:11:20 lr: 0.000008 loss_cls: 3.2197 (3.0836) grad_norm: 2.4902 (3.0092) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 18:25:38 root] (utils.py 283): INFO Epoch: [10] [1990/2502] eta: 0:11:07 lr: 0.000008 loss_cls: 3.0416 (3.0827) grad_norm: 2.5590 (3.0084) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 18:25:51 root] (utils.py 283): INFO Epoch: [10] [2000/2502] eta: 0:10:54 lr: 0.000008 loss_cls: 2.9704 (3.0825) grad_norm: 2.6662 (3.0072) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 18:26:04 root] (utils.py 283): INFO Epoch: [10] [2010/2502] eta: 0:10:40 lr: 0.000008 loss_cls: 3.1821 (3.0832) grad_norm: 2.5627 (3.0059) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 18:26:17 root] (utils.py 283): INFO Epoch: [10] [2020/2502] eta: 0:10:27 lr: 0.000008 loss_cls: 3.1976 (3.0829) grad_norm: 2.5832 (3.0047) time: 1.2995 data: 0.0002 max mem: 13912 +[2024-12-06 18:26:30 root] (utils.py 283): INFO Epoch: [10] [2030/2502] eta: 0:10:14 lr: 0.000008 loss_cls: 3.1980 (3.0843) grad_norm: 2.5893 (3.0033) time: 1.2942 data: 0.0002 max mem: 13912 +[2024-12-06 18:26:43 root] (utils.py 283): INFO Epoch: [10] [2040/2502] eta: 0:10:01 lr: 0.000008 loss_cls: 3.1449 (3.0837) grad_norm: 2.5893 (3.0038) time: 1.2927 data: 0.0002 max mem: 13912 +[2024-12-06 18:26:56 root] (utils.py 283): INFO Epoch: [10] [2050/2502] eta: 0:09:48 lr: 0.000008 loss_cls: 3.0718 (3.0835) grad_norm: 2.6421 (3.0022) time: 1.2941 data: 0.0003 max mem: 13912 +[2024-12-06 18:27:09 root] (utils.py 283): INFO Epoch: [10] [2060/2502] eta: 0:09:35 lr: 0.000008 loss_cls: 3.0885 (3.0827) grad_norm: 2.7144 (3.0012) time: 1.2951 data: 0.0002 max mem: 13912 +[2024-12-06 18:27:22 root] (utils.py 283): INFO Epoch: [10] [2070/2502] eta: 0:09:22 lr: 0.000008 loss_cls: 3.2264 (3.0847) grad_norm: 2.6946 (2.9995) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 18:27:35 root] (utils.py 283): INFO Epoch: [10] [2080/2502] eta: 0:09:09 lr: 0.000008 loss_cls: 3.5075 (3.0856) grad_norm: 2.6696 (2.9996) time: 1.2960 data: 0.0003 max mem: 13912 +[2024-12-06 18:27:48 root] (utils.py 283): INFO Epoch: [10] [2090/2502] eta: 0:08:56 lr: 0.000008 loss_cls: 3.2479 (3.0866) grad_norm: 2.7059 (2.9994) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 18:28:01 root] (utils.py 283): INFO Epoch: [10] [2100/2502] eta: 0:08:43 lr: 0.000008 loss_cls: 3.2461 (3.0872) grad_norm: 2.7059 (2.9982) time: 1.3066 data: 0.0003 max mem: 13912 +[2024-12-06 18:28:14 root] (utils.py 283): INFO Epoch: [10] [2110/2502] eta: 0:08:30 lr: 0.000008 loss_cls: 3.2461 (3.0876) grad_norm: 2.6661 (2.9978) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 18:28:27 root] (utils.py 283): INFO Epoch: [10] [2120/2502] eta: 0:08:17 lr: 0.000008 loss_cls: 3.2784 (3.0874) grad_norm: 2.6661 (2.9977) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 18:28:40 root] (utils.py 283): INFO Epoch: [10] [2130/2502] eta: 0:08:04 lr: 0.000008 loss_cls: 3.2771 (3.0881) grad_norm: 2.6678 (3.0034) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 18:28:53 root] (utils.py 283): INFO Epoch: [10] [2140/2502] eta: 0:07:51 lr: 0.000008 loss_cls: 3.2096 (3.0890) grad_norm: 2.6573 (3.0036) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 18:29:06 root] (utils.py 283): INFO Epoch: [10] [2150/2502] eta: 0:07:38 lr: 0.000008 loss_cls: 3.1703 (3.0886) grad_norm: 2.6267 (3.0049) time: 1.3095 data: 0.0003 max mem: 13912 +[2024-12-06 18:29:19 root] (utils.py 283): INFO Epoch: [10] [2160/2502] eta: 0:07:25 lr: 0.000008 loss_cls: 3.1691 (3.0889) grad_norm: 2.7248 (3.0043) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 18:29:32 root] (utils.py 283): INFO Epoch: [10] [2170/2502] eta: 0:07:12 lr: 0.000008 loss_cls: 3.1691 (3.0884) grad_norm: 2.6989 (3.0050) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 18:29:45 root] (utils.py 283): INFO Epoch: [10] [2180/2502] eta: 0:06:59 lr: 0.000008 loss_cls: 3.2171 (3.0888) grad_norm: 2.7037 (3.0045) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 18:29:58 root] (utils.py 283): INFO Epoch: [10] [2190/2502] eta: 0:06:46 lr: 0.000008 loss_cls: 3.1948 (3.0881) grad_norm: 2.7306 (3.0064) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 18:30:11 root] (utils.py 283): INFO Epoch: [10] [2200/2502] eta: 0:06:33 lr: 0.000008 loss_cls: 3.0039 (3.0878) grad_norm: 2.6550 (3.0049) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 18:30:24 root] (utils.py 283): INFO Epoch: [10] [2210/2502] eta: 0:06:20 lr: 0.000008 loss_cls: 2.7875 (3.0857) grad_norm: 2.6656 (3.0038) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 18:30:37 root] (utils.py 283): INFO Epoch: [10] [2220/2502] eta: 0:06:07 lr: 0.000008 loss_cls: 2.7875 (3.0859) grad_norm: 2.8100 (3.0031) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 18:30:51 root] (utils.py 283): INFO Epoch: [10] [2230/2502] eta: 0:05:54 lr: 0.000008 loss_cls: 3.1447 (3.0852) grad_norm: 2.7967 (3.0032) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 18:31:04 root] (utils.py 283): INFO Epoch: [10] [2240/2502] eta: 0:05:41 lr: 0.000008 loss_cls: 3.2991 (3.0863) grad_norm: 2.7175 (3.0018) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 18:31:17 root] (utils.py 283): INFO Epoch: [10] [2250/2502] eta: 0:05:28 lr: 0.000008 loss_cls: 3.2749 (3.0869) grad_norm: 2.7163 (3.0013) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 18:31:30 root] (utils.py 283): INFO Epoch: [10] [2260/2502] eta: 0:05:15 lr: 0.000008 loss_cls: 3.2643 (3.0866) grad_norm: 2.5945 (3.0000) time: 1.3046 data: 0.0002 max mem: 13912 +[2024-12-06 18:31:43 root] (utils.py 283): INFO Epoch: [10] [2270/2502] eta: 0:05:02 lr: 0.000008 loss_cls: 3.3016 (3.0872) grad_norm: 2.5543 (2.9985) time: 1.3063 data: 0.0002 max mem: 13912 +[2024-12-06 18:31:56 root] (utils.py 283): INFO Epoch: [10] [2280/2502] eta: 0:04:49 lr: 0.000008 loss_cls: 3.1148 (3.0862) grad_norm: 2.5768 (2.9994) time: 1.3059 data: 0.0003 max mem: 13912 +[2024-12-06 18:32:09 root] (utils.py 283): INFO Epoch: [10] [2290/2502] eta: 0:04:36 lr: 0.000008 loss_cls: 3.0385 (3.0857) grad_norm: 2.6214 (2.9983) time: 1.3091 data: 0.0003 max mem: 13912 +[2024-12-06 18:32:22 root] (utils.py 283): INFO Epoch: [10] [2300/2502] eta: 0:04:23 lr: 0.000008 loss_cls: 3.2477 (3.0860) grad_norm: 2.6793 (2.9980) time: 1.3086 data: 0.0003 max mem: 13912 +[2024-12-06 18:32:35 root] (utils.py 283): INFO Epoch: [10] [2310/2502] eta: 0:04:10 lr: 0.000008 loss_cls: 3.4004 (3.0868) grad_norm: 2.7491 (2.9983) time: 1.3079 data: 0.0002 max mem: 13912 +[2024-12-06 18:32:48 root] (utils.py 283): INFO Epoch: [10] [2320/2502] eta: 0:03:57 lr: 0.000008 loss_cls: 3.2613 (3.0859) grad_norm: 2.9070 (2.9982) time: 1.3068 data: 0.0002 max mem: 13912 +[2024-12-06 18:33:01 root] (utils.py 283): INFO Epoch: [10] [2330/2502] eta: 0:03:44 lr: 0.000008 loss_cls: 3.1960 (3.0861) grad_norm: 2.8713 (2.9976) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 18:33:14 root] (utils.py 283): INFO Epoch: [10] [2340/2502] eta: 0:03:31 lr: 0.000008 loss_cls: 3.2919 (3.0855) grad_norm: 2.8531 (2.9989) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 18:33:27 root] (utils.py 283): INFO Epoch: [10] [2350/2502] eta: 0:03:18 lr: 0.000008 loss_cls: 2.9553 (3.0845) grad_norm: 2.8181 (2.9984) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 18:33:40 root] (utils.py 283): INFO Epoch: [10] [2360/2502] eta: 0:03:05 lr: 0.000008 loss_cls: 2.9553 (3.0853) grad_norm: 2.7456 (2.9976) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 18:33:53 root] (utils.py 283): INFO Epoch: [10] [2370/2502] eta: 0:02:51 lr: 0.000008 loss_cls: 3.3428 (3.0858) grad_norm: 2.6612 (2.9971) time: 1.3000 data: 0.0003 max mem: 13912 +[2024-12-06 18:34:06 root] (utils.py 283): INFO Epoch: [10] [2380/2502] eta: 0:02:38 lr: 0.000008 loss_cls: 3.2578 (3.0859) grad_norm: 2.6612 (2.9963) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 18:34:19 root] (utils.py 283): INFO Epoch: [10] [2390/2502] eta: 0:02:25 lr: 0.000008 loss_cls: 3.2238 (3.0859) grad_norm: 2.6871 (2.9964) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 18:34:32 root] (utils.py 283): INFO Epoch: [10] [2400/2502] eta: 0:02:12 lr: 0.000008 loss_cls: 3.1494 (3.0857) grad_norm: 2.7595 (2.9985) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 18:34:45 root] (utils.py 283): INFO Epoch: [10] [2410/2502] eta: 0:01:59 lr: 0.000008 loss_cls: 3.1231 (3.0849) grad_norm: 2.8282 (3.0007) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 18:34:58 root] (utils.py 283): INFO Epoch: [10] [2420/2502] eta: 0:01:46 lr: 0.000008 loss_cls: 3.0932 (3.0853) grad_norm: 2.9040 (3.0004) time: 1.2948 data: 0.0003 max mem: 13912 +[2024-12-06 18:35:11 root] (utils.py 283): INFO Epoch: [10] [2430/2502] eta: 0:01:33 lr: 0.000008 loss_cls: 3.0932 (3.0849) grad_norm: 2.9040 (3.0049) time: 1.2916 data: 0.0003 max mem: 13912 +[2024-12-06 18:35:24 root] (utils.py 283): INFO Epoch: [10] [2440/2502] eta: 0:01:20 lr: 0.000008 loss_cls: 3.0865 (3.0848) grad_norm: 2.8690 (3.0052) time: 1.2934 data: 0.0003 max mem: 13912 +[2024-12-06 18:35:37 root] (utils.py 283): INFO Epoch: [10] [2450/2502] eta: 0:01:07 lr: 0.000008 loss_cls: 3.0985 (3.0842) grad_norm: 2.6594 (3.0047) time: 1.2959 data: 0.0003 max mem: 13912 +[2024-12-06 18:35:50 root] (utils.py 283): INFO Epoch: [10] [2460/2502] eta: 0:00:54 lr: 0.000008 loss_cls: 3.1527 (3.0846) grad_norm: 2.6594 (3.0041) time: 1.2968 data: 0.0003 max mem: 13912 +[2024-12-06 18:36:03 root] (utils.py 283): INFO Epoch: [10] [2470/2502] eta: 0:00:41 lr: 0.000008 loss_cls: 3.1828 (3.0845) grad_norm: 2.7457 (3.0035) time: 1.2983 data: 0.0003 max mem: 13912 +[2024-12-06 18:36:16 root] (utils.py 283): INFO Epoch: [10] [2480/2502] eta: 0:00:28 lr: 0.000008 loss_cls: 3.2160 (3.0849) grad_norm: 2.8377 (3.0099) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 18:36:29 root] (utils.py 283): INFO Epoch: [10] [2490/2502] eta: 0:00:15 lr: 0.000008 loss_cls: 3.2160 (3.0840) grad_norm: 2.7132 (3.0105) time: 1.3235 data: 0.0249 max mem: 13912 +[2024-12-06 18:36:42 root] (utils.py 283): INFO Epoch: [10] [2500/2502] eta: 0:00:02 lr: 0.000008 loss_cls: 2.8531 (3.0835) grad_norm: 2.6467 (3.0096) time: 1.3181 data: 0.0249 max mem: 13912 +[2024-12-06 18:36:44 root] (utils.py 283): INFO Epoch: [10] [2501/2502] eta: 0:00:01 lr: 0.000008 loss_cls: 2.8531 (3.0834) grad_norm: 2.6996 (3.0096) time: 1.3181 data: 0.0249 max mem: 13912 +[2024-12-06 18:36:44 root] (utils.py 297): INFO Epoch: [10] Total time: 0:54:19 (1.3029 s / it) +[2024-12-06 18:36:44 root] (engine.py 179): INFO Averaged stats:lr: 0.000008 loss_cls: 2.8531 (3.0907) grad_norm: 2.6996 (3.0096) +[2024-12-06 18:36:45 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4209 (0.4209) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.2255 data: 0.0004 max mem: 13912 +[2024-12-06 18:36:47 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:20 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7078 (0.6682) acc1: 85.9375 (86.0085) acc3: 96.0938 (95.5256) acc5: 97.6562 (97.2301) time: 0.2273 data: 0.0004 max mem: 13912 +[2024-12-06 18:36:49 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7078 (0.7078) acc1: 83.5938 (85.1935) acc3: 95.3125 (95.0149) acc5: 97.6562 (96.8378) time: 0.2275 data: 0.0004 max mem: 13912 +[2024-12-06 18:36:52 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7438 (0.7303) acc1: 83.5938 (84.4506) acc3: 94.5312 (94.7833) acc5: 96.8750 (96.7994) time: 0.2277 data: 0.0004 max mem: 13912 +[2024-12-06 18:36:54 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7438 (0.7326) acc1: 82.8125 (84.1654) acc3: 94.5312 (94.8171) acc5: 96.8750 (96.8178) time: 0.2280 data: 0.0005 max mem: 13912 +[2024-12-06 18:36:56 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8103 (0.8113) acc1: 79.6875 (82.3223) acc3: 91.4062 (93.5968) acc5: 95.3125 (95.9559) time: 0.2283 data: 0.0005 max mem: 13912 +[2024-12-06 18:36:58 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0807 (0.8430) acc1: 74.2188 (81.8648) acc3: 88.2812 (93.0072) acc5: 91.4062 (95.3125) time: 0.2294 data: 0.0005 max mem: 13912 +[2024-12-06 18:37:01 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0549 (0.8709) acc1: 79.6875 (81.2170) acc3: 90.6250 (92.7047) acc5: 92.1875 (95.0594) time: 0.2298 data: 0.0005 max mem: 13912 +[2024-12-06 18:37:03 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0549 (0.9008) acc1: 76.5625 (80.5459) acc3: 89.8438 (92.1586) acc5: 93.7500 (94.7434) time: 0.2291 data: 0.0008 max mem: 13912 +[2024-12-06 18:37:05 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0801 (0.9245) acc1: 74.2188 (79.8764) acc3: 89.0625 (91.8012) acc5: 92.1875 (94.4712) time: 0.2286 data: 0.0008 max mem: 13912 +[2024-12-06 18:37:07 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0040 (0.9202) acc1: 75.7812 (79.8640) acc3: 89.8438 (91.8800) acc5: 92.1875 (94.5680) time: 0.2245 data: 0.0006 max mem: 13912 +[2024-12-06 18:37:07 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2279 s / it) +[2024-12-06 18:37:07 root] (engine.py 264): INFO * Acc@1 79.784 Acc@3 92.092 Acc@5 94.738 loss 0.916 flops 3.584 layer_flops 3.536 +[2024-12-06 18:37:07 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.8% +[2024-12-06 18:37:07 root] (main.py 551): INFO Max accuracy: 79.78% +[2024-12-06 18:37:09 root] (utils.py 283): INFO Epoch: [11] [ 0/2502] eta: 0:53:43 lr: 0.000006 loss_cls: 3.7149 (3.7149) grad_norm: 2.7569 (2.7569) time: 1.2885 data: 0.0002 max mem: 13912 +[2024-12-06 18:37:22 root] (utils.py 283): INFO Epoch: [11] [ 10/2502] eta: 0:54:00 lr: 0.000006 loss_cls: 3.5422 (3.3092) grad_norm: 2.7919 (3.4712) time: 1.3002 data: 0.0003 max mem: 13912 +[2024-12-06 18:37:35 root] (utils.py 283): INFO Epoch: [11] [ 20/2502] eta: 0:53:53 lr: 0.000006 loss_cls: 3.4305 (3.2958) grad_norm: 2.7919 (4.1739) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 18:37:48 root] (utils.py 283): INFO Epoch: [11] [ 30/2502] eta: 0:53:38 lr: 0.000006 loss_cls: 3.3532 (3.2704) grad_norm: 2.8325 (4.2861) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 18:38:01 root] (utils.py 283): INFO Epoch: [11] [ 40/2502] eta: 0:53:25 lr: 0.000006 loss_cls: 3.2867 (3.2335) grad_norm: 2.7257 (3.8859) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 18:38:14 root] (utils.py 283): INFO Epoch: [11] [ 50/2502] eta: 0:53:13 lr: 0.000006 loss_cls: 3.1453 (3.1755) grad_norm: 2.6448 (3.6482) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 18:38:27 root] (utils.py 283): INFO Epoch: [11] [ 60/2502] eta: 0:52:59 lr: 0.000006 loss_cls: 2.9860 (3.1473) grad_norm: 2.8855 (3.5276) time: 1.3014 data: 0.0002 max mem: 13912 +[2024-12-06 18:38:40 root] (utils.py 283): INFO Epoch: [11] [ 70/2502] eta: 0:52:46 lr: 0.000006 loss_cls: 3.0625 (3.1405) grad_norm: 2.9845 (3.4949) time: 1.3001 data: 0.0002 max mem: 13912 +[2024-12-06 18:38:53 root] (utils.py 283): INFO Epoch: [11] [ 80/2502] eta: 0:52:32 lr: 0.000006 loss_cls: 3.1735 (3.1398) grad_norm: 2.9845 (3.4102) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 18:39:06 root] (utils.py 283): INFO Epoch: [11] [ 90/2502] eta: 0:52:19 lr: 0.000006 loss_cls: 3.1854 (3.1304) grad_norm: 2.8685 (3.3995) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 18:39:19 root] (utils.py 283): INFO Epoch: [11] [ 100/2502] eta: 0:52:07 lr: 0.000006 loss_cls: 2.8902 (3.1025) grad_norm: 2.8685 (3.3464) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 18:39:32 root] (utils.py 283): INFO Epoch: [11] [ 110/2502] eta: 0:51:53 lr: 0.000006 loss_cls: 3.1704 (3.1142) grad_norm: 2.6997 (3.3038) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 18:39:45 root] (utils.py 283): INFO Epoch: [11] [ 120/2502] eta: 0:51:43 lr: 0.000006 loss_cls: 3.1917 (3.1047) grad_norm: 2.8265 (3.2730) time: 1.3070 data: 0.0002 max mem: 13912 +[2024-12-06 18:39:58 root] (utils.py 283): INFO Epoch: [11] [ 130/2502] eta: 0:51:30 lr: 0.000006 loss_cls: 2.9438 (3.0922) grad_norm: 2.9554 (3.2780) time: 1.3082 data: 0.0002 max mem: 13912 +[2024-12-06 18:40:11 root] (utils.py 283): INFO Epoch: [11] [ 140/2502] eta: 0:51:17 lr: 0.000006 loss_cls: 3.3047 (3.1150) grad_norm: 3.0185 (3.2645) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 18:40:24 root] (utils.py 283): INFO Epoch: [11] [ 150/2502] eta: 0:51:03 lr: 0.000006 loss_cls: 3.3356 (3.1138) grad_norm: 2.7747 (3.2571) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 18:40:37 root] (utils.py 283): INFO Epoch: [11] [ 160/2502] eta: 0:50:50 lr: 0.000006 loss_cls: 3.1196 (3.0877) grad_norm: 2.6518 (3.2243) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 18:40:50 root] (utils.py 283): INFO Epoch: [11] [ 170/2502] eta: 0:50:37 lr: 0.000006 loss_cls: 3.1104 (3.0850) grad_norm: 2.6185 (3.2540) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 18:41:03 root] (utils.py 283): INFO Epoch: [11] [ 180/2502] eta: 0:50:24 lr: 0.000006 loss_cls: 3.1104 (3.0840) grad_norm: 2.6185 (3.2609) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 18:41:16 root] (utils.py 283): INFO Epoch: [11] [ 190/2502] eta: 0:50:12 lr: 0.000006 loss_cls: 3.0520 (3.0747) grad_norm: 2.6695 (3.2690) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 18:41:29 root] (utils.py 283): INFO Epoch: [11] [ 200/2502] eta: 0:49:58 lr: 0.000006 loss_cls: 3.0977 (3.0780) grad_norm: 2.7490 (3.2860) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 18:41:42 root] (utils.py 283): INFO Epoch: [11] [ 210/2502] eta: 0:49:45 lr: 0.000006 loss_cls: 3.1316 (3.0807) grad_norm: 2.9441 (3.2703) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 18:41:55 root] (utils.py 283): INFO Epoch: [11] [ 220/2502] eta: 0:49:32 lr: 0.000006 loss_cls: 2.9845 (3.0714) grad_norm: 2.7677 (3.2488) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 18:42:08 root] (utils.py 283): INFO Epoch: [11] [ 230/2502] eta: 0:49:19 lr: 0.000006 loss_cls: 3.0072 (3.0720) grad_norm: 2.7651 (3.2959) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 18:42:21 root] (utils.py 283): INFO Epoch: [11] [ 240/2502] eta: 0:49:06 lr: 0.000006 loss_cls: 3.2288 (3.0755) grad_norm: 2.6266 (3.2675) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 18:42:35 root] (utils.py 283): INFO Epoch: [11] [ 250/2502] eta: 0:48:54 lr: 0.000006 loss_cls: 3.2288 (3.0796) grad_norm: 2.6524 (3.3003) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 18:42:48 root] (utils.py 283): INFO Epoch: [11] [ 260/2502] eta: 0:48:41 lr: 0.000006 loss_cls: 3.0677 (3.0768) grad_norm: 2.7789 (3.2757) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 18:43:01 root] (utils.py 283): INFO Epoch: [11] [ 270/2502] eta: 0:48:27 lr: 0.000006 loss_cls: 3.0611 (3.0746) grad_norm: 2.7134 (3.3841) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 18:43:14 root] (utils.py 283): INFO Epoch: [11] [ 280/2502] eta: 0:48:14 lr: 0.000006 loss_cls: 3.1648 (3.0828) grad_norm: 2.8274 (3.3687) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 18:43:27 root] (utils.py 283): INFO Epoch: [11] [ 290/2502] eta: 0:48:01 lr: 0.000006 loss_cls: 3.3689 (3.0843) grad_norm: 2.7315 (3.3539) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 18:43:40 root] (utils.py 283): INFO Epoch: [11] [ 300/2502] eta: 0:47:48 lr: 0.000006 loss_cls: 3.3736 (3.0869) grad_norm: 2.5347 (3.3235) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 18:43:53 root] (utils.py 283): INFO Epoch: [11] [ 310/2502] eta: 0:47:35 lr: 0.000006 loss_cls: 3.3166 (3.0846) grad_norm: 2.5347 (3.3086) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 18:44:06 root] (utils.py 283): INFO Epoch: [11] [ 320/2502] eta: 0:47:22 lr: 0.000006 loss_cls: 3.2866 (3.0899) grad_norm: 2.7631 (3.3036) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 18:44:19 root] (utils.py 283): INFO Epoch: [11] [ 330/2502] eta: 0:47:09 lr: 0.000006 loss_cls: 3.3433 (3.0983) grad_norm: 2.6841 (3.2886) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 18:44:32 root] (utils.py 283): INFO Epoch: [11] [ 340/2502] eta: 0:46:56 lr: 0.000006 loss_cls: 3.3910 (3.1005) grad_norm: 2.6571 (3.2732) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 18:44:45 root] (utils.py 283): INFO Epoch: [11] [ 350/2502] eta: 0:46:44 lr: 0.000006 loss_cls: 3.3910 (3.1058) grad_norm: 2.8000 (3.2752) time: 1.3101 data: 0.0002 max mem: 13912 +[2024-12-06 18:44:58 root] (utils.py 283): INFO Epoch: [11] [ 360/2502] eta: 0:46:30 lr: 0.000006 loss_cls: 3.3827 (3.1067) grad_norm: 2.8000 (3.2660) time: 1.3056 data: 0.0002 max mem: 13912 +[2024-12-06 18:45:11 root] (utils.py 283): INFO Epoch: [11] [ 370/2502] eta: 0:46:17 lr: 0.000006 loss_cls: 3.1407 (3.1035) grad_norm: 2.6689 (3.2562) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 18:45:24 root] (utils.py 283): INFO Epoch: [11] [ 380/2502] eta: 0:46:04 lr: 0.000006 loss_cls: 2.9896 (3.0994) grad_norm: 2.6275 (3.2619) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 18:45:37 root] (utils.py 283): INFO Epoch: [11] [ 390/2502] eta: 0:45:51 lr: 0.000006 loss_cls: 2.8812 (3.0947) grad_norm: 2.5801 (3.2415) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 18:45:50 root] (utils.py 283): INFO Epoch: [11] [ 400/2502] eta: 0:45:38 lr: 0.000006 loss_cls: 3.0147 (3.0952) grad_norm: 2.5654 (3.2324) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 18:46:03 root] (utils.py 283): INFO Epoch: [11] [ 410/2502] eta: 0:45:26 lr: 0.000006 loss_cls: 3.2628 (3.0976) grad_norm: 2.6709 (3.2291) time: 1.3073 data: 0.0003 max mem: 13912 +[2024-12-06 18:46:16 root] (utils.py 283): INFO Epoch: [11] [ 420/2502] eta: 0:45:13 lr: 0.000006 loss_cls: 3.1306 (3.0926) grad_norm: 2.8702 (3.2353) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 18:46:29 root] (utils.py 283): INFO Epoch: [11] [ 430/2502] eta: 0:45:00 lr: 0.000006 loss_cls: 3.0769 (3.0946) grad_norm: 2.8702 (3.2238) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 18:46:42 root] (utils.py 283): INFO Epoch: [11] [ 440/2502] eta: 0:44:47 lr: 0.000006 loss_cls: 3.2072 (3.0938) grad_norm: 2.6162 (3.2185) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 18:46:55 root] (utils.py 283): INFO Epoch: [11] [ 450/2502] eta: 0:44:34 lr: 0.000006 loss_cls: 3.2782 (3.0998) grad_norm: 2.7058 (3.2106) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 18:47:08 root] (utils.py 283): INFO Epoch: [11] [ 460/2502] eta: 0:44:21 lr: 0.000006 loss_cls: 3.3031 (3.0958) grad_norm: 2.6558 (3.1972) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 18:47:21 root] (utils.py 283): INFO Epoch: [11] [ 470/2502] eta: 0:44:07 lr: 0.000006 loss_cls: 3.0181 (3.0965) grad_norm: 2.7100 (3.1928) time: 1.3002 data: 0.0003 max mem: 13912 +[2024-12-06 18:47:34 root] (utils.py 283): INFO Epoch: [11] [ 480/2502] eta: 0:43:54 lr: 0.000006 loss_cls: 3.2119 (3.0981) grad_norm: 2.9323 (3.1890) time: 1.3000 data: 0.0003 max mem: 13912 +[2024-12-06 18:47:47 root] (utils.py 283): INFO Epoch: [11] [ 490/2502] eta: 0:43:41 lr: 0.000006 loss_cls: 3.2148 (3.0960) grad_norm: 2.8968 (3.1919) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 18:48:00 root] (utils.py 283): INFO Epoch: [11] [ 500/2502] eta: 0:43:28 lr: 0.000006 loss_cls: 3.2148 (3.0951) grad_norm: 2.6822 (3.1822) time: 1.3023 data: 0.0002 max mem: 13912 +[2024-12-06 18:48:13 root] (utils.py 283): INFO Epoch: [11] [ 510/2502] eta: 0:43:15 lr: 0.000006 loss_cls: 3.0352 (3.0923) grad_norm: 2.5393 (3.1833) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 18:48:26 root] (utils.py 283): INFO Epoch: [11] [ 520/2502] eta: 0:43:02 lr: 0.000006 loss_cls: 3.1356 (3.0932) grad_norm: 2.6839 (3.1794) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 18:48:39 root] (utils.py 283): INFO Epoch: [11] [ 530/2502] eta: 0:42:49 lr: 0.000006 loss_cls: 3.1765 (3.0927) grad_norm: 2.6711 (3.1760) time: 1.3059 data: 0.0003 max mem: 13912 +[2024-12-06 18:48:52 root] (utils.py 283): INFO Epoch: [11] [ 540/2502] eta: 0:42:36 lr: 0.000006 loss_cls: 3.0795 (3.0916) grad_norm: 2.7106 (3.1805) time: 1.3066 data: 0.0003 max mem: 13912 +[2024-12-06 18:49:06 root] (utils.py 283): INFO Epoch: [11] [ 550/2502] eta: 0:42:23 lr: 0.000006 loss_cls: 3.0795 (3.0884) grad_norm: 2.7106 (3.1852) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 18:49:19 root] (utils.py 283): INFO Epoch: [11] [ 560/2502] eta: 0:42:10 lr: 0.000006 loss_cls: 3.2814 (3.0926) grad_norm: 2.7050 (3.1784) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 18:49:32 root] (utils.py 283): INFO Epoch: [11] [ 570/2502] eta: 0:41:57 lr: 0.000006 loss_cls: 3.3696 (3.0974) grad_norm: 2.6866 (3.1784) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 18:49:45 root] (utils.py 283): INFO Epoch: [11] [ 580/2502] eta: 0:41:44 lr: 0.000006 loss_cls: 3.2812 (3.0905) grad_norm: 2.6073 (3.1688) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 18:49:58 root] (utils.py 283): INFO Epoch: [11] [ 590/2502] eta: 0:41:31 lr: 0.000006 loss_cls: 3.0731 (3.0941) grad_norm: 2.6522 (3.1669) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 18:50:11 root] (utils.py 283): INFO Epoch: [11] [ 600/2502] eta: 0:41:18 lr: 0.000006 loss_cls: 3.1562 (3.0937) grad_norm: 2.7132 (3.1783) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 18:50:24 root] (utils.py 283): INFO Epoch: [11] [ 610/2502] eta: 0:41:05 lr: 0.000006 loss_cls: 2.9123 (3.0889) grad_norm: 2.6157 (3.1683) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 18:50:37 root] (utils.py 283): INFO Epoch: [11] [ 620/2502] eta: 0:40:52 lr: 0.000006 loss_cls: 2.8472 (3.0862) grad_norm: 2.6061 (3.1598) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 18:50:50 root] (utils.py 283): INFO Epoch: [11] [ 630/2502] eta: 0:40:39 lr: 0.000006 loss_cls: 3.2095 (3.0900) grad_norm: 2.6516 (3.1570) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 18:51:03 root] (utils.py 283): INFO Epoch: [11] [ 640/2502] eta: 0:40:26 lr: 0.000006 loss_cls: 3.2111 (3.0874) grad_norm: 2.7932 (3.1560) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 18:51:16 root] (utils.py 283): INFO Epoch: [11] [ 650/2502] eta: 0:40:13 lr: 0.000006 loss_cls: 3.0810 (3.0892) grad_norm: 2.6676 (3.1532) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 18:51:29 root] (utils.py 283): INFO Epoch: [11] [ 660/2502] eta: 0:40:00 lr: 0.000006 loss_cls: 3.0481 (3.0880) grad_norm: 2.5867 (3.1584) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 18:51:42 root] (utils.py 283): INFO Epoch: [11] [ 670/2502] eta: 0:39:46 lr: 0.000006 loss_cls: 2.8755 (3.0846) grad_norm: 2.7164 (3.1584) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 18:51:55 root] (utils.py 283): INFO Epoch: [11] [ 680/2502] eta: 0:39:33 lr: 0.000006 loss_cls: 3.1542 (3.0861) grad_norm: 2.6007 (3.1577) time: 1.2996 data: 0.0002 max mem: 13912 +[2024-12-06 18:52:08 root] (utils.py 283): INFO Epoch: [11] [ 690/2502] eta: 0:39:20 lr: 0.000006 loss_cls: 3.1542 (3.0837) grad_norm: 2.6701 (3.1506) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 18:52:21 root] (utils.py 283): INFO Epoch: [11] [ 700/2502] eta: 0:39:07 lr: 0.000006 loss_cls: 2.9346 (3.0839) grad_norm: 2.7888 (3.1489) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 18:52:34 root] (utils.py 283): INFO Epoch: [11] [ 710/2502] eta: 0:38:55 lr: 0.000006 loss_cls: 3.2220 (3.0866) grad_norm: 2.7264 (3.1435) time: 1.3117 data: 0.0003 max mem: 13912 +[2024-12-06 18:52:47 root] (utils.py 283): INFO Epoch: [11] [ 720/2502] eta: 0:38:42 lr: 0.000006 loss_cls: 3.2073 (3.0802) grad_norm: 2.6112 (3.1383) time: 1.3100 data: 0.0002 max mem: 13912 +[2024-12-06 18:53:00 root] (utils.py 283): INFO Epoch: [11] [ 730/2502] eta: 0:38:29 lr: 0.000006 loss_cls: 3.0755 (3.0812) grad_norm: 2.5250 (3.1325) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 18:53:13 root] (utils.py 283): INFO Epoch: [11] [ 740/2502] eta: 0:38:15 lr: 0.000006 loss_cls: 3.1767 (3.0850) grad_norm: 2.6752 (3.1408) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 18:53:26 root] (utils.py 283): INFO Epoch: [11] [ 750/2502] eta: 0:38:02 lr: 0.000006 loss_cls: 3.1501 (3.0849) grad_norm: 2.6348 (3.1663) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 18:53:39 root] (utils.py 283): INFO Epoch: [11] [ 760/2502] eta: 0:37:49 lr: 0.000006 loss_cls: 3.0297 (3.0847) grad_norm: 2.8531 (3.1639) time: 1.3000 data: 0.0003 max mem: 13912 +[2024-12-06 18:53:52 root] (utils.py 283): INFO Epoch: [11] [ 770/2502] eta: 0:37:36 lr: 0.000006 loss_cls: 3.1341 (3.0839) grad_norm: 2.7896 (3.1576) time: 1.2989 data: 0.0003 max mem: 13912 +[2024-12-06 18:54:05 root] (utils.py 283): INFO Epoch: [11] [ 780/2502] eta: 0:37:23 lr: 0.000006 loss_cls: 3.0661 (3.0828) grad_norm: 2.7896 (3.1590) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 18:54:18 root] (utils.py 283): INFO Epoch: [11] [ 790/2502] eta: 0:37:10 lr: 0.000006 loss_cls: 3.1286 (3.0846) grad_norm: 2.9529 (3.1669) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 18:54:31 root] (utils.py 283): INFO Epoch: [11] [ 800/2502] eta: 0:36:57 lr: 0.000006 loss_cls: 3.1268 (3.0829) grad_norm: 2.6738 (3.1625) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 18:54:45 root] (utils.py 283): INFO Epoch: [11] [ 810/2502] eta: 0:36:45 lr: 0.000006 loss_cls: 3.0328 (3.0814) grad_norm: 2.8226 (3.1608) time: 1.3310 data: 0.0003 max mem: 13912 +[2024-12-06 18:54:58 root] (utils.py 283): INFO Epoch: [11] [ 820/2502] eta: 0:36:32 lr: 0.000006 loss_cls: 3.0328 (3.0799) grad_norm: 2.7098 (3.1581) time: 1.3403 data: 0.0004 max mem: 13912 +[2024-12-06 18:55:11 root] (utils.py 283): INFO Epoch: [11] [ 830/2502] eta: 0:36:19 lr: 0.000006 loss_cls: 2.9292 (3.0768) grad_norm: 2.5774 (3.1525) time: 1.3099 data: 0.0003 max mem: 13912 +[2024-12-06 18:55:24 root] (utils.py 283): INFO Epoch: [11] [ 840/2502] eta: 0:36:06 lr: 0.000006 loss_cls: 3.2392 (3.0794) grad_norm: 2.5979 (3.1482) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 18:55:37 root] (utils.py 283): INFO Epoch: [11] [ 850/2502] eta: 0:35:53 lr: 0.000006 loss_cls: 3.3056 (3.0794) grad_norm: 2.5483 (3.1564) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 18:55:50 root] (utils.py 283): INFO Epoch: [11] [ 860/2502] eta: 0:35:40 lr: 0.000006 loss_cls: 3.1373 (3.0788) grad_norm: 2.5382 (3.1520) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 18:56:03 root] (utils.py 283): INFO Epoch: [11] [ 870/2502] eta: 0:35:27 lr: 0.000006 loss_cls: 2.9156 (3.0766) grad_norm: 2.5183 (3.1511) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 18:56:16 root] (utils.py 283): INFO Epoch: [11] [ 880/2502] eta: 0:35:14 lr: 0.000006 loss_cls: 3.2311 (3.0765) grad_norm: 2.5197 (3.1466) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 18:56:29 root] (utils.py 283): INFO Epoch: [11] [ 890/2502] eta: 0:35:01 lr: 0.000006 loss_cls: 3.2600 (3.0758) grad_norm: 2.5062 (3.1398) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 18:56:42 root] (utils.py 283): INFO Epoch: [11] [ 900/2502] eta: 0:34:48 lr: 0.000006 loss_cls: 3.0559 (3.0749) grad_norm: 2.6098 (3.1360) time: 1.2983 data: 0.0003 max mem: 13912 +[2024-12-06 18:56:55 root] (utils.py 283): INFO Epoch: [11] [ 910/2502] eta: 0:34:35 lr: 0.000006 loss_cls: 3.1034 (3.0744) grad_norm: 2.7430 (3.1307) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 18:57:08 root] (utils.py 283): INFO Epoch: [11] [ 920/2502] eta: 0:34:22 lr: 0.000006 loss_cls: 2.9829 (3.0737) grad_norm: 2.7794 (3.1283) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 18:57:21 root] (utils.py 283): INFO Epoch: [11] [ 930/2502] eta: 0:34:09 lr: 0.000006 loss_cls: 2.9829 (3.0736) grad_norm: 2.8300 (3.1249) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 18:57:34 root] (utils.py 283): INFO Epoch: [11] [ 940/2502] eta: 0:33:56 lr: 0.000006 loss_cls: 3.2309 (3.0742) grad_norm: 2.6550 (3.1239) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 18:57:47 root] (utils.py 283): INFO Epoch: [11] [ 950/2502] eta: 0:33:42 lr: 0.000006 loss_cls: 3.2944 (3.0751) grad_norm: 2.4813 (3.1164) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 18:58:00 root] (utils.py 283): INFO Epoch: [11] [ 960/2502] eta: 0:33:29 lr: 0.000006 loss_cls: 3.2419 (3.0736) grad_norm: 2.4177 (3.1114) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 18:58:13 root] (utils.py 283): INFO Epoch: [11] [ 970/2502] eta: 0:33:16 lr: 0.000006 loss_cls: 3.2031 (3.0744) grad_norm: 2.6417 (3.1112) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 18:58:26 root] (utils.py 283): INFO Epoch: [11] [ 980/2502] eta: 0:33:04 lr: 0.000006 loss_cls: 3.3357 (3.0741) grad_norm: 2.6417 (3.1095) time: 1.3087 data: 0.0003 max mem: 13912 +[2024-12-06 18:58:39 root] (utils.py 283): INFO Epoch: [11] [ 990/2502] eta: 0:32:50 lr: 0.000006 loss_cls: 3.3357 (3.0761) grad_norm: 2.6939 (3.1092) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 18:58:52 root] (utils.py 283): INFO Epoch: [11] [1000/2502] eta: 0:32:37 lr: 0.000006 loss_cls: 3.1347 (3.0748) grad_norm: 2.6205 (3.1053) time: 1.2988 data: 0.0002 max mem: 13912 +[2024-12-06 18:59:05 root] (utils.py 283): INFO Epoch: [11] [1010/2502] eta: 0:32:24 lr: 0.000006 loss_cls: 3.0093 (3.0746) grad_norm: 2.6149 (3.1033) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 18:59:18 root] (utils.py 283): INFO Epoch: [11] [1020/2502] eta: 0:32:11 lr: 0.000006 loss_cls: 3.1807 (3.0759) grad_norm: 2.8217 (3.1052) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 18:59:31 root] (utils.py 283): INFO Epoch: [11] [1030/2502] eta: 0:31:58 lr: 0.000006 loss_cls: 3.1096 (3.0756) grad_norm: 2.7406 (3.1064) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 18:59:44 root] (utils.py 283): INFO Epoch: [11] [1040/2502] eta: 0:31:45 lr: 0.000006 loss_cls: 3.2817 (3.0774) grad_norm: 2.8403 (3.1088) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 18:59:58 root] (utils.py 283): INFO Epoch: [11] [1050/2502] eta: 0:31:32 lr: 0.000006 loss_cls: 3.2817 (3.0748) grad_norm: 2.8184 (3.1067) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 19:00:11 root] (utils.py 283): INFO Epoch: [11] [1060/2502] eta: 0:31:19 lr: 0.000006 loss_cls: 3.1816 (3.0762) grad_norm: 2.6694 (3.1015) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 19:00:24 root] (utils.py 283): INFO Epoch: [11] [1070/2502] eta: 0:31:06 lr: 0.000006 loss_cls: 3.2043 (3.0765) grad_norm: 2.5402 (3.0996) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 19:00:37 root] (utils.py 283): INFO Epoch: [11] [1080/2502] eta: 0:30:53 lr: 0.000006 loss_cls: 3.2964 (3.0773) grad_norm: 3.0564 (3.1080) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 19:00:50 root] (utils.py 283): INFO Epoch: [11] [1090/2502] eta: 0:30:40 lr: 0.000006 loss_cls: 3.2201 (3.0789) grad_norm: 3.1131 (3.1159) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 19:01:03 root] (utils.py 283): INFO Epoch: [11] [1100/2502] eta: 0:30:27 lr: 0.000006 loss_cls: 3.3747 (3.0816) grad_norm: 2.8052 (3.1145) time: 1.2984 data: 0.0003 max mem: 13912 +[2024-12-06 19:01:16 root] (utils.py 283): INFO Epoch: [11] [1110/2502] eta: 0:30:14 lr: 0.000006 loss_cls: 3.3505 (3.0808) grad_norm: 2.5844 (3.1146) time: 1.2984 data: 0.0003 max mem: 13912 +[2024-12-06 19:01:29 root] (utils.py 283): INFO Epoch: [11] [1120/2502] eta: 0:30:01 lr: 0.000006 loss_cls: 3.2029 (3.0801) grad_norm: 2.6343 (3.1114) time: 1.2972 data: 0.0003 max mem: 13912 +[2024-12-06 19:01:42 root] (utils.py 283): INFO Epoch: [11] [1130/2502] eta: 0:29:48 lr: 0.000006 loss_cls: 3.0535 (3.0806) grad_norm: 2.6696 (3.1195) time: 1.3011 data: 0.0002 max mem: 13912 +[2024-12-06 19:01:55 root] (utils.py 283): INFO Epoch: [11] [1140/2502] eta: 0:29:35 lr: 0.000006 loss_cls: 3.1734 (3.0820) grad_norm: 2.6957 (3.1173) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 19:02:08 root] (utils.py 283): INFO Epoch: [11] [1150/2502] eta: 0:29:22 lr: 0.000006 loss_cls: 3.0396 (3.0803) grad_norm: 2.8019 (3.1380) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 19:02:21 root] (utils.py 283): INFO Epoch: [11] [1160/2502] eta: 0:29:09 lr: 0.000006 loss_cls: 3.0259 (3.0812) grad_norm: 2.9853 (3.1356) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 19:02:34 root] (utils.py 283): INFO Epoch: [11] [1170/2502] eta: 0:28:55 lr: 0.000006 loss_cls: 3.2309 (3.0823) grad_norm: 2.9150 (3.1348) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 19:02:47 root] (utils.py 283): INFO Epoch: [11] [1180/2502] eta: 0:28:42 lr: 0.000006 loss_cls: 3.3520 (3.0839) grad_norm: 2.7877 (3.1361) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 19:03:00 root] (utils.py 283): INFO Epoch: [11] [1190/2502] eta: 0:28:29 lr: 0.000006 loss_cls: 3.3191 (3.0842) grad_norm: 2.6924 (3.1335) time: 1.2999 data: 0.0003 max mem: 13912 +[2024-12-06 19:03:13 root] (utils.py 283): INFO Epoch: [11] [1200/2502] eta: 0:28:16 lr: 0.000006 loss_cls: 3.0694 (3.0848) grad_norm: 2.6740 (3.1318) time: 1.2987 data: 0.0003 max mem: 13912 +[2024-12-06 19:03:26 root] (utils.py 283): INFO Epoch: [11] [1210/2502] eta: 0:28:03 lr: 0.000006 loss_cls: 3.3389 (3.0868) grad_norm: 2.8526 (3.1353) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 19:03:39 root] (utils.py 283): INFO Epoch: [11] [1220/2502] eta: 0:27:50 lr: 0.000006 loss_cls: 3.1662 (3.0848) grad_norm: 2.7691 (3.1327) time: 1.3081 data: 0.0003 max mem: 13912 +[2024-12-06 19:03:52 root] (utils.py 283): INFO Epoch: [11] [1230/2502] eta: 0:27:37 lr: 0.000006 loss_cls: 3.1662 (3.0859) grad_norm: 2.6040 (3.1312) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 19:04:05 root] (utils.py 283): INFO Epoch: [11] [1240/2502] eta: 0:27:24 lr: 0.000006 loss_cls: 3.3079 (3.0853) grad_norm: 2.5733 (3.1289) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 19:04:18 root] (utils.py 283): INFO Epoch: [11] [1250/2502] eta: 0:27:11 lr: 0.000006 loss_cls: 3.0608 (3.0832) grad_norm: 2.6967 (3.1283) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 19:04:31 root] (utils.py 283): INFO Epoch: [11] [1260/2502] eta: 0:26:58 lr: 0.000006 loss_cls: 2.9919 (3.0821) grad_norm: 2.7556 (3.1266) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 19:04:44 root] (utils.py 283): INFO Epoch: [11] [1270/2502] eta: 0:26:45 lr: 0.000006 loss_cls: 3.0941 (3.0836) grad_norm: 2.6575 (3.1264) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 19:04:57 root] (utils.py 283): INFO Epoch: [11] [1280/2502] eta: 0:26:32 lr: 0.000006 loss_cls: 3.1940 (3.0833) grad_norm: 2.6144 (3.1227) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 19:05:10 root] (utils.py 283): INFO Epoch: [11] [1290/2502] eta: 0:26:19 lr: 0.000006 loss_cls: 3.1689 (3.0828) grad_norm: 2.7302 (3.1224) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 19:05:23 root] (utils.py 283): INFO Epoch: [11] [1300/2502] eta: 0:26:06 lr: 0.000006 loss_cls: 3.1689 (3.0825) grad_norm: 2.6426 (3.1185) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 19:05:36 root] (utils.py 283): INFO Epoch: [11] [1310/2502] eta: 0:25:53 lr: 0.000006 loss_cls: 3.1565 (3.0834) grad_norm: 2.6112 (3.1164) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 19:05:49 root] (utils.py 283): INFO Epoch: [11] [1320/2502] eta: 0:25:40 lr: 0.000006 loss_cls: 3.1273 (3.0817) grad_norm: 2.7777 (3.1178) time: 1.3051 data: 0.0003 max mem: 13912 +[2024-12-06 19:06:02 root] (utils.py 283): INFO Epoch: [11] [1330/2502] eta: 0:25:27 lr: 0.000006 loss_cls: 2.9390 (3.0823) grad_norm: 2.7506 (3.1190) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 19:06:15 root] (utils.py 283): INFO Epoch: [11] [1340/2502] eta: 0:25:14 lr: 0.000006 loss_cls: 3.3136 (3.0840) grad_norm: 2.7499 (3.1185) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 19:06:28 root] (utils.py 283): INFO Epoch: [11] [1350/2502] eta: 0:25:01 lr: 0.000006 loss_cls: 3.3505 (3.0853) grad_norm: 2.7218 (3.1162) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 19:06:41 root] (utils.py 283): INFO Epoch: [11] [1360/2502] eta: 0:24:48 lr: 0.000006 loss_cls: 3.2006 (3.0854) grad_norm: 2.7218 (3.1150) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 19:06:54 root] (utils.py 283): INFO Epoch: [11] [1370/2502] eta: 0:24:35 lr: 0.000006 loss_cls: 3.0149 (3.0838) grad_norm: 2.8071 (3.1161) time: 1.2995 data: 0.0003 max mem: 13912 +[2024-12-06 19:07:07 root] (utils.py 283): INFO Epoch: [11] [1380/2502] eta: 0:24:22 lr: 0.000006 loss_cls: 3.0149 (3.0834) grad_norm: 2.8579 (3.1162) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 19:07:20 root] (utils.py 283): INFO Epoch: [11] [1390/2502] eta: 0:24:09 lr: 0.000006 loss_cls: 3.0242 (3.0822) grad_norm: 2.7155 (3.1158) time: 1.3099 data: 0.0003 max mem: 13912 +[2024-12-06 19:07:34 root] (utils.py 283): INFO Epoch: [11] [1400/2502] eta: 0:23:56 lr: 0.000006 loss_cls: 3.0281 (3.0819) grad_norm: 2.6372 (3.1212) time: 1.3126 data: 0.0003 max mem: 13912 +[2024-12-06 19:07:47 root] (utils.py 283): INFO Epoch: [11] [1410/2502] eta: 0:23:43 lr: 0.000006 loss_cls: 3.0281 (3.0809) grad_norm: 2.7189 (3.1212) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 19:08:00 root] (utils.py 283): INFO Epoch: [11] [1420/2502] eta: 0:23:30 lr: 0.000006 loss_cls: 2.8969 (3.0799) grad_norm: 2.8879 (3.1205) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 19:08:13 root] (utils.py 283): INFO Epoch: [11] [1430/2502] eta: 0:23:17 lr: 0.000006 loss_cls: 2.8591 (3.0786) grad_norm: 2.5451 (3.1178) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 19:08:26 root] (utils.py 283): INFO Epoch: [11] [1440/2502] eta: 0:23:04 lr: 0.000006 loss_cls: 3.1136 (3.0795) grad_norm: 2.5435 (3.1175) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 19:08:39 root] (utils.py 283): INFO Epoch: [11] [1450/2502] eta: 0:22:51 lr: 0.000006 loss_cls: 3.1533 (3.0796) grad_norm: 2.6069 (3.1141) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 19:08:52 root] (utils.py 283): INFO Epoch: [11] [1460/2502] eta: 0:22:38 lr: 0.000006 loss_cls: 3.1400 (3.0801) grad_norm: 2.5978 (3.1149) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 19:09:05 root] (utils.py 283): INFO Epoch: [11] [1470/2502] eta: 0:22:24 lr: 0.000006 loss_cls: 3.1375 (3.0795) grad_norm: 2.6877 (3.1119) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 19:09:18 root] (utils.py 283): INFO Epoch: [11] [1480/2502] eta: 0:22:11 lr: 0.000006 loss_cls: 3.1208 (3.0791) grad_norm: 2.7389 (3.1092) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 19:09:31 root] (utils.py 283): INFO Epoch: [11] [1490/2502] eta: 0:21:58 lr: 0.000006 loss_cls: 3.2705 (3.0796) grad_norm: 2.6299 (3.1058) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 19:09:44 root] (utils.py 283): INFO Epoch: [11] [1500/2502] eta: 0:21:45 lr: 0.000006 loss_cls: 3.2067 (3.0793) grad_norm: 2.6299 (3.1039) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 19:09:57 root] (utils.py 283): INFO Epoch: [11] [1510/2502] eta: 0:21:32 lr: 0.000006 loss_cls: 3.1270 (3.0802) grad_norm: 2.5387 (3.1003) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 19:10:10 root] (utils.py 283): INFO Epoch: [11] [1520/2502] eta: 0:21:19 lr: 0.000006 loss_cls: 3.1270 (3.0794) grad_norm: 2.5387 (3.0974) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 19:10:23 root] (utils.py 283): INFO Epoch: [11] [1530/2502] eta: 0:21:06 lr: 0.000006 loss_cls: 3.0789 (3.0792) grad_norm: 2.7183 (3.0954) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 19:10:36 root] (utils.py 283): INFO Epoch: [11] [1540/2502] eta: 0:20:53 lr: 0.000006 loss_cls: 3.1322 (3.0796) grad_norm: 2.8250 (3.0930) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 19:10:49 root] (utils.py 283): INFO Epoch: [11] [1550/2502] eta: 0:20:40 lr: 0.000006 loss_cls: 3.1322 (3.0786) grad_norm: 2.8419 (3.0919) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 19:11:02 root] (utils.py 283): INFO Epoch: [11] [1560/2502] eta: 0:20:27 lr: 0.000006 loss_cls: 3.0852 (3.0801) grad_norm: 2.8071 (3.0897) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 19:11:15 root] (utils.py 283): INFO Epoch: [11] [1570/2502] eta: 0:20:14 lr: 0.000006 loss_cls: 3.1065 (3.0797) grad_norm: 2.6994 (3.0872) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 19:11:28 root] (utils.py 283): INFO Epoch: [11] [1580/2502] eta: 0:20:01 lr: 0.000006 loss_cls: 3.1406 (3.0804) grad_norm: 2.6894 (3.0881) time: 1.3082 data: 0.0003 max mem: 13912 +[2024-12-06 19:11:41 root] (utils.py 283): INFO Epoch: [11] [1590/2502] eta: 0:19:48 lr: 0.000006 loss_cls: 3.1701 (3.0801) grad_norm: 2.6018 (3.0880) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 19:11:54 root] (utils.py 283): INFO Epoch: [11] [1600/2502] eta: 0:19:35 lr: 0.000006 loss_cls: 3.1121 (3.0797) grad_norm: 2.7223 (3.0868) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 19:12:07 root] (utils.py 283): INFO Epoch: [11] [1610/2502] eta: 0:19:22 lr: 0.000006 loss_cls: 3.0058 (3.0774) grad_norm: 2.5976 (3.0838) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 19:12:20 root] (utils.py 283): INFO Epoch: [11] [1620/2502] eta: 0:19:09 lr: 0.000006 loss_cls: 3.1030 (3.0787) grad_norm: 2.6487 (3.0844) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 19:12:33 root] (utils.py 283): INFO Epoch: [11] [1630/2502] eta: 0:18:56 lr: 0.000006 loss_cls: 3.2820 (3.0791) grad_norm: 2.7067 (3.0862) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 19:12:46 root] (utils.py 283): INFO Epoch: [11] [1640/2502] eta: 0:18:43 lr: 0.000006 loss_cls: 3.2366 (3.0793) grad_norm: 2.5911 (3.0858) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 19:12:59 root] (utils.py 283): INFO Epoch: [11] [1650/2502] eta: 0:18:30 lr: 0.000006 loss_cls: 3.0827 (3.0787) grad_norm: 2.6326 (3.0842) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 19:13:12 root] (utils.py 283): INFO Epoch: [11] [1660/2502] eta: 0:18:17 lr: 0.000006 loss_cls: 2.9549 (3.0779) grad_norm: 2.7077 (3.0824) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 19:13:25 root] (utils.py 283): INFO Epoch: [11] [1670/2502] eta: 0:18:04 lr: 0.000006 loss_cls: 2.9347 (3.0771) grad_norm: 2.7512 (3.0833) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 19:13:38 root] (utils.py 283): INFO Epoch: [11] [1680/2502] eta: 0:17:51 lr: 0.000006 loss_cls: 3.1216 (3.0767) grad_norm: 2.8580 (3.0825) time: 1.2999 data: 0.0003 max mem: 13912 +[2024-12-06 19:13:52 root] (utils.py 283): INFO Epoch: [11] [1690/2502] eta: 0:17:38 lr: 0.000006 loss_cls: 3.2376 (3.0767) grad_norm: 2.7305 (3.0800) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 19:14:05 root] (utils.py 283): INFO Epoch: [11] [1700/2502] eta: 0:17:25 lr: 0.000006 loss_cls: 3.2376 (3.0768) grad_norm: 2.8319 (3.0841) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 19:14:18 root] (utils.py 283): INFO Epoch: [11] [1710/2502] eta: 0:17:12 lr: 0.000006 loss_cls: 3.0739 (3.0759) grad_norm: 3.0111 (3.0882) time: 1.3041 data: 0.0002 max mem: 13912 +[2024-12-06 19:14:31 root] (utils.py 283): INFO Epoch: [11] [1720/2502] eta: 0:16:59 lr: 0.000006 loss_cls: 3.0726 (3.0753) grad_norm: 2.7592 (3.0864) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 19:14:44 root] (utils.py 283): INFO Epoch: [11] [1730/2502] eta: 0:16:46 lr: 0.000006 loss_cls: 3.2187 (3.0752) grad_norm: 2.7099 (3.0884) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 19:14:57 root] (utils.py 283): INFO Epoch: [11] [1740/2502] eta: 0:16:33 lr: 0.000006 loss_cls: 3.2187 (3.0753) grad_norm: 2.6902 (3.0922) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 19:15:10 root] (utils.py 283): INFO Epoch: [11] [1750/2502] eta: 0:16:20 lr: 0.000006 loss_cls: 3.1527 (3.0751) grad_norm: 2.7600 (3.0910) time: 1.3067 data: 0.0003 max mem: 13912 +[2024-12-06 19:15:23 root] (utils.py 283): INFO Epoch: [11] [1760/2502] eta: 0:16:07 lr: 0.000006 loss_cls: 3.2494 (3.0769) grad_norm: 2.8438 (3.0911) time: 1.3066 data: 0.0002 max mem: 13912 +[2024-12-06 19:15:36 root] (utils.py 283): INFO Epoch: [11] [1770/2502] eta: 0:15:54 lr: 0.000006 loss_cls: 3.3043 (3.0786) grad_norm: 2.8899 (3.0917) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 19:15:49 root] (utils.py 283): INFO Epoch: [11] [1780/2502] eta: 0:15:41 lr: 0.000006 loss_cls: 3.1696 (3.0785) grad_norm: 2.7363 (3.0925) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 19:16:02 root] (utils.py 283): INFO Epoch: [11] [1790/2502] eta: 0:15:28 lr: 0.000006 loss_cls: 3.1022 (3.0787) grad_norm: 2.7382 (3.0930) time: 1.3073 data: 0.0003 max mem: 13912 +[2024-12-06 19:16:15 root] (utils.py 283): INFO Epoch: [11] [1800/2502] eta: 0:15:14 lr: 0.000006 loss_cls: 3.1007 (3.0787) grad_norm: 2.6946 (3.0905) time: 1.3059 data: 0.0002 max mem: 13912 +[2024-12-06 19:16:28 root] (utils.py 283): INFO Epoch: [11] [1810/2502] eta: 0:15:01 lr: 0.000006 loss_cls: 3.1953 (3.0794) grad_norm: 2.8253 (3.0913) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 19:16:41 root] (utils.py 283): INFO Epoch: [11] [1820/2502] eta: 0:14:48 lr: 0.000006 loss_cls: 3.3154 (3.0806) grad_norm: 2.9508 (3.0912) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 19:16:54 root] (utils.py 283): INFO Epoch: [11] [1830/2502] eta: 0:14:35 lr: 0.000006 loss_cls: 3.3526 (3.0819) grad_norm: 2.8728 (3.0910) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 19:17:07 root] (utils.py 283): INFO Epoch: [11] [1840/2502] eta: 0:14:22 lr: 0.000006 loss_cls: 3.1825 (3.0801) grad_norm: 2.8008 (3.0908) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 19:17:20 root] (utils.py 283): INFO Epoch: [11] [1850/2502] eta: 0:14:09 lr: 0.000006 loss_cls: 2.9103 (3.0799) grad_norm: 2.6367 (3.0884) time: 1.3042 data: 0.0003 max mem: 13912 +[2024-12-06 19:17:33 root] (utils.py 283): INFO Epoch: [11] [1860/2502] eta: 0:13:56 lr: 0.000006 loss_cls: 2.9291 (3.0791) grad_norm: 2.6860 (3.0872) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 19:17:46 root] (utils.py 283): INFO Epoch: [11] [1870/2502] eta: 0:13:43 lr: 0.000006 loss_cls: 3.0614 (3.0788) grad_norm: 2.6860 (3.0851) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 19:17:59 root] (utils.py 283): INFO Epoch: [11] [1880/2502] eta: 0:13:30 lr: 0.000006 loss_cls: 3.1841 (3.0789) grad_norm: 2.6228 (3.0837) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 19:18:12 root] (utils.py 283): INFO Epoch: [11] [1890/2502] eta: 0:13:17 lr: 0.000006 loss_cls: 3.1841 (3.0790) grad_norm: 2.6354 (3.0820) time: 1.2996 data: 0.0003 max mem: 13912 +[2024-12-06 19:18:25 root] (utils.py 283): INFO Epoch: [11] [1900/2502] eta: 0:13:04 lr: 0.000006 loss_cls: 3.1386 (3.0789) grad_norm: 2.6354 (3.0816) time: 1.3000 data: 0.0003 max mem: 13912 +[2024-12-06 19:18:38 root] (utils.py 283): INFO Epoch: [11] [1910/2502] eta: 0:12:51 lr: 0.000006 loss_cls: 3.2578 (3.0786) grad_norm: 2.6159 (3.0794) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 19:18:51 root] (utils.py 283): INFO Epoch: [11] [1920/2502] eta: 0:12:38 lr: 0.000006 loss_cls: 3.2578 (3.0788) grad_norm: 2.5659 (3.0794) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 19:19:04 root] (utils.py 283): INFO Epoch: [11] [1930/2502] eta: 0:12:25 lr: 0.000006 loss_cls: 2.9858 (3.0776) grad_norm: 2.5942 (3.0776) time: 1.3014 data: 0.0002 max mem: 13912 +[2024-12-06 19:19:17 root] (utils.py 283): INFO Epoch: [11] [1940/2502] eta: 0:12:12 lr: 0.000006 loss_cls: 2.9858 (3.0778) grad_norm: 2.7456 (3.0774) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 19:19:30 root] (utils.py 283): INFO Epoch: [11] [1950/2502] eta: 0:11:59 lr: 0.000006 loss_cls: 3.2586 (3.0774) grad_norm: 2.8065 (3.0779) time: 1.3066 data: 0.0003 max mem: 13912 +[2024-12-06 19:19:43 root] (utils.py 283): INFO Epoch: [11] [1960/2502] eta: 0:11:46 lr: 0.000006 loss_cls: 3.2650 (3.0788) grad_norm: 2.6954 (3.0768) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 19:19:57 root] (utils.py 283): INFO Epoch: [11] [1970/2502] eta: 0:11:33 lr: 0.000006 loss_cls: 3.3708 (3.0784) grad_norm: 2.6690 (3.0755) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 19:20:10 root] (utils.py 283): INFO Epoch: [11] [1980/2502] eta: 0:11:20 lr: 0.000006 loss_cls: 3.3307 (3.0793) grad_norm: 2.6872 (3.0764) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 19:20:23 root] (utils.py 283): INFO Epoch: [11] [1990/2502] eta: 0:11:07 lr: 0.000006 loss_cls: 3.2488 (3.0804) grad_norm: 2.7594 (3.0794) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 19:20:36 root] (utils.py 283): INFO Epoch: [11] [2000/2502] eta: 0:10:54 lr: 0.000006 loss_cls: 3.1455 (3.0788) grad_norm: 2.6435 (3.0772) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 19:20:49 root] (utils.py 283): INFO Epoch: [11] [2010/2502] eta: 0:10:41 lr: 0.000006 loss_cls: 2.7138 (3.0776) grad_norm: 2.6435 (3.0800) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 19:21:02 root] (utils.py 283): INFO Epoch: [11] [2020/2502] eta: 0:10:28 lr: 0.000006 loss_cls: 3.0765 (3.0784) grad_norm: 2.8526 (3.0818) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 19:21:15 root] (utils.py 283): INFO Epoch: [11] [2030/2502] eta: 0:10:15 lr: 0.000006 loss_cls: 3.1689 (3.0777) grad_norm: 2.7019 (3.0796) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 19:21:28 root] (utils.py 283): INFO Epoch: [11] [2040/2502] eta: 0:10:02 lr: 0.000006 loss_cls: 3.0297 (3.0786) grad_norm: 2.5924 (3.0783) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 19:21:41 root] (utils.py 283): INFO Epoch: [11] [2050/2502] eta: 0:09:49 lr: 0.000006 loss_cls: 3.1694 (3.0788) grad_norm: 2.5924 (3.0763) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 19:21:54 root] (utils.py 283): INFO Epoch: [11] [2060/2502] eta: 0:09:36 lr: 0.000006 loss_cls: 3.2405 (3.0794) grad_norm: 2.5764 (3.0777) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 19:22:07 root] (utils.py 283): INFO Epoch: [11] [2070/2502] eta: 0:09:23 lr: 0.000006 loss_cls: 3.2341 (3.0782) grad_norm: 2.8625 (3.0765) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 19:22:20 root] (utils.py 283): INFO Epoch: [11] [2080/2502] eta: 0:09:09 lr: 0.000006 loss_cls: 2.9092 (3.0773) grad_norm: 2.5948 (3.0739) time: 1.2999 data: 0.0003 max mem: 13912 +[2024-12-06 19:22:33 root] (utils.py 283): INFO Epoch: [11] [2090/2502] eta: 0:08:56 lr: 0.000006 loss_cls: 2.9983 (3.0769) grad_norm: 2.5298 (3.0712) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 19:22:46 root] (utils.py 283): INFO Epoch: [11] [2100/2502] eta: 0:08:43 lr: 0.000006 loss_cls: 2.9983 (3.0767) grad_norm: 2.5350 (3.0703) time: 1.3003 data: 0.0002 max mem: 13912 +[2024-12-06 19:22:59 root] (utils.py 283): INFO Epoch: [11] [2110/2502] eta: 0:08:30 lr: 0.000006 loss_cls: 3.0593 (3.0762) grad_norm: 2.8718 (3.0713) time: 1.3001 data: 0.0002 max mem: 13912 +[2024-12-06 19:23:12 root] (utils.py 283): INFO Epoch: [11] [2120/2502] eta: 0:08:17 lr: 0.000006 loss_cls: 3.2413 (3.0771) grad_norm: 2.7910 (3.0704) time: 1.3075 data: 0.0003 max mem: 13912 +[2024-12-06 19:23:25 root] (utils.py 283): INFO Epoch: [11] [2130/2502] eta: 0:08:04 lr: 0.000006 loss_cls: 3.2601 (3.0777) grad_norm: 2.7507 (3.0699) time: 1.3096 data: 0.0003 max mem: 13912 +[2024-12-06 19:23:38 root] (utils.py 283): INFO Epoch: [11] [2140/2502] eta: 0:07:51 lr: 0.000006 loss_cls: 3.1792 (3.0776) grad_norm: 2.6759 (3.0705) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 19:23:51 root] (utils.py 283): INFO Epoch: [11] [2150/2502] eta: 0:07:38 lr: 0.000006 loss_cls: 3.1739 (3.0780) grad_norm: 2.6759 (3.0705) time: 1.2993 data: 0.0003 max mem: 13912 +[2024-12-06 19:24:04 root] (utils.py 283): INFO Epoch: [11] [2160/2502] eta: 0:07:25 lr: 0.000006 loss_cls: 3.2041 (3.0788) grad_norm: 2.9759 (3.0720) time: 1.3003 data: 0.0002 max mem: 13912 +[2024-12-06 19:24:17 root] (utils.py 283): INFO Epoch: [11] [2170/2502] eta: 0:07:12 lr: 0.000006 loss_cls: 3.2415 (3.0792) grad_norm: 2.6840 (3.0728) time: 1.3014 data: 0.0002 max mem: 13912 +[2024-12-06 19:24:30 root] (utils.py 283): INFO Epoch: [11] [2180/2502] eta: 0:06:59 lr: 0.000006 loss_cls: 3.2803 (3.0796) grad_norm: 2.5380 (3.0740) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 19:24:43 root] (utils.py 283): INFO Epoch: [11] [2190/2502] eta: 0:06:46 lr: 0.000006 loss_cls: 3.1787 (3.0794) grad_norm: 2.5595 (3.0718) time: 1.3065 data: 0.0003 max mem: 13912 +[2024-12-06 19:24:56 root] (utils.py 283): INFO Epoch: [11] [2200/2502] eta: 0:06:33 lr: 0.000006 loss_cls: 3.2238 (3.0798) grad_norm: 2.5710 (3.0706) time: 1.3091 data: 0.0003 max mem: 13912 +[2024-12-06 19:25:09 root] (utils.py 283): INFO Epoch: [11] [2210/2502] eta: 0:06:20 lr: 0.000006 loss_cls: 3.2238 (3.0794) grad_norm: 2.7950 (3.0730) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 19:25:22 root] (utils.py 283): INFO Epoch: [11] [2220/2502] eta: 0:06:07 lr: 0.000006 loss_cls: 2.9223 (3.0785) grad_norm: 3.1652 (3.0787) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 19:25:35 root] (utils.py 283): INFO Epoch: [11] [2230/2502] eta: 0:05:54 lr: 0.000006 loss_cls: 3.0525 (3.0787) grad_norm: 2.8265 (3.0795) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 19:25:48 root] (utils.py 283): INFO Epoch: [11] [2240/2502] eta: 0:05:41 lr: 0.000006 loss_cls: 3.0392 (3.0781) grad_norm: 2.7479 (3.0784) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 19:26:01 root] (utils.py 283): INFO Epoch: [11] [2250/2502] eta: 0:05:28 lr: 0.000006 loss_cls: 3.0989 (3.0782) grad_norm: 2.3993 (3.0756) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 19:26:14 root] (utils.py 283): INFO Epoch: [11] [2260/2502] eta: 0:05:15 lr: 0.000006 loss_cls: 3.0827 (3.0771) grad_norm: 2.4386 (3.0731) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 19:26:28 root] (utils.py 283): INFO Epoch: [11] [2270/2502] eta: 0:05:02 lr: 0.000006 loss_cls: 2.9708 (3.0772) grad_norm: 2.5271 (3.0711) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 19:26:41 root] (utils.py 283): INFO Epoch: [11] [2280/2502] eta: 0:04:49 lr: 0.000006 loss_cls: 3.1241 (3.0779) grad_norm: 2.6564 (3.0696) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 19:26:54 root] (utils.py 283): INFO Epoch: [11] [2290/2502] eta: 0:04:36 lr: 0.000006 loss_cls: 3.3199 (3.0784) grad_norm: 2.7327 (3.0703) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 19:27:07 root] (utils.py 283): INFO Epoch: [11] [2300/2502] eta: 0:04:23 lr: 0.000006 loss_cls: 3.2349 (3.0786) grad_norm: 2.7655 (3.0693) time: 1.3101 data: 0.0003 max mem: 13912 +[2024-12-06 19:27:20 root] (utils.py 283): INFO Epoch: [11] [2310/2502] eta: 0:04:10 lr: 0.000006 loss_cls: 3.2175 (3.0789) grad_norm: 2.6539 (3.0684) time: 1.3108 data: 0.0003 max mem: 13912 +[2024-12-06 19:27:33 root] (utils.py 283): INFO Epoch: [11] [2320/2502] eta: 0:03:57 lr: 0.000006 loss_cls: 3.2489 (3.0787) grad_norm: 2.6548 (3.0673) time: 1.3092 data: 0.0003 max mem: 13912 +[2024-12-06 19:27:47 root] (utils.py 283): INFO Epoch: [11] [2330/2502] eta: 0:03:44 lr: 0.000006 loss_cls: 3.2498 (3.0790) grad_norm: 2.7196 (3.0768) time: 1.3393 data: 0.0003 max mem: 13912 +[2024-12-06 19:28:00 root] (utils.py 283): INFO Epoch: [11] [2340/2502] eta: 0:03:31 lr: 0.000006 loss_cls: 3.0657 (3.0790) grad_norm: 3.1071 (3.0802) time: 1.3336 data: 0.0003 max mem: 13912 +[2024-12-06 19:28:13 root] (utils.py 283): INFO Epoch: [11] [2350/2502] eta: 0:03:18 lr: 0.000006 loss_cls: 3.1158 (3.0790) grad_norm: 2.9694 (3.0794) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 19:28:26 root] (utils.py 283): INFO Epoch: [11] [2360/2502] eta: 0:03:05 lr: 0.000006 loss_cls: 3.1396 (3.0787) grad_norm: 2.6911 (3.0785) time: 1.3002 data: 0.0003 max mem: 13912 +[2024-12-06 19:28:39 root] (utils.py 283): INFO Epoch: [11] [2370/2502] eta: 0:02:52 lr: 0.000006 loss_cls: 2.9459 (3.0779) grad_norm: 2.7773 (3.0777) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 19:28:52 root] (utils.py 283): INFO Epoch: [11] [2380/2502] eta: 0:02:39 lr: 0.000006 loss_cls: 3.0136 (3.0773) grad_norm: 2.7836 (3.0787) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 19:29:05 root] (utils.py 283): INFO Epoch: [11] [2390/2502] eta: 0:02:26 lr: 0.000006 loss_cls: 3.0435 (3.0771) grad_norm: 2.7127 (3.0770) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 19:29:18 root] (utils.py 283): INFO Epoch: [11] [2400/2502] eta: 0:02:12 lr: 0.000006 loss_cls: 3.0835 (3.0765) grad_norm: 2.6828 (3.0764) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 19:29:31 root] (utils.py 283): INFO Epoch: [11] [2410/2502] eta: 0:01:59 lr: 0.000006 loss_cls: 2.9280 (3.0763) grad_norm: 2.6828 (3.0747) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 19:29:44 root] (utils.py 283): INFO Epoch: [11] [2420/2502] eta: 0:01:46 lr: 0.000006 loss_cls: 3.0780 (3.0769) grad_norm: 2.6997 (3.0753) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 19:29:57 root] (utils.py 283): INFO Epoch: [11] [2430/2502] eta: 0:01:33 lr: 0.000006 loss_cls: 3.3421 (3.0778) grad_norm: 2.7819 (3.0800) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 19:30:10 root] (utils.py 283): INFO Epoch: [11] [2440/2502] eta: 0:01:20 lr: 0.000006 loss_cls: 3.3179 (3.0780) grad_norm: 2.8414 (3.0798) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 19:30:23 root] (utils.py 283): INFO Epoch: [11] [2450/2502] eta: 0:01:07 lr: 0.000006 loss_cls: 3.3604 (3.0789) grad_norm: 2.8414 (3.0802) time: 1.2971 data: 0.0002 max mem: 13912 +[2024-12-06 19:30:36 root] (utils.py 283): INFO Epoch: [11] [2460/2502] eta: 0:00:54 lr: 0.000006 loss_cls: 3.2954 (3.0779) grad_norm: 2.6742 (3.0792) time: 1.2984 data: 0.0003 max mem: 13912 +[2024-12-06 19:30:49 root] (utils.py 283): INFO Epoch: [11] [2470/2502] eta: 0:00:41 lr: 0.000006 loss_cls: 3.2054 (3.0785) grad_norm: 2.6641 (3.0792) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 19:31:02 root] (utils.py 283): INFO Epoch: [11] [2480/2502] eta: 0:00:28 lr: 0.000006 loss_cls: 3.2632 (3.0787) grad_norm: 2.7463 (3.0802) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 19:31:15 root] (utils.py 283): INFO Epoch: [11] [2490/2502] eta: 0:00:15 lr: 0.000006 loss_cls: 3.1382 (3.0788) grad_norm: 2.9207 (3.0801) time: 1.3295 data: 0.0258 max mem: 13912 +[2024-12-06 19:31:29 root] (utils.py 283): INFO Epoch: [11] [2500/2502] eta: 0:00:02 lr: 0.000006 loss_cls: 3.2508 (3.0787) grad_norm: 2.9125 (3.0802) time: 1.3309 data: 0.0258 max mem: 13912 +[2024-12-06 19:31:30 root] (utils.py 283): INFO Epoch: [11] [2501/2502] eta: 0:00:01 lr: 0.000006 loss_cls: 3.1878 (3.0785) grad_norm: 2.8125 (3.0800) time: 1.3301 data: 0.0258 max mem: 13912 +[2024-12-06 19:31:30 root] (utils.py 297): INFO Epoch: [11] Total time: 0:54:22 (1.3039 s / it) +[2024-12-06 19:31:30 root] (engine.py 179): INFO Averaged stats:lr: 0.000006 loss_cls: 3.1878 (3.0779) grad_norm: 2.8125 (3.0800) +[2024-12-06 19:31:31 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4256 (0.4256) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.2253 data: 0.0004 max mem: 13912 +[2024-12-06 19:31:33 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:20 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6523 (0.6581) acc1: 86.7188 (85.9375) acc3: 96.8750 (95.5966) acc5: 97.6562 (97.1591) time: 0.2273 data: 0.0004 max mem: 13912 +[2024-12-06 19:31:35 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6523 (0.6987) acc1: 85.1562 (85.0818) acc3: 95.3125 (95.0149) acc5: 97.6562 (96.8750) time: 0.2277 data: 0.0005 max mem: 13912 +[2024-12-06 19:31:38 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7490 (0.7201) acc1: 85.1562 (84.3246) acc3: 94.5312 (94.9345) acc5: 96.8750 (96.6986) time: 0.2281 data: 0.0005 max mem: 13912 +[2024-12-06 19:31:40 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7457 (0.7229) acc1: 83.5938 (84.3178) acc3: 94.5312 (94.9314) acc5: 96.8750 (96.8559) time: 0.2283 data: 0.0004 max mem: 13912 +[2024-12-06 19:31:42 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8146 (0.8082) acc1: 78.9062 (82.1998) acc3: 90.6250 (93.6428) acc5: 94.5312 (95.8640) time: 0.2282 data: 0.0004 max mem: 13912 +[2024-12-06 19:31:45 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0835 (0.8398) acc1: 73.4375 (81.6983) acc3: 89.0625 (93.0328) acc5: 92.1875 (95.3381) time: 0.2293 data: 0.0005 max mem: 13912 +[2024-12-06 19:31:47 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0372 (0.8685) acc1: 78.9062 (81.0739) acc3: 90.6250 (92.7597) acc5: 94.5312 (95.1695) time: 0.2292 data: 0.0005 max mem: 13912 +[2024-12-06 19:31:49 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0459 (0.8991) acc1: 75.7812 (80.3337) acc3: 89.8438 (92.2647) acc5: 92.9688 (94.8013) time: 0.2282 data: 0.0007 max mem: 13912 +[2024-12-06 19:31:51 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0904 (0.9243) acc1: 74.2188 (79.6102) acc3: 89.0625 (91.8355) acc5: 91.4062 (94.5055) time: 0.2283 data: 0.0007 max mem: 13912 +[2024-12-06 19:31:53 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0134 (0.9200) acc1: 75.7812 (79.6240) acc3: 89.8438 (91.9040) acc5: 92.1875 (94.6000) time: 0.2244 data: 0.0007 max mem: 13912 +[2024-12-06 19:31:53 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2277 s / it) +[2024-12-06 19:31:53 root] (engine.py 264): INFO * Acc@1 79.758 Acc@3 92.178 Acc@5 94.726 loss 0.913 flops 3.584 layer_flops 3.536 +[2024-12-06 19:31:53 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.8% +[2024-12-06 19:31:53 root] (main.py 551): INFO Max accuracy: 79.78% +[2024-12-06 19:31:54 root] (utils.py 283): INFO Epoch: [12] [ 0/2502] eta: 0:54:12 lr: 0.000004 loss_cls: 3.5310 (3.5310) grad_norm: 3.5735 (3.5735) time: 1.2998 data: 0.0004 max mem: 13912 +[2024-12-06 19:32:07 root] (utils.py 283): INFO Epoch: [12] [ 10/2502] eta: 0:54:06 lr: 0.000004 loss_cls: 3.1383 (3.0310) grad_norm: 2.7832 (2.8851) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 19:32:21 root] (utils.py 283): INFO Epoch: [12] [ 20/2502] eta: 0:54:12 lr: 0.000004 loss_cls: 3.0536 (3.0238) grad_norm: 2.7378 (3.1591) time: 1.3109 data: 0.0003 max mem: 13912 +[2024-12-06 19:32:34 root] (utils.py 283): INFO Epoch: [12] [ 30/2502] eta: 0:53:49 lr: 0.000004 loss_cls: 3.1130 (3.0793) grad_norm: 2.7592 (3.1252) time: 1.3085 data: 0.0003 max mem: 13912 +[2024-12-06 19:32:47 root] (utils.py 283): INFO Epoch: [12] [ 40/2502] eta: 0:53:34 lr: 0.000004 loss_cls: 3.1285 (3.0788) grad_norm: 2.6593 (3.0073) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 19:33:00 root] (utils.py 283): INFO Epoch: [12] [ 50/2502] eta: 0:53:19 lr: 0.000004 loss_cls: 3.2611 (3.0964) grad_norm: 2.6593 (3.0805) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 19:33:13 root] (utils.py 283): INFO Epoch: [12] [ 60/2502] eta: 0:53:05 lr: 0.000004 loss_cls: 3.0238 (3.0386) grad_norm: 2.6645 (3.1733) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 19:33:26 root] (utils.py 283): INFO Epoch: [12] [ 70/2502] eta: 0:52:51 lr: 0.000004 loss_cls: 2.9800 (3.0365) grad_norm: 2.6645 (3.1611) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 19:33:39 root] (utils.py 283): INFO Epoch: [12] [ 80/2502] eta: 0:52:37 lr: 0.000004 loss_cls: 3.2972 (3.0766) grad_norm: 2.8379 (3.2456) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 19:33:52 root] (utils.py 283): INFO Epoch: [12] [ 90/2502] eta: 0:52:23 lr: 0.000004 loss_cls: 3.4776 (3.0906) grad_norm: 2.7689 (3.1817) time: 1.2991 data: 0.0002 max mem: 13912 +[2024-12-06 19:34:05 root] (utils.py 283): INFO Epoch: [12] [ 100/2502] eta: 0:52:09 lr: 0.000004 loss_cls: 3.3362 (3.0977) grad_norm: 2.5676 (3.1378) time: 1.2991 data: 0.0002 max mem: 13912 +[2024-12-06 19:34:18 root] (utils.py 283): INFO Epoch: [12] [ 110/2502] eta: 0:51:56 lr: 0.000004 loss_cls: 3.3063 (3.1207) grad_norm: 2.7877 (3.1385) time: 1.3014 data: 0.0002 max mem: 13912 +[2024-12-06 19:34:31 root] (utils.py 283): INFO Epoch: [12] [ 120/2502] eta: 0:51:44 lr: 0.000004 loss_cls: 3.1321 (3.1146) grad_norm: 2.6102 (3.0908) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 19:34:44 root] (utils.py 283): INFO Epoch: [12] [ 130/2502] eta: 0:51:30 lr: 0.000004 loss_cls: 3.2694 (3.1274) grad_norm: 2.6072 (3.1126) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 19:34:57 root] (utils.py 283): INFO Epoch: [12] [ 140/2502] eta: 0:51:18 lr: 0.000004 loss_cls: 3.1373 (3.1138) grad_norm: 3.0301 (3.0893) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 19:35:10 root] (utils.py 283): INFO Epoch: [12] [ 150/2502] eta: 0:51:05 lr: 0.000004 loss_cls: 3.1021 (3.1200) grad_norm: 2.7040 (3.0819) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 19:35:23 root] (utils.py 283): INFO Epoch: [12] [ 160/2502] eta: 0:50:52 lr: 0.000004 loss_cls: 3.1021 (3.1080) grad_norm: 2.6359 (3.0663) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 19:35:36 root] (utils.py 283): INFO Epoch: [12] [ 170/2502] eta: 0:50:39 lr: 0.000004 loss_cls: 3.0503 (3.1153) grad_norm: 2.8658 (3.0764) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 19:35:49 root] (utils.py 283): INFO Epoch: [12] [ 180/2502] eta: 0:50:26 lr: 0.000004 loss_cls: 3.0604 (3.1164) grad_norm: 3.0935 (3.0765) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 19:36:02 root] (utils.py 283): INFO Epoch: [12] [ 190/2502] eta: 0:50:13 lr: 0.000004 loss_cls: 2.9508 (3.1126) grad_norm: 2.9542 (3.0972) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 19:36:15 root] (utils.py 283): INFO Epoch: [12] [ 200/2502] eta: 0:50:00 lr: 0.000004 loss_cls: 2.8721 (3.0968) grad_norm: 2.6092 (3.0663) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 19:36:28 root] (utils.py 283): INFO Epoch: [12] [ 210/2502] eta: 0:49:48 lr: 0.000004 loss_cls: 2.8721 (3.0846) grad_norm: 2.5871 (3.0680) time: 1.3094 data: 0.0003 max mem: 13912 +[2024-12-06 19:36:41 root] (utils.py 283): INFO Epoch: [12] [ 220/2502] eta: 0:49:35 lr: 0.000004 loss_cls: 2.9964 (3.0863) grad_norm: 2.8711 (3.0566) time: 1.3088 data: 0.0003 max mem: 13912 +[2024-12-06 19:36:54 root] (utils.py 283): INFO Epoch: [12] [ 230/2502] eta: 0:49:21 lr: 0.000004 loss_cls: 3.2623 (3.0978) grad_norm: 2.9618 (3.1057) time: 1.2993 data: 0.0003 max mem: 13912 +[2024-12-06 19:37:07 root] (utils.py 283): INFO Epoch: [12] [ 240/2502] eta: 0:49:08 lr: 0.000004 loss_cls: 3.3043 (3.1020) grad_norm: 2.9481 (3.1141) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 19:37:20 root] (utils.py 283): INFO Epoch: [12] [ 250/2502] eta: 0:48:55 lr: 0.000004 loss_cls: 3.1991 (3.0976) grad_norm: 2.5910 (3.0946) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 19:37:33 root] (utils.py 283): INFO Epoch: [12] [ 260/2502] eta: 0:48:42 lr: 0.000004 loss_cls: 3.0729 (3.0914) grad_norm: 2.5489 (3.0749) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 19:37:46 root] (utils.py 283): INFO Epoch: [12] [ 270/2502] eta: 0:48:29 lr: 0.000004 loss_cls: 3.0698 (3.0874) grad_norm: 2.7095 (3.0670) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 19:37:59 root] (utils.py 283): INFO Epoch: [12] [ 280/2502] eta: 0:48:16 lr: 0.000004 loss_cls: 3.0836 (3.0902) grad_norm: 2.7764 (3.0938) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 19:38:12 root] (utils.py 283): INFO Epoch: [12] [ 290/2502] eta: 0:48:03 lr: 0.000004 loss_cls: 3.3010 (3.0909) grad_norm: 2.6363 (3.0874) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 19:38:25 root] (utils.py 283): INFO Epoch: [12] [ 300/2502] eta: 0:47:50 lr: 0.000004 loss_cls: 3.1388 (3.0918) grad_norm: 2.6363 (3.0810) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 19:38:38 root] (utils.py 283): INFO Epoch: [12] [ 310/2502] eta: 0:47:36 lr: 0.000004 loss_cls: 3.1735 (3.0908) grad_norm: 2.7431 (3.0732) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 19:38:51 root] (utils.py 283): INFO Epoch: [12] [ 320/2502] eta: 0:47:23 lr: 0.000004 loss_cls: 3.2215 (3.0935) grad_norm: 2.7696 (3.0681) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 19:39:04 root] (utils.py 283): INFO Epoch: [12] [ 330/2502] eta: 0:47:10 lr: 0.000004 loss_cls: 3.0864 (3.0872) grad_norm: 2.7570 (3.0805) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 19:39:18 root] (utils.py 283): INFO Epoch: [12] [ 340/2502] eta: 0:46:58 lr: 0.000004 loss_cls: 3.0864 (3.0919) grad_norm: 2.7388 (3.0814) time: 1.3096 data: 0.0003 max mem: 13912 +[2024-12-06 19:39:31 root] (utils.py 283): INFO Epoch: [12] [ 350/2502] eta: 0:46:45 lr: 0.000004 loss_cls: 3.0567 (3.0890) grad_norm: 2.6981 (3.0956) time: 1.3106 data: 0.0002 max mem: 13912 +[2024-12-06 19:39:44 root] (utils.py 283): INFO Epoch: [12] [ 360/2502] eta: 0:46:32 lr: 0.000004 loss_cls: 3.0538 (3.0892) grad_norm: 2.7267 (3.1016) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 19:39:57 root] (utils.py 283): INFO Epoch: [12] [ 370/2502] eta: 0:46:19 lr: 0.000004 loss_cls: 3.2683 (3.0910) grad_norm: 2.7267 (3.1129) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 19:40:10 root] (utils.py 283): INFO Epoch: [12] [ 380/2502] eta: 0:46:06 lr: 0.000004 loss_cls: 3.1942 (3.0845) grad_norm: 2.8091 (3.1113) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 19:40:23 root] (utils.py 283): INFO Epoch: [12] [ 390/2502] eta: 0:45:53 lr: 0.000004 loss_cls: 2.6600 (3.0774) grad_norm: 2.6372 (3.1080) time: 1.3002 data: 0.0002 max mem: 13912 +[2024-12-06 19:40:36 root] (utils.py 283): INFO Epoch: [12] [ 400/2502] eta: 0:45:39 lr: 0.000004 loss_cls: 3.0157 (3.0801) grad_norm: 2.6379 (3.1522) time: 1.2999 data: 0.0003 max mem: 13912 +[2024-12-06 19:40:49 root] (utils.py 283): INFO Epoch: [12] [ 410/2502] eta: 0:45:26 lr: 0.000004 loss_cls: 3.2889 (3.0819) grad_norm: 2.7614 (3.2446) time: 1.2997 data: 0.0002 max mem: 13912 +[2024-12-06 19:41:02 root] (utils.py 283): INFO Epoch: [12] [ 420/2502] eta: 0:45:13 lr: 0.000004 loss_cls: 3.2889 (3.0862) grad_norm: 2.7583 (3.2360) time: 1.3027 data: 0.0002 max mem: 13912 +[2024-12-06 19:41:15 root] (utils.py 283): INFO Epoch: [12] [ 430/2502] eta: 0:45:00 lr: 0.000004 loss_cls: 3.2869 (3.0828) grad_norm: 2.7583 (3.2318) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 19:41:28 root] (utils.py 283): INFO Epoch: [12] [ 440/2502] eta: 0:44:47 lr: 0.000004 loss_cls: 3.3302 (3.0862) grad_norm: 2.8447 (3.2239) time: 1.2999 data: 0.0003 max mem: 13912 +[2024-12-06 19:41:41 root] (utils.py 283): INFO Epoch: [12] [ 450/2502] eta: 0:44:34 lr: 0.000004 loss_cls: 3.3298 (3.0880) grad_norm: 2.8447 (3.2204) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 19:41:54 root] (utils.py 283): INFO Epoch: [12] [ 460/2502] eta: 0:44:21 lr: 0.000004 loss_cls: 3.3298 (3.0927) grad_norm: 2.7467 (3.2177) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 19:42:07 root] (utils.py 283): INFO Epoch: [12] [ 470/2502] eta: 0:44:08 lr: 0.000004 loss_cls: 3.2040 (3.0923) grad_norm: 2.7148 (3.2059) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 19:42:20 root] (utils.py 283): INFO Epoch: [12] [ 480/2502] eta: 0:43:55 lr: 0.000004 loss_cls: 3.0113 (3.0914) grad_norm: 2.6275 (3.2034) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 19:42:33 root] (utils.py 283): INFO Epoch: [12] [ 490/2502] eta: 0:43:42 lr: 0.000004 loss_cls: 2.8066 (3.0848) grad_norm: 2.7675 (3.2027) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 19:42:46 root] (utils.py 283): INFO Epoch: [12] [ 500/2502] eta: 0:43:29 lr: 0.000004 loss_cls: 2.7769 (3.0810) grad_norm: 2.8551 (3.2050) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 19:42:59 root] (utils.py 283): INFO Epoch: [12] [ 510/2502] eta: 0:43:16 lr: 0.000004 loss_cls: 2.7964 (3.0764) grad_norm: 2.7473 (3.2046) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 19:43:12 root] (utils.py 283): INFO Epoch: [12] [ 520/2502] eta: 0:43:03 lr: 0.000004 loss_cls: 2.8852 (3.0776) grad_norm: 2.8933 (3.2005) time: 1.3112 data: 0.0003 max mem: 13912 +[2024-12-06 19:43:25 root] (utils.py 283): INFO Epoch: [12] [ 530/2502] eta: 0:42:50 lr: 0.000004 loss_cls: 3.1579 (3.0782) grad_norm: 3.0081 (3.2047) time: 1.3101 data: 0.0003 max mem: 13912 +[2024-12-06 19:43:38 root] (utils.py 283): INFO Epoch: [12] [ 540/2502] eta: 0:42:37 lr: 0.000004 loss_cls: 2.8985 (3.0721) grad_norm: 2.7298 (3.1933) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 19:43:51 root] (utils.py 283): INFO Epoch: [12] [ 550/2502] eta: 0:42:24 lr: 0.000004 loss_cls: 2.8113 (3.0680) grad_norm: 2.5176 (3.1842) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 19:44:04 root] (utils.py 283): INFO Epoch: [12] [ 560/2502] eta: 0:42:11 lr: 0.000004 loss_cls: 3.0228 (3.0688) grad_norm: 2.6123 (3.1799) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 19:44:17 root] (utils.py 283): INFO Epoch: [12] [ 570/2502] eta: 0:41:58 lr: 0.000004 loss_cls: 3.1969 (3.0702) grad_norm: 2.7081 (3.1715) time: 1.3011 data: 0.0002 max mem: 13912 +[2024-12-06 19:44:30 root] (utils.py 283): INFO Epoch: [12] [ 580/2502] eta: 0:41:45 lr: 0.000004 loss_cls: 3.2984 (3.0690) grad_norm: 2.5531 (3.1686) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 19:44:43 root] (utils.py 283): INFO Epoch: [12] [ 590/2502] eta: 0:41:32 lr: 0.000004 loss_cls: 3.2397 (3.0707) grad_norm: 2.5634 (3.1596) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 19:44:56 root] (utils.py 283): INFO Epoch: [12] [ 600/2502] eta: 0:41:19 lr: 0.000004 loss_cls: 3.2397 (3.0723) grad_norm: 2.6962 (3.1540) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 19:45:09 root] (utils.py 283): INFO Epoch: [12] [ 610/2502] eta: 0:41:05 lr: 0.000004 loss_cls: 3.0926 (3.0715) grad_norm: 2.7499 (3.1481) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 19:45:22 root] (utils.py 283): INFO Epoch: [12] [ 620/2502] eta: 0:40:52 lr: 0.000004 loss_cls: 3.1163 (3.0719) grad_norm: 2.7717 (3.1428) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 19:45:36 root] (utils.py 283): INFO Epoch: [12] [ 630/2502] eta: 0:40:39 lr: 0.000004 loss_cls: 3.2016 (3.0747) grad_norm: 2.5130 (3.1372) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 19:45:49 root] (utils.py 283): INFO Epoch: [12] [ 640/2502] eta: 0:40:26 lr: 0.000004 loss_cls: 3.3108 (3.0779) grad_norm: 2.6560 (3.1326) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 19:46:02 root] (utils.py 283): INFO Epoch: [12] [ 650/2502] eta: 0:40:13 lr: 0.000004 loss_cls: 3.2645 (3.0758) grad_norm: 2.8333 (3.1293) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 19:46:15 root] (utils.py 283): INFO Epoch: [12] [ 660/2502] eta: 0:40:00 lr: 0.000004 loss_cls: 2.8473 (3.0716) grad_norm: 2.8585 (3.1262) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 19:46:28 root] (utils.py 283): INFO Epoch: [12] [ 670/2502] eta: 0:39:47 lr: 0.000004 loss_cls: 2.7794 (3.0698) grad_norm: 2.8585 (3.1232) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 19:46:41 root] (utils.py 283): INFO Epoch: [12] [ 680/2502] eta: 0:39:34 lr: 0.000004 loss_cls: 3.1621 (3.0705) grad_norm: 2.6680 (3.1220) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 19:46:54 root] (utils.py 283): INFO Epoch: [12] [ 690/2502] eta: 0:39:21 lr: 0.000004 loss_cls: 3.1222 (3.0706) grad_norm: 2.7958 (3.1214) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 19:47:07 root] (utils.py 283): INFO Epoch: [12] [ 700/2502] eta: 0:39:08 lr: 0.000004 loss_cls: 3.2185 (3.0715) grad_norm: 2.5930 (3.1154) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 19:47:20 root] (utils.py 283): INFO Epoch: [12] [ 710/2502] eta: 0:38:55 lr: 0.000004 loss_cls: 3.2185 (3.0730) grad_norm: 2.5930 (3.1244) time: 1.2999 data: 0.0003 max mem: 13912 +[2024-12-06 19:47:33 root] (utils.py 283): INFO Epoch: [12] [ 720/2502] eta: 0:38:42 lr: 0.000004 loss_cls: 3.2169 (3.0710) grad_norm: 2.6015 (3.1176) time: 1.3034 data: 0.0003 max mem: 13912 +[2024-12-06 19:47:46 root] (utils.py 283): INFO Epoch: [12] [ 730/2502] eta: 0:38:29 lr: 0.000004 loss_cls: 3.2169 (3.0734) grad_norm: 2.6015 (3.1130) time: 1.3086 data: 0.0003 max mem: 13912 +[2024-12-06 19:47:59 root] (utils.py 283): INFO Epoch: [12] [ 740/2502] eta: 0:38:16 lr: 0.000004 loss_cls: 3.1579 (3.0740) grad_norm: 2.6192 (3.1086) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 19:48:12 root] (utils.py 283): INFO Epoch: [12] [ 750/2502] eta: 0:38:03 lr: 0.000004 loss_cls: 3.2377 (3.0769) grad_norm: 2.6177 (3.1020) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 19:48:25 root] (utils.py 283): INFO Epoch: [12] [ 760/2502] eta: 0:37:50 lr: 0.000004 loss_cls: 3.2016 (3.0771) grad_norm: 2.6754 (3.1093) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 19:48:38 root] (utils.py 283): INFO Epoch: [12] [ 770/2502] eta: 0:37:37 lr: 0.000004 loss_cls: 3.2016 (3.0784) grad_norm: 2.6871 (3.1062) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 19:48:51 root] (utils.py 283): INFO Epoch: [12] [ 780/2502] eta: 0:37:24 lr: 0.000004 loss_cls: 3.1981 (3.0791) grad_norm: 2.5872 (3.1006) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 19:49:04 root] (utils.py 283): INFO Epoch: [12] [ 790/2502] eta: 0:37:11 lr: 0.000004 loss_cls: 3.2849 (3.0842) grad_norm: 2.6396 (3.1087) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 19:49:17 root] (utils.py 283): INFO Epoch: [12] [ 800/2502] eta: 0:36:58 lr: 0.000004 loss_cls: 3.3820 (3.0871) grad_norm: 2.6458 (3.1086) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 19:49:30 root] (utils.py 283): INFO Epoch: [12] [ 810/2502] eta: 0:36:44 lr: 0.000004 loss_cls: 3.2545 (3.0863) grad_norm: 2.7377 (3.1081) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 19:49:43 root] (utils.py 283): INFO Epoch: [12] [ 820/2502] eta: 0:36:31 lr: 0.000004 loss_cls: 3.2232 (3.0872) grad_norm: 2.7415 (3.1121) time: 1.2988 data: 0.0003 max mem: 13912 +[2024-12-06 19:49:56 root] (utils.py 283): INFO Epoch: [12] [ 830/2502] eta: 0:36:18 lr: 0.000004 loss_cls: 3.1927 (3.0879) grad_norm: 2.7328 (3.1066) time: 1.3002 data: 0.0003 max mem: 13912 +[2024-12-06 19:50:09 root] (utils.py 283): INFO Epoch: [12] [ 840/2502] eta: 0:36:05 lr: 0.000004 loss_cls: 3.2890 (3.0908) grad_norm: 2.7307 (3.1093) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 19:50:22 root] (utils.py 283): INFO Epoch: [12] [ 850/2502] eta: 0:35:52 lr: 0.000004 loss_cls: 3.2557 (3.0919) grad_norm: 2.5913 (3.1112) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 19:50:35 root] (utils.py 283): INFO Epoch: [12] [ 860/2502] eta: 0:35:39 lr: 0.000004 loss_cls: 3.2449 (3.0914) grad_norm: 2.5528 (3.1073) time: 1.3045 data: 0.0003 max mem: 13912 +[2024-12-06 19:50:48 root] (utils.py 283): INFO Epoch: [12] [ 870/2502] eta: 0:35:26 lr: 0.000004 loss_cls: 3.1286 (3.0903) grad_norm: 2.5528 (3.1029) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 19:51:01 root] (utils.py 283): INFO Epoch: [12] [ 880/2502] eta: 0:35:13 lr: 0.000004 loss_cls: 3.0926 (3.0892) grad_norm: 2.6569 (3.1123) time: 1.3091 data: 0.0003 max mem: 13912 +[2024-12-06 19:51:14 root] (utils.py 283): INFO Epoch: [12] [ 890/2502] eta: 0:35:00 lr: 0.000004 loss_cls: 3.1745 (3.0907) grad_norm: 2.7633 (3.1129) time: 1.3079 data: 0.0002 max mem: 13912 +[2024-12-06 19:51:27 root] (utils.py 283): INFO Epoch: [12] [ 900/2502] eta: 0:34:47 lr: 0.000004 loss_cls: 3.2127 (3.0895) grad_norm: 2.5653 (3.1074) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 19:51:40 root] (utils.py 283): INFO Epoch: [12] [ 910/2502] eta: 0:34:34 lr: 0.000004 loss_cls: 3.2706 (3.0920) grad_norm: 2.5708 (3.1034) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 19:51:53 root] (utils.py 283): INFO Epoch: [12] [ 920/2502] eta: 0:34:21 lr: 0.000004 loss_cls: 3.2086 (3.0874) grad_norm: 2.6321 (3.1021) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 19:52:06 root] (utils.py 283): INFO Epoch: [12] [ 930/2502] eta: 0:34:08 lr: 0.000004 loss_cls: 2.7949 (3.0849) grad_norm: 2.6673 (3.1012) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 19:52:19 root] (utils.py 283): INFO Epoch: [12] [ 940/2502] eta: 0:33:55 lr: 0.000004 loss_cls: 3.1483 (3.0856) grad_norm: 2.7296 (3.1010) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 19:52:33 root] (utils.py 283): INFO Epoch: [12] [ 950/2502] eta: 0:33:42 lr: 0.000004 loss_cls: 3.1606 (3.0846) grad_norm: 2.6169 (3.0970) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 19:52:46 root] (utils.py 283): INFO Epoch: [12] [ 960/2502] eta: 0:33:29 lr: 0.000004 loss_cls: 3.0817 (3.0852) grad_norm: 2.6073 (3.0934) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 19:52:59 root] (utils.py 283): INFO Epoch: [12] [ 970/2502] eta: 0:33:16 lr: 0.000004 loss_cls: 3.1228 (3.0842) grad_norm: 2.7335 (3.0973) time: 1.3028 data: 0.0002 max mem: 13912 +[2024-12-06 19:53:12 root] (utils.py 283): INFO Epoch: [12] [ 980/2502] eta: 0:33:03 lr: 0.000004 loss_cls: 3.0685 (3.0826) grad_norm: 2.8081 (3.0972) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 19:53:25 root] (utils.py 283): INFO Epoch: [12] [ 990/2502] eta: 0:32:50 lr: 0.000004 loss_cls: 3.1627 (3.0835) grad_norm: 2.6272 (3.0932) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 19:53:38 root] (utils.py 283): INFO Epoch: [12] [1000/2502] eta: 0:32:37 lr: 0.000004 loss_cls: 3.1627 (3.0821) grad_norm: 2.6125 (3.0974) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 19:53:51 root] (utils.py 283): INFO Epoch: [12] [1010/2502] eta: 0:32:24 lr: 0.000004 loss_cls: 3.0546 (3.0833) grad_norm: 2.6874 (3.0946) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 19:54:04 root] (utils.py 283): INFO Epoch: [12] [1020/2502] eta: 0:32:11 lr: 0.000004 loss_cls: 3.2954 (3.0861) grad_norm: 2.6982 (3.0959) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 19:54:17 root] (utils.py 283): INFO Epoch: [12] [1030/2502] eta: 0:31:58 lr: 0.000004 loss_cls: 3.4106 (3.0878) grad_norm: 3.1391 (3.0978) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 19:54:30 root] (utils.py 283): INFO Epoch: [12] [1040/2502] eta: 0:31:45 lr: 0.000004 loss_cls: 3.4671 (3.0892) grad_norm: 2.9284 (3.0992) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 19:54:43 root] (utils.py 283): INFO Epoch: [12] [1050/2502] eta: 0:31:32 lr: 0.000004 loss_cls: 3.1255 (3.0883) grad_norm: 2.7401 (3.0953) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 19:54:56 root] (utils.py 283): INFO Epoch: [12] [1060/2502] eta: 0:31:19 lr: 0.000004 loss_cls: 3.1891 (3.0904) grad_norm: 2.5337 (3.0909) time: 1.2996 data: 0.0003 max mem: 13912 +[2024-12-06 19:55:09 root] (utils.py 283): INFO Epoch: [12] [1070/2502] eta: 0:31:06 lr: 0.000004 loss_cls: 3.2931 (3.0918) grad_norm: 2.6955 (3.0971) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 19:55:22 root] (utils.py 283): INFO Epoch: [12] [1080/2502] eta: 0:30:53 lr: 0.000004 loss_cls: 3.2931 (3.0936) grad_norm: 2.8928 (3.0958) time: 1.3095 data: 0.0003 max mem: 13912 +[2024-12-06 19:55:35 root] (utils.py 283): INFO Epoch: [12] [1090/2502] eta: 0:30:40 lr: 0.000004 loss_cls: 3.2823 (3.0945) grad_norm: 2.8345 (3.0945) time: 1.3086 data: 0.0003 max mem: 13912 +[2024-12-06 19:55:48 root] (utils.py 283): INFO Epoch: [12] [1100/2502] eta: 0:30:27 lr: 0.000004 loss_cls: 3.2412 (3.0942) grad_norm: 2.8489 (3.0961) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 19:56:01 root] (utils.py 283): INFO Epoch: [12] [1110/2502] eta: 0:30:14 lr: 0.000004 loss_cls: 3.3492 (3.0973) grad_norm: 2.7191 (3.0932) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 19:56:14 root] (utils.py 283): INFO Epoch: [12] [1120/2502] eta: 0:30:01 lr: 0.000004 loss_cls: 3.3492 (3.0980) grad_norm: 2.6322 (3.0905) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 19:56:27 root] (utils.py 283): INFO Epoch: [12] [1130/2502] eta: 0:29:48 lr: 0.000004 loss_cls: 3.2321 (3.0984) grad_norm: 2.7434 (3.0896) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 19:56:40 root] (utils.py 283): INFO Epoch: [12] [1140/2502] eta: 0:29:35 lr: 0.000004 loss_cls: 3.2321 (3.0969) grad_norm: 2.7733 (3.0890) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 19:56:53 root] (utils.py 283): INFO Epoch: [12] [1150/2502] eta: 0:29:22 lr: 0.000004 loss_cls: 3.2458 (3.0977) grad_norm: 2.7733 (3.0926) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 19:57:06 root] (utils.py 283): INFO Epoch: [12] [1160/2502] eta: 0:29:09 lr: 0.000004 loss_cls: 3.2253 (3.0968) grad_norm: 2.8631 (3.0917) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 19:57:19 root] (utils.py 283): INFO Epoch: [12] [1170/2502] eta: 0:28:56 lr: 0.000004 loss_cls: 3.0338 (3.0954) grad_norm: 2.6485 (3.0922) time: 1.3060 data: 0.0003 max mem: 13912 +[2024-12-06 19:57:32 root] (utils.py 283): INFO Epoch: [12] [1180/2502] eta: 0:28:43 lr: 0.000004 loss_cls: 2.8211 (3.0943) grad_norm: 2.5452 (3.1019) time: 1.3082 data: 0.0002 max mem: 13912 +[2024-12-06 19:57:46 root] (utils.py 283): INFO Epoch: [12] [1190/2502] eta: 0:28:30 lr: 0.000004 loss_cls: 2.9911 (3.0932) grad_norm: 2.6915 (3.1008) time: 1.3085 data: 0.0002 max mem: 13912 +[2024-12-06 19:57:59 root] (utils.py 283): INFO Epoch: [12] [1200/2502] eta: 0:28:17 lr: 0.000004 loss_cls: 2.9911 (3.0926) grad_norm: 2.7583 (3.1004) time: 1.3118 data: 0.0002 max mem: 13912 +[2024-12-06 19:58:12 root] (utils.py 283): INFO Epoch: [12] [1210/2502] eta: 0:28:04 lr: 0.000004 loss_cls: 3.2031 (3.0936) grad_norm: 2.8687 (3.1001) time: 1.3066 data: 0.0002 max mem: 13912 +[2024-12-06 19:58:25 root] (utils.py 283): INFO Epoch: [12] [1220/2502] eta: 0:27:51 lr: 0.000004 loss_cls: 3.3670 (3.0949) grad_norm: 2.8475 (3.1202) time: 1.2989 data: 0.0002 max mem: 13912 +[2024-12-06 19:58:38 root] (utils.py 283): INFO Epoch: [12] [1230/2502] eta: 0:27:37 lr: 0.000004 loss_cls: 3.3385 (3.0951) grad_norm: 2.8475 (3.1194) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 19:58:51 root] (utils.py 283): INFO Epoch: [12] [1240/2502] eta: 0:27:24 lr: 0.000004 loss_cls: 3.3385 (3.0966) grad_norm: 2.8308 (3.1196) time: 1.2983 data: 0.0003 max mem: 13912 +[2024-12-06 19:59:04 root] (utils.py 283): INFO Epoch: [12] [1250/2502] eta: 0:27:11 lr: 0.000004 loss_cls: 3.1414 (3.0953) grad_norm: 2.8850 (3.1175) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 19:59:17 root] (utils.py 283): INFO Epoch: [12] [1260/2502] eta: 0:26:58 lr: 0.000004 loss_cls: 3.1083 (3.0959) grad_norm: 3.0031 (3.1215) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 19:59:30 root] (utils.py 283): INFO Epoch: [12] [1270/2502] eta: 0:26:45 lr: 0.000004 loss_cls: 3.1979 (3.0942) grad_norm: 2.9869 (3.1198) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 19:59:43 root] (utils.py 283): INFO Epoch: [12] [1280/2502] eta: 0:26:32 lr: 0.000004 loss_cls: 2.9152 (3.0922) grad_norm: 2.7663 (3.1181) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 19:59:56 root] (utils.py 283): INFO Epoch: [12] [1290/2502] eta: 0:26:19 lr: 0.000004 loss_cls: 3.0824 (3.0930) grad_norm: 2.8381 (3.1245) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 20:00:09 root] (utils.py 283): INFO Epoch: [12] [1300/2502] eta: 0:26:06 lr: 0.000004 loss_cls: 3.0669 (3.0907) grad_norm: 2.9777 (3.1257) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 20:00:22 root] (utils.py 283): INFO Epoch: [12] [1310/2502] eta: 0:25:53 lr: 0.000004 loss_cls: 3.1395 (3.0927) grad_norm: 2.7621 (3.1230) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 20:00:35 root] (utils.py 283): INFO Epoch: [12] [1320/2502] eta: 0:25:40 lr: 0.000004 loss_cls: 3.3252 (3.0936) grad_norm: 2.6262 (3.1217) time: 1.3271 data: 0.0003 max mem: 13912 +[2024-12-06 20:00:49 root] (utils.py 283): INFO Epoch: [12] [1330/2502] eta: 0:25:28 lr: 0.000004 loss_cls: 3.2082 (3.0928) grad_norm: 2.7577 (3.1180) time: 1.3440 data: 0.0004 max mem: 13912 +[2024-12-06 20:01:02 root] (utils.py 283): INFO Epoch: [12] [1340/2502] eta: 0:25:15 lr: 0.000004 loss_cls: 3.0887 (3.0927) grad_norm: 2.7480 (3.1166) time: 1.3172 data: 0.0003 max mem: 13912 +[2024-12-06 20:01:15 root] (utils.py 283): INFO Epoch: [12] [1350/2502] eta: 0:25:02 lr: 0.000004 loss_cls: 3.0887 (3.0928) grad_norm: 2.9084 (3.1171) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 20:01:28 root] (utils.py 283): INFO Epoch: [12] [1360/2502] eta: 0:24:48 lr: 0.000004 loss_cls: 3.1615 (3.0922) grad_norm: 2.5877 (3.1128) time: 1.3004 data: 0.0002 max mem: 13912 +[2024-12-06 20:01:41 root] (utils.py 283): INFO Epoch: [12] [1370/2502] eta: 0:24:35 lr: 0.000004 loss_cls: 3.3231 (3.0935) grad_norm: 2.5557 (3.1099) time: 1.2964 data: 0.0002 max mem: 13912 +[2024-12-06 20:01:54 root] (utils.py 283): INFO Epoch: [12] [1380/2502] eta: 0:24:22 lr: 0.000004 loss_cls: 3.2371 (3.0918) grad_norm: 2.6018 (3.1062) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 20:02:07 root] (utils.py 283): INFO Epoch: [12] [1390/2502] eta: 0:24:09 lr: 0.000004 loss_cls: 3.0306 (3.0919) grad_norm: 2.6448 (3.1037) time: 1.2997 data: 0.0002 max mem: 13912 +[2024-12-06 20:02:20 root] (utils.py 283): INFO Epoch: [12] [1400/2502] eta: 0:23:56 lr: 0.000004 loss_cls: 3.2020 (3.0936) grad_norm: 2.7710 (3.1080) time: 1.2952 data: 0.0002 max mem: 13912 +[2024-12-06 20:02:33 root] (utils.py 283): INFO Epoch: [12] [1410/2502] eta: 0:23:43 lr: 0.000004 loss_cls: 3.4183 (3.0943) grad_norm: 2.7513 (3.1075) time: 1.2939 data: 0.0002 max mem: 13912 +[2024-12-06 20:02:45 root] (utils.py 283): INFO Epoch: [12] [1420/2502] eta: 0:23:30 lr: 0.000004 loss_cls: 3.3004 (3.0936) grad_norm: 2.6791 (3.1051) time: 1.2934 data: 0.0003 max mem: 13912 +[2024-12-06 20:02:58 root] (utils.py 283): INFO Epoch: [12] [1430/2502] eta: 0:23:17 lr: 0.000004 loss_cls: 3.2059 (3.0935) grad_norm: 2.6791 (3.1026) time: 1.2963 data: 0.0003 max mem: 13912 +[2024-12-06 20:03:11 root] (utils.py 283): INFO Epoch: [12] [1440/2502] eta: 0:23:04 lr: 0.000004 loss_cls: 3.2550 (3.0946) grad_norm: 2.8079 (3.1129) time: 1.2963 data: 0.0003 max mem: 13912 +[2024-12-06 20:03:24 root] (utils.py 283): INFO Epoch: [12] [1450/2502] eta: 0:22:51 lr: 0.000004 loss_cls: 3.3027 (3.0952) grad_norm: 2.9760 (3.1114) time: 1.2953 data: 0.0003 max mem: 13912 +[2024-12-06 20:03:37 root] (utils.py 283): INFO Epoch: [12] [1460/2502] eta: 0:22:38 lr: 0.000004 loss_cls: 3.2268 (3.0952) grad_norm: 2.9760 (3.1103) time: 1.2954 data: 0.0002 max mem: 13912 +[2024-12-06 20:03:50 root] (utils.py 283): INFO Epoch: [12] [1470/2502] eta: 0:22:24 lr: 0.000004 loss_cls: 3.2237 (3.0952) grad_norm: 2.6869 (3.1069) time: 1.2949 data: 0.0002 max mem: 13912 +[2024-12-06 20:04:03 root] (utils.py 283): INFO Epoch: [12] [1480/2502] eta: 0:22:11 lr: 0.000004 loss_cls: 3.2372 (3.0957) grad_norm: 2.6123 (3.1062) time: 1.2953 data: 0.0003 max mem: 13912 +[2024-12-06 20:04:16 root] (utils.py 283): INFO Epoch: [12] [1490/2502] eta: 0:21:58 lr: 0.000004 loss_cls: 3.0926 (3.0947) grad_norm: 2.7891 (3.1068) time: 1.2942 data: 0.0003 max mem: 13912 +[2024-12-06 20:04:29 root] (utils.py 283): INFO Epoch: [12] [1500/2502] eta: 0:21:45 lr: 0.000004 loss_cls: 3.0918 (3.0956) grad_norm: 2.7293 (3.1046) time: 1.2961 data: 0.0003 max mem: 13912 +[2024-12-06 20:04:42 root] (utils.py 283): INFO Epoch: [12] [1510/2502] eta: 0:21:32 lr: 0.000004 loss_cls: 3.1677 (3.0956) grad_norm: 2.6204 (3.1016) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 20:04:55 root] (utils.py 283): INFO Epoch: [12] [1520/2502] eta: 0:21:19 lr: 0.000004 loss_cls: 3.1932 (3.0937) grad_norm: 2.6204 (3.1059) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 20:05:08 root] (utils.py 283): INFO Epoch: [12] [1530/2502] eta: 0:21:06 lr: 0.000004 loss_cls: 2.6399 (3.0921) grad_norm: 2.5671 (3.1033) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 20:05:21 root] (utils.py 283): INFO Epoch: [12] [1540/2502] eta: 0:20:53 lr: 0.000004 loss_cls: 3.0423 (3.0916) grad_norm: 2.7256 (3.1019) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 20:05:34 root] (utils.py 283): INFO Epoch: [12] [1550/2502] eta: 0:20:40 lr: 0.000004 loss_cls: 3.2775 (3.0925) grad_norm: 2.7256 (3.0998) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 20:05:47 root] (utils.py 283): INFO Epoch: [12] [1560/2502] eta: 0:20:27 lr: 0.000004 loss_cls: 3.3518 (3.0931) grad_norm: 2.7819 (3.1034) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 20:06:00 root] (utils.py 283): INFO Epoch: [12] [1570/2502] eta: 0:20:14 lr: 0.000004 loss_cls: 3.2828 (3.0943) grad_norm: 2.7035 (3.1007) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 20:06:13 root] (utils.py 283): INFO Epoch: [12] [1580/2502] eta: 0:20:01 lr: 0.000004 loss_cls: 3.0588 (3.0924) grad_norm: 2.6791 (3.0993) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 20:06:26 root] (utils.py 283): INFO Epoch: [12] [1590/2502] eta: 0:19:48 lr: 0.000004 loss_cls: 2.9316 (3.0922) grad_norm: 2.7874 (3.0986) time: 1.2993 data: 0.0002 max mem: 13912 +[2024-12-06 20:06:39 root] (utils.py 283): INFO Epoch: [12] [1600/2502] eta: 0:19:35 lr: 0.000004 loss_cls: 2.9169 (3.0911) grad_norm: 2.7100 (3.0972) time: 1.2985 data: 0.0003 max mem: 13912 +[2024-12-06 20:06:52 root] (utils.py 283): INFO Epoch: [12] [1610/2502] eta: 0:19:22 lr: 0.000004 loss_cls: 2.9169 (3.0905) grad_norm: 2.6208 (3.0955) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 20:07:05 root] (utils.py 283): INFO Epoch: [12] [1620/2502] eta: 0:19:09 lr: 0.000004 loss_cls: 3.1194 (3.0907) grad_norm: 2.7074 (3.0947) time: 1.2990 data: 0.0003 max mem: 13912 +[2024-12-06 20:07:18 root] (utils.py 283): INFO Epoch: [12] [1630/2502] eta: 0:18:56 lr: 0.000004 loss_cls: 3.2996 (3.0910) grad_norm: 2.7074 (3.0938) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 20:07:31 root] (utils.py 283): INFO Epoch: [12] [1640/2502] eta: 0:18:43 lr: 0.000004 loss_cls: 3.2629 (3.0918) grad_norm: 2.6711 (3.0926) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 20:07:44 root] (utils.py 283): INFO Epoch: [12] [1650/2502] eta: 0:18:30 lr: 0.000004 loss_cls: 3.2380 (3.0924) grad_norm: 2.5716 (3.0897) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 20:07:57 root] (utils.py 283): INFO Epoch: [12] [1660/2502] eta: 0:18:17 lr: 0.000004 loss_cls: 3.3399 (3.0936) grad_norm: 2.5528 (3.0887) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 20:08:10 root] (utils.py 283): INFO Epoch: [12] [1670/2502] eta: 0:18:03 lr: 0.000004 loss_cls: 3.3041 (3.0924) grad_norm: 2.5528 (3.0864) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 20:08:23 root] (utils.py 283): INFO Epoch: [12] [1680/2502] eta: 0:17:50 lr: 0.000004 loss_cls: 3.0807 (3.0937) grad_norm: 2.9205 (3.0917) time: 1.3006 data: 0.0003 max mem: 13912 +[2024-12-06 20:08:36 root] (utils.py 283): INFO Epoch: [12] [1690/2502] eta: 0:17:37 lr: 0.000004 loss_cls: 3.1212 (3.0937) grad_norm: 2.7250 (3.0904) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 20:08:49 root] (utils.py 283): INFO Epoch: [12] [1700/2502] eta: 0:17:24 lr: 0.000004 loss_cls: 3.1212 (3.0939) grad_norm: 2.6905 (3.0893) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 20:09:02 root] (utils.py 283): INFO Epoch: [12] [1710/2502] eta: 0:17:11 lr: 0.000004 loss_cls: 3.1549 (3.0920) grad_norm: 2.9239 (3.0878) time: 1.3032 data: 0.0002 max mem: 13912 +[2024-12-06 20:09:15 root] (utils.py 283): INFO Epoch: [12] [1720/2502] eta: 0:16:58 lr: 0.000004 loss_cls: 2.9389 (3.0917) grad_norm: 2.6765 (3.0869) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 20:09:28 root] (utils.py 283): INFO Epoch: [12] [1730/2502] eta: 0:16:45 lr: 0.000004 loss_cls: 3.1073 (3.0925) grad_norm: 2.6765 (3.0863) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 20:09:41 root] (utils.py 283): INFO Epoch: [12] [1740/2502] eta: 0:16:32 lr: 0.000004 loss_cls: 3.2777 (3.0931) grad_norm: 2.6940 (3.0842) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 20:09:54 root] (utils.py 283): INFO Epoch: [12] [1750/2502] eta: 0:16:19 lr: 0.000004 loss_cls: 3.3566 (3.0941) grad_norm: 2.6737 (3.0816) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 20:10:07 root] (utils.py 283): INFO Epoch: [12] [1760/2502] eta: 0:16:06 lr: 0.000004 loss_cls: 3.3777 (3.0951) grad_norm: 2.8347 (3.0823) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 20:10:20 root] (utils.py 283): INFO Epoch: [12] [1770/2502] eta: 0:15:53 lr: 0.000004 loss_cls: 3.3432 (3.0958) grad_norm: 3.0613 (3.0848) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 20:10:34 root] (utils.py 283): INFO Epoch: [12] [1780/2502] eta: 0:15:40 lr: 0.000004 loss_cls: 3.2384 (3.0961) grad_norm: 2.7395 (3.0851) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 20:10:47 root] (utils.py 283): INFO Epoch: [12] [1790/2502] eta: 0:15:27 lr: 0.000004 loss_cls: 3.2174 (3.0957) grad_norm: 2.6716 (3.0821) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 20:11:00 root] (utils.py 283): INFO Epoch: [12] [1800/2502] eta: 0:15:14 lr: 0.000004 loss_cls: 3.0579 (3.0955) grad_norm: 2.6716 (3.0824) time: 1.3052 data: 0.0003 max mem: 13912 +[2024-12-06 20:11:13 root] (utils.py 283): INFO Epoch: [12] [1810/2502] eta: 0:15:01 lr: 0.000004 loss_cls: 3.0317 (3.0957) grad_norm: 2.9568 (3.0817) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 20:11:26 root] (utils.py 283): INFO Epoch: [12] [1820/2502] eta: 0:14:48 lr: 0.000004 loss_cls: 3.0317 (3.0950) grad_norm: 2.6213 (3.0798) time: 1.3002 data: 0.0002 max mem: 13912 +[2024-12-06 20:11:39 root] (utils.py 283): INFO Epoch: [12] [1830/2502] eta: 0:14:35 lr: 0.000004 loss_cls: 2.8088 (3.0934) grad_norm: 2.5854 (3.0773) time: 1.3026 data: 0.0002 max mem: 13912 +[2024-12-06 20:11:52 root] (utils.py 283): INFO Epoch: [12] [1840/2502] eta: 0:14:22 lr: 0.000004 loss_cls: 2.9610 (3.0931) grad_norm: 2.6574 (3.0780) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 20:12:05 root] (utils.py 283): INFO Epoch: [12] [1850/2502] eta: 0:14:09 lr: 0.000004 loss_cls: 3.1361 (3.0929) grad_norm: 2.7920 (3.0766) time: 1.2997 data: 0.0002 max mem: 13912 +[2024-12-06 20:12:18 root] (utils.py 283): INFO Epoch: [12] [1860/2502] eta: 0:13:56 lr: 0.000004 loss_cls: 2.9650 (3.0923) grad_norm: 2.6725 (3.0748) time: 1.3011 data: 0.0002 max mem: 13912 +[2024-12-06 20:12:31 root] (utils.py 283): INFO Epoch: [12] [1870/2502] eta: 0:13:43 lr: 0.000004 loss_cls: 2.8267 (3.0915) grad_norm: 2.6506 (3.0728) time: 1.3001 data: 0.0002 max mem: 13912 +[2024-12-06 20:12:44 root] (utils.py 283): INFO Epoch: [12] [1880/2502] eta: 0:13:30 lr: 0.000004 loss_cls: 2.8380 (3.0906) grad_norm: 2.8337 (3.0724) time: 1.2995 data: 0.0003 max mem: 13912 +[2024-12-06 20:12:57 root] (utils.py 283): INFO Epoch: [12] [1890/2502] eta: 0:13:17 lr: 0.000004 loss_cls: 3.3149 (3.0913) grad_norm: 2.8450 (3.0720) time: 1.3005 data: 0.0003 max mem: 13912 +[2024-12-06 20:13:10 root] (utils.py 283): INFO Epoch: [12] [1900/2502] eta: 0:13:04 lr: 0.000004 loss_cls: 3.4394 (3.0923) grad_norm: 2.6593 (3.0709) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 20:13:23 root] (utils.py 283): INFO Epoch: [12] [1910/2502] eta: 0:12:51 lr: 0.000004 loss_cls: 3.3144 (3.0926) grad_norm: 2.6593 (3.0726) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 20:13:36 root] (utils.py 283): INFO Epoch: [12] [1920/2502] eta: 0:12:38 lr: 0.000004 loss_cls: 3.2217 (3.0932) grad_norm: 2.7871 (3.0710) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 20:13:49 root] (utils.py 283): INFO Epoch: [12] [1930/2502] eta: 0:12:25 lr: 0.000004 loss_cls: 3.3640 (3.0941) grad_norm: 2.6433 (3.0686) time: 1.3103 data: 0.0003 max mem: 13912 +[2024-12-06 20:14:02 root] (utils.py 283): INFO Epoch: [12] [1940/2502] eta: 0:12:12 lr: 0.000004 loss_cls: 3.3253 (3.0936) grad_norm: 2.7239 (3.0682) time: 1.3085 data: 0.0003 max mem: 13912 +[2024-12-06 20:14:15 root] (utils.py 283): INFO Epoch: [12] [1950/2502] eta: 0:11:59 lr: 0.000004 loss_cls: 3.1484 (3.0932) grad_norm: 2.7233 (3.0672) time: 1.2993 data: 0.0003 max mem: 13912 +[2024-12-06 20:14:28 root] (utils.py 283): INFO Epoch: [12] [1960/2502] eta: 0:11:46 lr: 0.000004 loss_cls: 2.8294 (3.0916) grad_norm: 2.6887 (3.0691) time: 1.3002 data: 0.0003 max mem: 13912 +[2024-12-06 20:14:41 root] (utils.py 283): INFO Epoch: [12] [1970/2502] eta: 0:11:33 lr: 0.000004 loss_cls: 2.9052 (3.0914) grad_norm: 2.6887 (3.0701) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 20:14:54 root] (utils.py 283): INFO Epoch: [12] [1980/2502] eta: 0:11:20 lr: 0.000004 loss_cls: 3.2231 (3.0916) grad_norm: 2.7146 (3.0689) time: 1.2991 data: 0.0003 max mem: 13912 +[2024-12-06 20:15:07 root] (utils.py 283): INFO Epoch: [12] [1990/2502] eta: 0:11:06 lr: 0.000004 loss_cls: 3.1034 (3.0903) grad_norm: 2.7209 (3.0676) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 20:15:20 root] (utils.py 283): INFO Epoch: [12] [2000/2502] eta: 0:10:53 lr: 0.000004 loss_cls: 3.1472 (3.0910) grad_norm: 2.5412 (3.0663) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 20:15:33 root] (utils.py 283): INFO Epoch: [12] [2010/2502] eta: 0:10:40 lr: 0.000004 loss_cls: 3.1472 (3.0908) grad_norm: 2.8665 (3.0695) time: 1.2991 data: 0.0003 max mem: 13912 +[2024-12-06 20:15:46 root] (utils.py 283): INFO Epoch: [12] [2020/2502] eta: 0:10:27 lr: 0.000004 loss_cls: 2.8987 (3.0897) grad_norm: 2.8665 (3.0694) time: 1.2949 data: 0.0003 max mem: 13912 +[2024-12-06 20:15:59 root] (utils.py 283): INFO Epoch: [12] [2030/2502] eta: 0:10:14 lr: 0.000004 loss_cls: 2.9982 (3.0895) grad_norm: 2.7527 (3.0732) time: 1.2942 data: 0.0003 max mem: 13912 +[2024-12-06 20:16:12 root] (utils.py 283): INFO Epoch: [12] [2040/2502] eta: 0:10:01 lr: 0.000004 loss_cls: 3.1009 (3.0902) grad_norm: 2.7412 (3.0713) time: 1.2934 data: 0.0003 max mem: 13912 +[2024-12-06 20:16:25 root] (utils.py 283): INFO Epoch: [12] [2050/2502] eta: 0:09:48 lr: 0.000004 loss_cls: 3.1708 (3.0906) grad_norm: 2.6652 (3.0697) time: 1.2936 data: 0.0003 max mem: 13912 +[2024-12-06 20:16:38 root] (utils.py 283): INFO Epoch: [12] [2060/2502] eta: 0:09:35 lr: 0.000004 loss_cls: 3.1014 (3.0909) grad_norm: 2.6394 (3.0715) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 20:16:51 root] (utils.py 283): INFO Epoch: [12] [2070/2502] eta: 0:09:22 lr: 0.000004 loss_cls: 3.2342 (3.0907) grad_norm: 2.6648 (3.0700) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 20:17:04 root] (utils.py 283): INFO Epoch: [12] [2080/2502] eta: 0:09:09 lr: 0.000004 loss_cls: 3.2342 (3.0912) grad_norm: 2.7082 (3.0689) time: 1.2932 data: 0.0003 max mem: 13912 +[2024-12-06 20:17:17 root] (utils.py 283): INFO Epoch: [12] [2090/2502] eta: 0:08:56 lr: 0.000004 loss_cls: 3.2050 (3.0920) grad_norm: 2.8764 (3.0679) time: 1.2942 data: 0.0002 max mem: 13912 +[2024-12-06 20:17:30 root] (utils.py 283): INFO Epoch: [12] [2100/2502] eta: 0:08:43 lr: 0.000004 loss_cls: 3.3609 (3.0936) grad_norm: 2.7828 (3.0690) time: 1.2954 data: 0.0002 max mem: 13912 +[2024-12-06 20:17:43 root] (utils.py 283): INFO Epoch: [12] [2110/2502] eta: 0:08:30 lr: 0.000004 loss_cls: 3.2954 (3.0932) grad_norm: 2.7522 (3.0684) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 20:17:56 root] (utils.py 283): INFO Epoch: [12] [2120/2502] eta: 0:08:17 lr: 0.000004 loss_cls: 3.1906 (3.0935) grad_norm: 2.6201 (3.0664) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 20:18:09 root] (utils.py 283): INFO Epoch: [12] [2130/2502] eta: 0:08:04 lr: 0.000004 loss_cls: 3.2857 (3.0941) grad_norm: 2.5299 (3.0646) time: 1.2986 data: 0.0003 max mem: 13912 +[2024-12-06 20:18:22 root] (utils.py 283): INFO Epoch: [12] [2140/2502] eta: 0:07:51 lr: 0.000004 loss_cls: 3.2641 (3.0946) grad_norm: 2.5752 (3.0637) time: 1.2959 data: 0.0003 max mem: 13912 +[2024-12-06 20:18:35 root] (utils.py 283): INFO Epoch: [12] [2150/2502] eta: 0:07:38 lr: 0.000004 loss_cls: 3.3003 (3.0953) grad_norm: 2.6412 (3.0628) time: 1.2929 data: 0.0003 max mem: 13912 +[2024-12-06 20:18:48 root] (utils.py 283): INFO Epoch: [12] [2160/2502] eta: 0:07:25 lr: 0.000004 loss_cls: 3.3078 (3.0958) grad_norm: 2.7808 (3.0622) time: 1.2944 data: 0.0003 max mem: 13912 +[2024-12-06 20:19:01 root] (utils.py 283): INFO Epoch: [12] [2170/2502] eta: 0:07:12 lr: 0.000004 loss_cls: 3.1597 (3.0960) grad_norm: 2.7808 (3.0626) time: 1.2979 data: 0.0003 max mem: 13912 +[2024-12-06 20:19:14 root] (utils.py 283): INFO Epoch: [12] [2180/2502] eta: 0:06:59 lr: 0.000004 loss_cls: 3.1984 (3.0959) grad_norm: 2.7756 (3.0620) time: 1.3002 data: 0.0002 max mem: 13912 +[2024-12-06 20:19:27 root] (utils.py 283): INFO Epoch: [12] [2190/2502] eta: 0:06:46 lr: 0.000004 loss_cls: 3.2042 (3.0965) grad_norm: 2.7787 (3.0627) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 20:19:39 root] (utils.py 283): INFO Epoch: [12] [2200/2502] eta: 0:06:33 lr: 0.000004 loss_cls: 3.2073 (3.0963) grad_norm: 2.7181 (3.0612) time: 1.2985 data: 0.0003 max mem: 13912 +[2024-12-06 20:19:53 root] (utils.py 283): INFO Epoch: [12] [2210/2502] eta: 0:06:20 lr: 0.000004 loss_cls: 2.9605 (3.0951) grad_norm: 2.7511 (3.0618) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 20:20:06 root] (utils.py 283): INFO Epoch: [12] [2220/2502] eta: 0:06:07 lr: 0.000004 loss_cls: 3.1700 (3.0961) grad_norm: 2.7458 (3.0610) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 20:20:19 root] (utils.py 283): INFO Epoch: [12] [2230/2502] eta: 0:05:54 lr: 0.000004 loss_cls: 3.2118 (3.0956) grad_norm: 2.7002 (3.0609) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 20:20:32 root] (utils.py 283): INFO Epoch: [12] [2240/2502] eta: 0:05:41 lr: 0.000004 loss_cls: 3.1249 (3.0954) grad_norm: 2.5757 (3.0591) time: 1.2996 data: 0.0003 max mem: 13912 +[2024-12-06 20:20:45 root] (utils.py 283): INFO Epoch: [12] [2250/2502] eta: 0:05:28 lr: 0.000004 loss_cls: 3.0596 (3.0952) grad_norm: 2.5795 (3.0581) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 20:20:58 root] (utils.py 283): INFO Epoch: [12] [2260/2502] eta: 0:05:15 lr: 0.000004 loss_cls: 3.1586 (3.0953) grad_norm: 2.6556 (3.0576) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 20:21:11 root] (utils.py 283): INFO Epoch: [12] [2270/2502] eta: 0:05:02 lr: 0.000004 loss_cls: 3.1118 (3.0942) grad_norm: 2.7509 (3.0565) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 20:21:24 root] (utils.py 283): INFO Epoch: [12] [2280/2502] eta: 0:04:49 lr: 0.000004 loss_cls: 3.1545 (3.0951) grad_norm: 2.8260 (3.0566) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 20:21:37 root] (utils.py 283): INFO Epoch: [12] [2290/2502] eta: 0:04:36 lr: 0.000004 loss_cls: 3.3377 (3.0958) grad_norm: 2.8806 (3.0555) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 20:21:50 root] (utils.py 283): INFO Epoch: [12] [2300/2502] eta: 0:04:23 lr: 0.000004 loss_cls: 3.2939 (3.0963) grad_norm: 2.8117 (3.0546) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 20:22:03 root] (utils.py 283): INFO Epoch: [12] [2310/2502] eta: 0:04:10 lr: 0.000004 loss_cls: 3.2382 (3.0967) grad_norm: 2.8117 (3.0539) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 20:22:16 root] (utils.py 283): INFO Epoch: [12] [2320/2502] eta: 0:03:56 lr: 0.000004 loss_cls: 3.2464 (3.0978) grad_norm: 2.6769 (3.0532) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 20:22:29 root] (utils.py 283): INFO Epoch: [12] [2330/2502] eta: 0:03:43 lr: 0.000004 loss_cls: 3.1907 (3.0974) grad_norm: 2.6769 (3.0534) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 20:22:42 root] (utils.py 283): INFO Epoch: [12] [2340/2502] eta: 0:03:30 lr: 0.000004 loss_cls: 3.0335 (3.0967) grad_norm: 2.6994 (3.0543) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 20:22:55 root] (utils.py 283): INFO Epoch: [12] [2350/2502] eta: 0:03:17 lr: 0.000004 loss_cls: 2.9825 (3.0960) grad_norm: 2.6623 (3.0532) time: 1.3059 data: 0.0003 max mem: 13912 +[2024-12-06 20:23:08 root] (utils.py 283): INFO Epoch: [12] [2360/2502] eta: 0:03:04 lr: 0.000004 loss_cls: 3.0976 (3.0962) grad_norm: 2.6211 (3.0515) time: 1.3049 data: 0.0003 max mem: 13912 +[2024-12-06 20:23:21 root] (utils.py 283): INFO Epoch: [12] [2370/2502] eta: 0:02:51 lr: 0.000004 loss_cls: 3.1959 (3.0961) grad_norm: 2.7167 (3.0519) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 20:23:34 root] (utils.py 283): INFO Epoch: [12] [2380/2502] eta: 0:02:38 lr: 0.000004 loss_cls: 3.1843 (3.0959) grad_norm: 2.9584 (3.0539) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 20:23:47 root] (utils.py 283): INFO Epoch: [12] [2390/2502] eta: 0:02:25 lr: 0.000004 loss_cls: 3.1809 (3.0955) grad_norm: 2.6967 (3.0691) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 20:24:00 root] (utils.py 283): INFO Epoch: [12] [2400/2502] eta: 0:02:12 lr: 0.000004 loss_cls: 3.1996 (3.0960) grad_norm: 2.7736 (3.0704) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 20:24:13 root] (utils.py 283): INFO Epoch: [12] [2410/2502] eta: 0:01:59 lr: 0.000004 loss_cls: 3.1996 (3.0957) grad_norm: 2.7736 (3.0693) time: 1.2986 data: 0.0003 max mem: 13912 +[2024-12-06 20:24:26 root] (utils.py 283): INFO Epoch: [12] [2420/2502] eta: 0:01:46 lr: 0.000004 loss_cls: 3.2173 (3.0964) grad_norm: 2.6776 (3.0685) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 20:24:39 root] (utils.py 283): INFO Epoch: [12] [2430/2502] eta: 0:01:33 lr: 0.000004 loss_cls: 3.1205 (3.0955) grad_norm: 2.9690 (3.0687) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 20:24:52 root] (utils.py 283): INFO Epoch: [12] [2440/2502] eta: 0:01:20 lr: 0.000004 loss_cls: 3.0404 (3.0956) grad_norm: 3.0031 (3.0700) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 20:25:05 root] (utils.py 283): INFO Epoch: [12] [2450/2502] eta: 0:01:07 lr: 0.000004 loss_cls: 3.1971 (3.0956) grad_norm: 3.0028 (3.0708) time: 1.3000 data: 0.0003 max mem: 13912 +[2024-12-06 20:25:18 root] (utils.py 283): INFO Epoch: [12] [2460/2502] eta: 0:00:54 lr: 0.000004 loss_cls: 3.0988 (3.0950) grad_norm: 2.6805 (3.0692) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 20:25:31 root] (utils.py 283): INFO Epoch: [12] [2470/2502] eta: 0:00:41 lr: 0.000004 loss_cls: 2.9457 (3.0948) grad_norm: 2.6762 (3.0690) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 20:25:44 root] (utils.py 283): INFO Epoch: [12] [2480/2502] eta: 0:00:28 lr: 0.000004 loss_cls: 2.9457 (3.0942) grad_norm: 2.6890 (3.0670) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 20:25:58 root] (utils.py 283): INFO Epoch: [12] [2490/2502] eta: 0:00:15 lr: 0.000004 loss_cls: 2.8684 (3.0934) grad_norm: 2.6027 (3.0671) time: 1.3263 data: 0.0251 max mem: 13912 +[2024-12-06 20:26:11 root] (utils.py 283): INFO Epoch: [12] [2500/2502] eta: 0:00:02 lr: 0.000004 loss_cls: 3.0942 (3.0938) grad_norm: 2.6034 (3.0684) time: 1.3263 data: 0.0251 max mem: 13912 +[2024-12-06 20:26:12 root] (utils.py 283): INFO Epoch: [12] [2501/2502] eta: 0:00:01 lr: 0.000004 loss_cls: 3.1123 (3.0939) grad_norm: 2.6269 (3.0683) time: 1.3262 data: 0.0251 max mem: 13912 +[2024-12-06 20:26:12 root] (utils.py 297): INFO Epoch: [12] Total time: 0:54:18 (1.3025 s / it) +[2024-12-06 20:26:12 root] (engine.py 179): INFO Averaged stats:lr: 0.000004 loss_cls: 3.1123 (3.0837) grad_norm: 2.6269 (3.0683) +[2024-12-06 20:26:13 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:21 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4336 (0.4336) acc1: 89.8438 (89.8438) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.2245 data: 0.0003 max mem: 13912 +[2024-12-06 20:26:15 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:20 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6648 (0.6634) acc1: 85.9375 (86.0795) acc3: 95.3125 (95.5256) acc5: 97.6562 (97.2301) time: 0.2277 data: 0.0004 max mem: 13912 +[2024-12-06 20:26:18 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6648 (0.6973) acc1: 84.3750 (85.3051) acc3: 95.3125 (95.2009) acc5: 96.8750 (96.9494) time: 0.2282 data: 0.0004 max mem: 13912 +[2024-12-06 20:26:20 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7299 (0.7204) acc1: 84.3750 (84.5262) acc3: 95.3125 (95.0101) acc5: 96.8750 (96.9002) time: 0.2284 data: 0.0004 max mem: 13912 +[2024-12-06 20:26:22 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7276 (0.7244) acc1: 82.8125 (84.2607) acc3: 94.5312 (94.9886) acc5: 96.8750 (96.9322) time: 0.2283 data: 0.0004 max mem: 13912 +[2024-12-06 20:26:24 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8263 (0.8065) acc1: 78.1250 (82.3376) acc3: 91.4062 (93.7653) acc5: 96.0938 (96.0018) time: 0.2282 data: 0.0004 max mem: 13912 +[2024-12-06 20:26:27 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0641 (0.8390) acc1: 75.0000 (81.9288) acc3: 88.2812 (93.0328) acc5: 91.4062 (95.3893) time: 0.2294 data: 0.0005 max mem: 13912 +[2024-12-06 20:26:29 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0510 (0.8660) acc1: 78.9062 (81.2390) acc3: 90.6250 (92.7377) acc5: 93.7500 (95.1585) time: 0.2294 data: 0.0005 max mem: 13912 +[2024-12-06 20:26:31 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0439 (0.8963) acc1: 75.7812 (80.5073) acc3: 89.8438 (92.2357) acc5: 92.9688 (94.7724) time: 0.2285 data: 0.0008 max mem: 13912 +[2024-12-06 20:26:34 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0830 (0.9206) acc1: 74.2188 (79.7905) acc3: 89.0625 (91.8441) acc5: 92.1875 (94.5484) time: 0.2285 data: 0.0008 max mem: 13912 +[2024-12-06 20:26:35 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0060 (0.9154) acc1: 75.7812 (79.8400) acc3: 89.8438 (91.9600) acc5: 92.9688 (94.6240) time: 0.2248 data: 0.0008 max mem: 13912 +[2024-12-06 20:26:35 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2279 s / it) +[2024-12-06 20:26:35 root] (engine.py 264): INFO * Acc@1 79.758 Acc@3 92.182 Acc@5 94.848 loss 0.910 flops 3.584 layer_flops 3.536 +[2024-12-06 20:26:35 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.8% +[2024-12-06 20:26:35 root] (main.py 551): INFO Max accuracy: 79.78% +[2024-12-06 20:26:36 root] (utils.py 283): INFO Epoch: [13] [ 0/2502] eta: 0:54:03 lr: 0.000003 loss_cls: 3.2030 (3.2030) grad_norm: 2.5130 (2.5130) time: 1.2964 data: 0.0004 max mem: 13912 +[2024-12-06 20:26:49 root] (utils.py 283): INFO Epoch: [13] [ 10/2502] eta: 0:54:04 lr: 0.000003 loss_cls: 3.1740 (3.0143) grad_norm: 2.7147 (2.8646) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 20:27:02 root] (utils.py 283): INFO Epoch: [13] [ 20/2502] eta: 0:53:52 lr: 0.000003 loss_cls: 3.1422 (3.0122) grad_norm: 2.8264 (2.9391) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 20:27:16 root] (utils.py 283): INFO Epoch: [13] [ 30/2502] eta: 0:53:42 lr: 0.000003 loss_cls: 3.1422 (2.9880) grad_norm: 2.6552 (2.8505) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 20:27:29 root] (utils.py 283): INFO Epoch: [13] [ 40/2502] eta: 0:53:30 lr: 0.000003 loss_cls: 3.2485 (3.0774) grad_norm: 2.6439 (2.8117) time: 1.3054 data: 0.0002 max mem: 13912 +[2024-12-06 20:27:42 root] (utils.py 283): INFO Epoch: [13] [ 50/2502] eta: 0:53:17 lr: 0.000003 loss_cls: 3.3257 (3.0850) grad_norm: 2.5403 (2.7881) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 20:27:55 root] (utils.py 283): INFO Epoch: [13] [ 60/2502] eta: 0:53:05 lr: 0.000003 loss_cls: 3.1552 (3.0856) grad_norm: 2.5359 (2.8830) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 20:28:08 root] (utils.py 283): INFO Epoch: [13] [ 70/2502] eta: 0:52:53 lr: 0.000003 loss_cls: 3.1552 (3.0665) grad_norm: 2.4922 (2.8336) time: 1.3070 data: 0.0003 max mem: 13912 +[2024-12-06 20:28:21 root] (utils.py 283): INFO Epoch: [13] [ 80/2502] eta: 0:52:40 lr: 0.000003 loss_cls: 3.0934 (3.0685) grad_norm: 2.4922 (2.8273) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 20:28:34 root] (utils.py 283): INFO Epoch: [13] [ 90/2502] eta: 0:52:27 lr: 0.000003 loss_cls: 2.9256 (3.0606) grad_norm: 2.7640 (2.8486) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 20:28:47 root] (utils.py 283): INFO Epoch: [13] [ 100/2502] eta: 0:52:14 lr: 0.000003 loss_cls: 2.8533 (3.0376) grad_norm: 2.7223 (2.9096) time: 1.3054 data: 0.0003 max mem: 13912 +[2024-12-06 20:29:00 root] (utils.py 283): INFO Epoch: [13] [ 110/2502] eta: 0:52:01 lr: 0.000003 loss_cls: 3.0765 (3.0574) grad_norm: 2.6831 (2.9307) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 20:29:13 root] (utils.py 283): INFO Epoch: [13] [ 120/2502] eta: 0:51:50 lr: 0.000003 loss_cls: 3.4395 (3.0677) grad_norm: 2.6831 (2.9300) time: 1.3092 data: 0.0002 max mem: 13912 +[2024-12-06 20:29:26 root] (utils.py 283): INFO Epoch: [13] [ 130/2502] eta: 0:51:36 lr: 0.000003 loss_cls: 3.2482 (3.0637) grad_norm: 2.7026 (2.9138) time: 1.3091 data: 0.0002 max mem: 13912 +[2024-12-06 20:29:39 root] (utils.py 283): INFO Epoch: [13] [ 140/2502] eta: 0:51:23 lr: 0.000003 loss_cls: 3.1803 (3.0602) grad_norm: 2.5952 (2.9258) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 20:29:52 root] (utils.py 283): INFO Epoch: [13] [ 150/2502] eta: 0:51:09 lr: 0.000003 loss_cls: 3.2529 (3.0709) grad_norm: 2.7243 (3.0051) time: 1.3032 data: 0.0003 max mem: 13912 +[2024-12-06 20:30:05 root] (utils.py 283): INFO Epoch: [13] [ 160/2502] eta: 0:50:56 lr: 0.000003 loss_cls: 3.3876 (3.0746) grad_norm: 2.7834 (3.0354) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 20:30:18 root] (utils.py 283): INFO Epoch: [13] [ 170/2502] eta: 0:50:41 lr: 0.000003 loss_cls: 2.9763 (3.0690) grad_norm: 2.7204 (3.1559) time: 1.2983 data: 0.0003 max mem: 13912 +[2024-12-06 20:30:31 root] (utils.py 283): INFO Epoch: [13] [ 180/2502] eta: 0:50:27 lr: 0.000003 loss_cls: 2.8301 (3.0537) grad_norm: 2.5749 (3.1499) time: 1.2972 data: 0.0003 max mem: 13912 +[2024-12-06 20:30:44 root] (utils.py 283): INFO Epoch: [13] [ 190/2502] eta: 0:50:15 lr: 0.000003 loss_cls: 3.1659 (3.0636) grad_norm: 2.6015 (3.1336) time: 1.3013 data: 0.0002 max mem: 13912 +[2024-12-06 20:30:57 root] (utils.py 283): INFO Epoch: [13] [ 200/2502] eta: 0:50:03 lr: 0.000003 loss_cls: 3.1925 (3.0688) grad_norm: 2.6015 (3.1224) time: 1.3116 data: 0.0002 max mem: 13912 +[2024-12-06 20:31:10 root] (utils.py 283): INFO Epoch: [13] [ 210/2502] eta: 0:49:50 lr: 0.000003 loss_cls: 3.2132 (3.0785) grad_norm: 2.7036 (3.1144) time: 1.3105 data: 0.0003 max mem: 13912 +[2024-12-06 20:31:23 root] (utils.py 283): INFO Epoch: [13] [ 220/2502] eta: 0:49:36 lr: 0.000003 loss_cls: 3.2132 (3.0755) grad_norm: 2.7915 (3.1002) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 20:31:36 root] (utils.py 283): INFO Epoch: [13] [ 230/2502] eta: 0:49:23 lr: 0.000003 loss_cls: 2.9968 (3.0741) grad_norm: 2.7424 (3.0850) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 20:31:49 root] (utils.py 283): INFO Epoch: [13] [ 240/2502] eta: 0:49:10 lr: 0.000003 loss_cls: 3.0829 (3.0746) grad_norm: 2.6370 (3.1692) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 20:32:03 root] (utils.py 283): INFO Epoch: [13] [ 250/2502] eta: 0:48:57 lr: 0.000003 loss_cls: 3.2378 (3.0801) grad_norm: 2.6915 (3.1579) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 20:32:16 root] (utils.py 283): INFO Epoch: [13] [ 260/2502] eta: 0:48:44 lr: 0.000003 loss_cls: 3.1949 (3.0801) grad_norm: 2.7543 (3.1412) time: 1.3048 data: 0.0002 max mem: 13912 +[2024-12-06 20:32:29 root] (utils.py 283): INFO Epoch: [13] [ 270/2502] eta: 0:48:31 lr: 0.000003 loss_cls: 3.0946 (3.0822) grad_norm: 2.8968 (3.1611) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 20:32:42 root] (utils.py 283): INFO Epoch: [13] [ 280/2502] eta: 0:48:18 lr: 0.000003 loss_cls: 2.9760 (3.0732) grad_norm: 2.8222 (3.1465) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 20:32:55 root] (utils.py 283): INFO Epoch: [13] [ 290/2502] eta: 0:48:05 lr: 0.000003 loss_cls: 3.1278 (3.0773) grad_norm: 2.6313 (3.1357) time: 1.3062 data: 0.0003 max mem: 13912 +[2024-12-06 20:33:08 root] (utils.py 283): INFO Epoch: [13] [ 300/2502] eta: 0:47:52 lr: 0.000003 loss_cls: 3.3225 (3.0849) grad_norm: 2.6091 (3.1236) time: 1.3079 data: 0.0002 max mem: 13912 +[2024-12-06 20:33:21 root] (utils.py 283): INFO Epoch: [13] [ 310/2502] eta: 0:47:40 lr: 0.000003 loss_cls: 3.2755 (3.0882) grad_norm: 2.7745 (3.1185) time: 1.3101 data: 0.0003 max mem: 13912 +[2024-12-06 20:33:34 root] (utils.py 283): INFO Epoch: [13] [ 320/2502] eta: 0:47:30 lr: 0.000003 loss_cls: 3.1615 (3.0861) grad_norm: 2.6569 (3.1003) time: 1.3291 data: 0.0003 max mem: 13912 +[2024-12-06 20:33:48 root] (utils.py 283): INFO Epoch: [13] [ 330/2502] eta: 0:47:18 lr: 0.000003 loss_cls: 3.3640 (3.0955) grad_norm: 2.4524 (3.0985) time: 1.3393 data: 0.0004 max mem: 13912 +[2024-12-06 20:34:01 root] (utils.py 283): INFO Epoch: [13] [ 340/2502] eta: 0:47:05 lr: 0.000003 loss_cls: 3.4811 (3.0988) grad_norm: 2.7151 (3.0904) time: 1.3148 data: 0.0003 max mem: 13912 +[2024-12-06 20:34:14 root] (utils.py 283): INFO Epoch: [13] [ 350/2502] eta: 0:46:51 lr: 0.000003 loss_cls: 3.0446 (3.0903) grad_norm: 2.7098 (3.0922) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 20:34:27 root] (utils.py 283): INFO Epoch: [13] [ 360/2502] eta: 0:46:39 lr: 0.000003 loss_cls: 3.0446 (3.0878) grad_norm: 2.7860 (3.0868) time: 1.3079 data: 0.0003 max mem: 13912 +[2024-12-06 20:34:40 root] (utils.py 283): INFO Epoch: [13] [ 370/2502] eta: 0:46:25 lr: 0.000003 loss_cls: 3.1327 (3.0822) grad_norm: 2.8249 (3.1001) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 20:34:53 root] (utils.py 283): INFO Epoch: [13] [ 380/2502] eta: 0:46:12 lr: 0.000003 loss_cls: 3.0256 (3.0815) grad_norm: 2.5419 (3.0963) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 20:35:06 root] (utils.py 283): INFO Epoch: [13] [ 390/2502] eta: 0:45:59 lr: 0.000003 loss_cls: 3.2056 (3.0842) grad_norm: 2.5347 (3.0885) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 20:35:19 root] (utils.py 283): INFO Epoch: [13] [ 400/2502] eta: 0:45:45 lr: 0.000003 loss_cls: 3.2056 (3.0823) grad_norm: 2.4978 (3.0748) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 20:35:32 root] (utils.py 283): INFO Epoch: [13] [ 410/2502] eta: 0:45:32 lr: 0.000003 loss_cls: 3.2474 (3.0849) grad_norm: 2.8798 (3.0911) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 20:35:45 root] (utils.py 283): INFO Epoch: [13] [ 420/2502] eta: 0:45:19 lr: 0.000003 loss_cls: 3.2674 (3.0852) grad_norm: 3.1350 (3.0854) time: 1.3071 data: 0.0002 max mem: 13912 +[2024-12-06 20:35:58 root] (utils.py 283): INFO Epoch: [13] [ 430/2502] eta: 0:45:06 lr: 0.000003 loss_cls: 3.0869 (3.0821) grad_norm: 2.8391 (3.1131) time: 1.3072 data: 0.0003 max mem: 13912 +[2024-12-06 20:36:11 root] (utils.py 283): INFO Epoch: [13] [ 440/2502] eta: 0:44:53 lr: 0.000003 loss_cls: 2.9327 (3.0803) grad_norm: 2.7625 (3.1044) time: 1.3036 data: 0.0003 max mem: 13912 +[2024-12-06 20:36:24 root] (utils.py 283): INFO Epoch: [13] [ 450/2502] eta: 0:44:40 lr: 0.000003 loss_cls: 3.0225 (3.0801) grad_norm: 2.6272 (3.0948) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 20:36:37 root] (utils.py 283): INFO Epoch: [13] [ 460/2502] eta: 0:44:27 lr: 0.000003 loss_cls: 2.9661 (3.0724) grad_norm: 2.6918 (3.0926) time: 1.3072 data: 0.0003 max mem: 13912 +[2024-12-06 20:36:50 root] (utils.py 283): INFO Epoch: [13] [ 470/2502] eta: 0:44:14 lr: 0.000003 loss_cls: 2.9733 (3.0718) grad_norm: 2.8844 (3.0857) time: 1.3077 data: 0.0003 max mem: 13912 +[2024-12-06 20:37:03 root] (utils.py 283): INFO Epoch: [13] [ 480/2502] eta: 0:44:00 lr: 0.000003 loss_cls: 3.1248 (3.0721) grad_norm: 2.5941 (3.0813) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 20:37:16 root] (utils.py 283): INFO Epoch: [13] [ 490/2502] eta: 0:43:47 lr: 0.000003 loss_cls: 2.9517 (3.0729) grad_norm: 2.5941 (3.0780) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 20:37:29 root] (utils.py 283): INFO Epoch: [13] [ 500/2502] eta: 0:43:34 lr: 0.000003 loss_cls: 3.3935 (3.0783) grad_norm: 2.7951 (3.0768) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 20:37:43 root] (utils.py 283): INFO Epoch: [13] [ 510/2502] eta: 0:43:21 lr: 0.000003 loss_cls: 3.4273 (3.0791) grad_norm: 2.7953 (3.0739) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 20:37:56 root] (utils.py 283): INFO Epoch: [13] [ 520/2502] eta: 0:43:08 lr: 0.000003 loss_cls: 3.3105 (3.0834) grad_norm: 2.8023 (3.0753) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 20:38:09 root] (utils.py 283): INFO Epoch: [13] [ 530/2502] eta: 0:42:55 lr: 0.000003 loss_cls: 3.2503 (3.0868) grad_norm: 2.7998 (3.0711) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 20:38:22 root] (utils.py 283): INFO Epoch: [13] [ 540/2502] eta: 0:42:42 lr: 0.000003 loss_cls: 3.2594 (3.0887) grad_norm: 2.7419 (3.0645) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 20:38:35 root] (utils.py 283): INFO Epoch: [13] [ 550/2502] eta: 0:42:28 lr: 0.000003 loss_cls: 3.2594 (3.0862) grad_norm: 2.6030 (3.0604) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 20:38:48 root] (utils.py 283): INFO Epoch: [13] [ 560/2502] eta: 0:42:15 lr: 0.000003 loss_cls: 3.1580 (3.0887) grad_norm: 2.6561 (3.0537) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 20:39:01 root] (utils.py 283): INFO Epoch: [13] [ 570/2502] eta: 0:42:02 lr: 0.000003 loss_cls: 3.1995 (3.0884) grad_norm: 2.6554 (3.0593) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 20:39:14 root] (utils.py 283): INFO Epoch: [13] [ 580/2502] eta: 0:41:49 lr: 0.000003 loss_cls: 3.2206 (3.0884) grad_norm: 2.7605 (3.0559) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 20:39:27 root] (utils.py 283): INFO Epoch: [13] [ 590/2502] eta: 0:41:35 lr: 0.000003 loss_cls: 3.3069 (3.0908) grad_norm: 2.8716 (3.0592) time: 1.2972 data: 0.0002 max mem: 13912 +[2024-12-06 20:39:40 root] (utils.py 283): INFO Epoch: [13] [ 600/2502] eta: 0:41:22 lr: 0.000003 loss_cls: 3.2705 (3.0919) grad_norm: 2.7738 (3.0583) time: 1.2956 data: 0.0003 max mem: 13912 +[2024-12-06 20:39:53 root] (utils.py 283): INFO Epoch: [13] [ 610/2502] eta: 0:41:09 lr: 0.000003 loss_cls: 3.0877 (3.0911) grad_norm: 2.7128 (3.0534) time: 1.2970 data: 0.0003 max mem: 13912 +[2024-12-06 20:40:06 root] (utils.py 283): INFO Epoch: [13] [ 620/2502] eta: 0:40:56 lr: 0.000003 loss_cls: 2.8971 (3.0844) grad_norm: 2.6785 (3.1260) time: 1.2972 data: 0.0003 max mem: 13912 +[2024-12-06 20:40:19 root] (utils.py 283): INFO Epoch: [13] [ 630/2502] eta: 0:40:42 lr: 0.000003 loss_cls: 2.9382 (3.0844) grad_norm: 2.6156 (3.1293) time: 1.2984 data: 0.0003 max mem: 13912 +[2024-12-06 20:40:32 root] (utils.py 283): INFO Epoch: [13] [ 640/2502] eta: 0:40:29 lr: 0.000003 loss_cls: 3.1626 (3.0849) grad_norm: 2.7389 (3.1334) time: 1.2984 data: 0.0003 max mem: 13912 +[2024-12-06 20:40:45 root] (utils.py 283): INFO Epoch: [13] [ 650/2502] eta: 0:40:16 lr: 0.000003 loss_cls: 3.1626 (3.0835) grad_norm: 2.7217 (3.1273) time: 1.2960 data: 0.0003 max mem: 13912 +[2024-12-06 20:40:57 root] (utils.py 283): INFO Epoch: [13] [ 660/2502] eta: 0:40:02 lr: 0.000003 loss_cls: 3.0978 (3.0822) grad_norm: 2.8969 (3.1250) time: 1.2954 data: 0.0003 max mem: 13912 +[2024-12-06 20:41:10 root] (utils.py 283): INFO Epoch: [13] [ 670/2502] eta: 0:39:49 lr: 0.000003 loss_cls: 3.1949 (3.0835) grad_norm: 2.8969 (3.1250) time: 1.2967 data: 0.0002 max mem: 13912 +[2024-12-06 20:41:23 root] (utils.py 283): INFO Epoch: [13] [ 680/2502] eta: 0:39:36 lr: 0.000003 loss_cls: 3.1721 (3.0845) grad_norm: 2.8336 (3.1232) time: 1.2965 data: 0.0002 max mem: 13912 +[2024-12-06 20:41:36 root] (utils.py 283): INFO Epoch: [13] [ 690/2502] eta: 0:39:23 lr: 0.000003 loss_cls: 3.1230 (3.0809) grad_norm: 2.6325 (3.1146) time: 1.2950 data: 0.0003 max mem: 13912 +[2024-12-06 20:41:49 root] (utils.py 283): INFO Epoch: [13] [ 700/2502] eta: 0:39:09 lr: 0.000003 loss_cls: 3.0014 (3.0791) grad_norm: 2.5847 (3.1122) time: 1.2949 data: 0.0003 max mem: 13912 +[2024-12-06 20:42:02 root] (utils.py 283): INFO Epoch: [13] [ 710/2502] eta: 0:38:56 lr: 0.000003 loss_cls: 3.1660 (3.0799) grad_norm: 2.7761 (3.1072) time: 1.2970 data: 0.0003 max mem: 13912 +[2024-12-06 20:42:15 root] (utils.py 283): INFO Epoch: [13] [ 720/2502] eta: 0:38:43 lr: 0.000003 loss_cls: 3.2751 (3.0825) grad_norm: 2.7180 (3.1011) time: 1.2983 data: 0.0002 max mem: 13912 +[2024-12-06 20:42:28 root] (utils.py 283): INFO Epoch: [13] [ 730/2502] eta: 0:38:30 lr: 0.000003 loss_cls: 3.1546 (3.0786) grad_norm: 2.8652 (3.1066) time: 1.2956 data: 0.0002 max mem: 13912 +[2024-12-06 20:42:41 root] (utils.py 283): INFO Epoch: [13] [ 740/2502] eta: 0:38:16 lr: 0.000003 loss_cls: 3.0057 (3.0810) grad_norm: 2.8875 (3.1027) time: 1.2949 data: 0.0002 max mem: 13912 +[2024-12-06 20:42:54 root] (utils.py 283): INFO Epoch: [13] [ 750/2502] eta: 0:38:03 lr: 0.000003 loss_cls: 3.2096 (3.0790) grad_norm: 2.6696 (3.0974) time: 1.2969 data: 0.0003 max mem: 13912 +[2024-12-06 20:43:07 root] (utils.py 283): INFO Epoch: [13] [ 760/2502] eta: 0:37:50 lr: 0.000003 loss_cls: 3.1664 (3.0820) grad_norm: 2.6696 (3.0948) time: 1.2955 data: 0.0002 max mem: 13912 +[2024-12-06 20:43:20 root] (utils.py 283): INFO Epoch: [13] [ 770/2502] eta: 0:37:37 lr: 0.000003 loss_cls: 3.1664 (3.0821) grad_norm: 2.8124 (3.1107) time: 1.2945 data: 0.0002 max mem: 13912 +[2024-12-06 20:43:33 root] (utils.py 283): INFO Epoch: [13] [ 780/2502] eta: 0:37:24 lr: 0.000003 loss_cls: 3.1578 (3.0832) grad_norm: 2.8124 (3.1112) time: 1.2949 data: 0.0002 max mem: 13912 +[2024-12-06 20:43:46 root] (utils.py 283): INFO Epoch: [13] [ 790/2502] eta: 0:37:10 lr: 0.000003 loss_cls: 3.2024 (3.0846) grad_norm: 2.7236 (3.1100) time: 1.2949 data: 0.0002 max mem: 13912 +[2024-12-06 20:43:59 root] (utils.py 283): INFO Epoch: [13] [ 800/2502] eta: 0:36:57 lr: 0.000003 loss_cls: 3.1793 (3.0855) grad_norm: 2.6993 (3.1162) time: 1.2991 data: 0.0002 max mem: 13912 +[2024-12-06 20:44:12 root] (utils.py 283): INFO Epoch: [13] [ 810/2502] eta: 0:36:44 lr: 0.000003 loss_cls: 3.1395 (3.0867) grad_norm: 2.5927 (3.1135) time: 1.2979 data: 0.0002 max mem: 13912 +[2024-12-06 20:44:25 root] (utils.py 283): INFO Epoch: [13] [ 820/2502] eta: 0:36:31 lr: 0.000003 loss_cls: 3.0625 (3.0868) grad_norm: 2.5927 (3.1095) time: 1.2932 data: 0.0002 max mem: 13912 +[2024-12-06 20:44:38 root] (utils.py 283): INFO Epoch: [13] [ 830/2502] eta: 0:36:18 lr: 0.000003 loss_cls: 3.0903 (3.0879) grad_norm: 2.8547 (3.1115) time: 1.2935 data: 0.0002 max mem: 13912 +[2024-12-06 20:44:51 root] (utils.py 283): INFO Epoch: [13] [ 840/2502] eta: 0:36:04 lr: 0.000003 loss_cls: 2.9606 (3.0846) grad_norm: 2.8218 (3.1101) time: 1.2935 data: 0.0002 max mem: 13912 +[2024-12-06 20:45:04 root] (utils.py 283): INFO Epoch: [13] [ 850/2502] eta: 0:35:51 lr: 0.000003 loss_cls: 3.1099 (3.0862) grad_norm: 2.7968 (3.1077) time: 1.2932 data: 0.0002 max mem: 13912 +[2024-12-06 20:45:17 root] (utils.py 283): INFO Epoch: [13] [ 860/2502] eta: 0:35:38 lr: 0.000003 loss_cls: 3.1886 (3.0879) grad_norm: 2.7663 (3.1071) time: 1.2929 data: 0.0002 max mem: 13912 +[2024-12-06 20:45:29 root] (utils.py 283): INFO Epoch: [13] [ 870/2502] eta: 0:35:25 lr: 0.000003 loss_cls: 3.0833 (3.0866) grad_norm: 2.5934 (3.1040) time: 1.2933 data: 0.0002 max mem: 13912 +[2024-12-06 20:45:42 root] (utils.py 283): INFO Epoch: [13] [ 880/2502] eta: 0:35:12 lr: 0.000003 loss_cls: 3.0189 (3.0848) grad_norm: 2.4758 (3.0970) time: 1.2935 data: 0.0002 max mem: 13912 +[2024-12-06 20:45:55 root] (utils.py 283): INFO Epoch: [13] [ 890/2502] eta: 0:34:59 lr: 0.000003 loss_cls: 2.8500 (3.0813) grad_norm: 2.5986 (3.0945) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 20:46:08 root] (utils.py 283): INFO Epoch: [13] [ 900/2502] eta: 0:34:46 lr: 0.000003 loss_cls: 3.0988 (3.0809) grad_norm: 2.8003 (3.0908) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 20:46:22 root] (utils.py 283): INFO Epoch: [13] [ 910/2502] eta: 0:34:33 lr: 0.000003 loss_cls: 3.0964 (3.0779) grad_norm: 2.7629 (3.0870) time: 1.3068 data: 0.0003 max mem: 13912 +[2024-12-06 20:46:35 root] (utils.py 283): INFO Epoch: [13] [ 920/2502] eta: 0:34:20 lr: 0.000003 loss_cls: 2.8987 (3.0778) grad_norm: 2.6673 (3.0834) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 20:46:48 root] (utils.py 283): INFO Epoch: [13] [ 930/2502] eta: 0:34:07 lr: 0.000003 loss_cls: 3.0872 (3.0775) grad_norm: 2.6545 (3.0800) time: 1.2994 data: 0.0002 max mem: 13912 +[2024-12-06 20:47:01 root] (utils.py 283): INFO Epoch: [13] [ 940/2502] eta: 0:33:53 lr: 0.000003 loss_cls: 3.0909 (3.0785) grad_norm: 2.6246 (3.0781) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 20:47:14 root] (utils.py 283): INFO Epoch: [13] [ 950/2502] eta: 0:33:40 lr: 0.000003 loss_cls: 3.2911 (3.0826) grad_norm: 2.6702 (3.0757) time: 1.3018 data: 0.0002 max mem: 13912 +[2024-12-06 20:47:27 root] (utils.py 283): INFO Epoch: [13] [ 960/2502] eta: 0:33:28 lr: 0.000003 loss_cls: 3.4238 (3.0839) grad_norm: 2.6730 (3.0741) time: 1.3043 data: 0.0002 max mem: 13912 +[2024-12-06 20:47:40 root] (utils.py 283): INFO Epoch: [13] [ 970/2502] eta: 0:33:14 lr: 0.000003 loss_cls: 3.2369 (3.0837) grad_norm: 2.6359 (3.0700) time: 1.3042 data: 0.0002 max mem: 13912 +[2024-12-06 20:47:53 root] (utils.py 283): INFO Epoch: [13] [ 980/2502] eta: 0:33:02 lr: 0.000003 loss_cls: 3.2369 (3.0847) grad_norm: 2.5430 (3.0671) time: 1.3097 data: 0.0003 max mem: 13912 +[2024-12-06 20:48:06 root] (utils.py 283): INFO Epoch: [13] [ 990/2502] eta: 0:32:49 lr: 0.000003 loss_cls: 3.3475 (3.0872) grad_norm: 2.6918 (3.0637) time: 1.3127 data: 0.0002 max mem: 13912 +[2024-12-06 20:48:19 root] (utils.py 283): INFO Epoch: [13] [1000/2502] eta: 0:32:36 lr: 0.000003 loss_cls: 3.2634 (3.0868) grad_norm: 2.6918 (3.0613) time: 1.3052 data: 0.0002 max mem: 13912 +[2024-12-06 20:48:32 root] (utils.py 283): INFO Epoch: [13] [1010/2502] eta: 0:32:23 lr: 0.000003 loss_cls: 3.2654 (3.0882) grad_norm: 2.5883 (3.0680) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 20:48:45 root] (utils.py 283): INFO Epoch: [13] [1020/2502] eta: 0:32:10 lr: 0.000003 loss_cls: 3.2931 (3.0896) grad_norm: 2.6210 (3.0661) time: 1.3050 data: 0.0002 max mem: 13912 +[2024-12-06 20:48:58 root] (utils.py 283): INFO Epoch: [13] [1030/2502] eta: 0:31:57 lr: 0.000003 loss_cls: 3.2428 (3.0891) grad_norm: 2.5684 (3.0609) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 20:49:11 root] (utils.py 283): INFO Epoch: [13] [1040/2502] eta: 0:31:44 lr: 0.000003 loss_cls: 3.2319 (3.0897) grad_norm: 2.6303 (3.0605) time: 1.3043 data: 0.0003 max mem: 13912 +[2024-12-06 20:49:24 root] (utils.py 283): INFO Epoch: [13] [1050/2502] eta: 0:31:31 lr: 0.000003 loss_cls: 3.3222 (3.0905) grad_norm: 2.7003 (3.0655) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 20:49:37 root] (utils.py 283): INFO Epoch: [13] [1060/2502] eta: 0:31:18 lr: 0.000003 loss_cls: 3.1684 (3.0904) grad_norm: 2.6012 (3.0621) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 20:49:50 root] (utils.py 283): INFO Epoch: [13] [1070/2502] eta: 0:31:05 lr: 0.000003 loss_cls: 3.2408 (3.0921) grad_norm: 2.7819 (3.0609) time: 1.3105 data: 0.0003 max mem: 13912 +[2024-12-06 20:50:03 root] (utils.py 283): INFO Epoch: [13] [1080/2502] eta: 0:30:52 lr: 0.000003 loss_cls: 3.3686 (3.0925) grad_norm: 2.8488 (3.0596) time: 1.3114 data: 0.0003 max mem: 13912 +[2024-12-06 20:50:16 root] (utils.py 283): INFO Epoch: [13] [1090/2502] eta: 0:30:39 lr: 0.000003 loss_cls: 3.2791 (3.0948) grad_norm: 2.6396 (3.0579) time: 1.3065 data: 0.0002 max mem: 13912 +[2024-12-06 20:50:29 root] (utils.py 283): INFO Epoch: [13] [1100/2502] eta: 0:30:26 lr: 0.000003 loss_cls: 3.2615 (3.0949) grad_norm: 2.6903 (3.0563) time: 1.3035 data: 0.0002 max mem: 13912 +[2024-12-06 20:50:42 root] (utils.py 283): INFO Epoch: [13] [1110/2502] eta: 0:30:13 lr: 0.000003 loss_cls: 3.2124 (3.0941) grad_norm: 2.7625 (3.0730) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 20:50:56 root] (utils.py 283): INFO Epoch: [13] [1120/2502] eta: 0:30:00 lr: 0.000003 loss_cls: 3.1354 (3.0912) grad_norm: 2.7014 (3.0697) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 20:51:08 root] (utils.py 283): INFO Epoch: [13] [1130/2502] eta: 0:29:47 lr: 0.000003 loss_cls: 3.0080 (3.0902) grad_norm: 2.7176 (3.0721) time: 1.2990 data: 0.0003 max mem: 13912 +[2024-12-06 20:51:21 root] (utils.py 283): INFO Epoch: [13] [1140/2502] eta: 0:29:34 lr: 0.000003 loss_cls: 3.0013 (3.0893) grad_norm: 2.7034 (3.0681) time: 1.2982 data: 0.0003 max mem: 13912 +[2024-12-06 20:51:34 root] (utils.py 283): INFO Epoch: [13] [1150/2502] eta: 0:29:21 lr: 0.000003 loss_cls: 3.0013 (3.0888) grad_norm: 2.7564 (3.0672) time: 1.2974 data: 0.0003 max mem: 13912 +[2024-12-06 20:51:47 root] (utils.py 283): INFO Epoch: [13] [1160/2502] eta: 0:29:07 lr: 0.000003 loss_cls: 3.0558 (3.0880) grad_norm: 2.9151 (3.0800) time: 1.2964 data: 0.0002 max mem: 13912 +[2024-12-06 20:52:00 root] (utils.py 283): INFO Epoch: [13] [1170/2502] eta: 0:28:54 lr: 0.000003 loss_cls: 3.0558 (3.0877) grad_norm: 2.7975 (3.0795) time: 1.2985 data: 0.0002 max mem: 13912 +[2024-12-06 20:52:13 root] (utils.py 283): INFO Epoch: [13] [1180/2502] eta: 0:28:41 lr: 0.000003 loss_cls: 3.0738 (3.0878) grad_norm: 2.6796 (3.0772) time: 1.2974 data: 0.0003 max mem: 13912 +[2024-12-06 20:52:26 root] (utils.py 283): INFO Epoch: [13] [1190/2502] eta: 0:28:28 lr: 0.000003 loss_cls: 3.2186 (3.0874) grad_norm: 2.6610 (3.0727) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 20:52:39 root] (utils.py 283): INFO Epoch: [13] [1200/2502] eta: 0:28:15 lr: 0.000003 loss_cls: 2.7626 (3.0841) grad_norm: 2.5360 (3.0743) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 20:52:52 root] (utils.py 283): INFO Epoch: [13] [1210/2502] eta: 0:28:02 lr: 0.000003 loss_cls: 2.7626 (3.0839) grad_norm: 2.7617 (3.0746) time: 1.2985 data: 0.0002 max mem: 13912 +[2024-12-06 20:53:05 root] (utils.py 283): INFO Epoch: [13] [1220/2502] eta: 0:27:49 lr: 0.000003 loss_cls: 3.3012 (3.0859) grad_norm: 3.0532 (3.0769) time: 1.2982 data: 0.0002 max mem: 13912 +[2024-12-06 20:53:18 root] (utils.py 283): INFO Epoch: [13] [1230/2502] eta: 0:27:36 lr: 0.000003 loss_cls: 3.1556 (3.0842) grad_norm: 2.7116 (3.0737) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 20:53:32 root] (utils.py 283): INFO Epoch: [13] [1240/2502] eta: 0:27:23 lr: 0.000003 loss_cls: 2.8490 (3.0824) grad_norm: 2.6145 (3.0737) time: 1.3062 data: 0.0002 max mem: 13912 +[2024-12-06 20:53:45 root] (utils.py 283): INFO Epoch: [13] [1250/2502] eta: 0:27:10 lr: 0.000003 loss_cls: 2.9517 (3.0814) grad_norm: 2.6746 (3.0754) time: 1.3037 data: 0.0003 max mem: 13912 +[2024-12-06 20:53:58 root] (utils.py 283): INFO Epoch: [13] [1260/2502] eta: 0:26:57 lr: 0.000003 loss_cls: 2.9779 (3.0790) grad_norm: 2.8792 (3.0924) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 20:54:11 root] (utils.py 283): INFO Epoch: [13] [1270/2502] eta: 0:26:44 lr: 0.000003 loss_cls: 2.9779 (3.0776) grad_norm: 2.5814 (3.0892) time: 1.3077 data: 0.0003 max mem: 13912 +[2024-12-06 20:54:24 root] (utils.py 283): INFO Epoch: [13] [1280/2502] eta: 0:26:31 lr: 0.000003 loss_cls: 3.2897 (3.0807) grad_norm: 2.8212 (3.0914) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 20:54:37 root] (utils.py 283): INFO Epoch: [13] [1290/2502] eta: 0:26:18 lr: 0.000003 loss_cls: 3.2967 (3.0810) grad_norm: 2.9815 (3.0894) time: 1.3050 data: 0.0002 max mem: 13912 +[2024-12-06 20:54:50 root] (utils.py 283): INFO Epoch: [13] [1300/2502] eta: 0:26:05 lr: 0.000003 loss_cls: 3.3097 (3.0813) grad_norm: 2.8440 (3.0869) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 20:55:03 root] (utils.py 283): INFO Epoch: [13] [1310/2502] eta: 0:25:52 lr: 0.000003 loss_cls: 3.3268 (3.0814) grad_norm: 2.6981 (3.0857) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 20:55:16 root] (utils.py 283): INFO Epoch: [13] [1320/2502] eta: 0:25:39 lr: 0.000003 loss_cls: 3.3251 (3.0821) grad_norm: 2.6981 (3.0882) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 20:55:29 root] (utils.py 283): INFO Epoch: [13] [1330/2502] eta: 0:25:26 lr: 0.000003 loss_cls: 3.3248 (3.0833) grad_norm: 2.8618 (3.0924) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 20:55:42 root] (utils.py 283): INFO Epoch: [13] [1340/2502] eta: 0:25:13 lr: 0.000003 loss_cls: 3.2842 (3.0853) grad_norm: 2.5781 (3.0898) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 20:55:55 root] (utils.py 283): INFO Epoch: [13] [1350/2502] eta: 0:25:00 lr: 0.000003 loss_cls: 3.3642 (3.0865) grad_norm: 2.6640 (3.0973) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 20:56:08 root] (utils.py 283): INFO Epoch: [13] [1360/2502] eta: 0:24:47 lr: 0.000003 loss_cls: 3.2757 (3.0853) grad_norm: 2.5969 (3.0933) time: 1.3019 data: 0.0003 max mem: 13912 +[2024-12-06 20:56:21 root] (utils.py 283): INFO Epoch: [13] [1370/2502] eta: 0:24:34 lr: 0.000003 loss_cls: 3.1752 (3.0860) grad_norm: 2.5262 (3.0920) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 20:56:34 root] (utils.py 283): INFO Epoch: [13] [1380/2502] eta: 0:24:21 lr: 0.000003 loss_cls: 3.1826 (3.0841) grad_norm: 2.6353 (3.0947) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 20:56:47 root] (utils.py 283): INFO Epoch: [13] [1390/2502] eta: 0:24:08 lr: 0.000003 loss_cls: 2.8174 (3.0820) grad_norm: 2.9008 (3.0957) time: 1.3011 data: 0.0003 max mem: 13912 +[2024-12-06 20:57:00 root] (utils.py 283): INFO Epoch: [13] [1400/2502] eta: 0:23:55 lr: 0.000003 loss_cls: 2.8586 (3.0817) grad_norm: 2.6933 (3.0933) time: 1.3015 data: 0.0003 max mem: 13912 +[2024-12-06 20:57:13 root] (utils.py 283): INFO Epoch: [13] [1410/2502] eta: 0:23:42 lr: 0.000003 loss_cls: 3.1823 (3.0828) grad_norm: 2.5767 (3.0912) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 20:57:26 root] (utils.py 283): INFO Epoch: [13] [1420/2502] eta: 0:23:29 lr: 0.000003 loss_cls: 3.1823 (3.0811) grad_norm: 2.6321 (3.0884) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 20:57:39 root] (utils.py 283): INFO Epoch: [13] [1430/2502] eta: 0:23:16 lr: 0.000003 loss_cls: 2.5798 (3.0773) grad_norm: 2.7197 (3.0888) time: 1.2989 data: 0.0003 max mem: 13912 +[2024-12-06 20:57:52 root] (utils.py 283): INFO Epoch: [13] [1440/2502] eta: 0:23:03 lr: 0.000003 loss_cls: 2.7564 (3.0781) grad_norm: 2.7265 (3.0887) time: 1.2962 data: 0.0003 max mem: 13912 +[2024-12-06 20:58:05 root] (utils.py 283): INFO Epoch: [13] [1450/2502] eta: 0:22:50 lr: 0.000003 loss_cls: 3.0831 (3.0785) grad_norm: 2.8480 (3.0871) time: 1.2954 data: 0.0003 max mem: 13912 +[2024-12-06 20:58:18 root] (utils.py 283): INFO Epoch: [13] [1460/2502] eta: 0:22:36 lr: 0.000003 loss_cls: 3.0746 (3.0779) grad_norm: 2.7235 (3.0865) time: 1.2943 data: 0.0003 max mem: 13912 +[2024-12-06 20:58:31 root] (utils.py 283): INFO Epoch: [13] [1470/2502] eta: 0:22:23 lr: 0.000003 loss_cls: 3.1439 (3.0772) grad_norm: 2.5126 (3.0830) time: 1.2928 data: 0.0002 max mem: 13912 +[2024-12-06 20:58:44 root] (utils.py 283): INFO Epoch: [13] [1480/2502] eta: 0:22:10 lr: 0.000003 loss_cls: 3.1764 (3.0781) grad_norm: 2.6441 (3.0814) time: 1.2948 data: 0.0003 max mem: 13912 +[2024-12-06 20:58:57 root] (utils.py 283): INFO Epoch: [13] [1490/2502] eta: 0:21:57 lr: 0.000003 loss_cls: 3.1764 (3.0778) grad_norm: 2.8053 (3.0845) time: 1.2945 data: 0.0003 max mem: 13912 +[2024-12-06 20:59:10 root] (utils.py 283): INFO Epoch: [13] [1500/2502] eta: 0:21:44 lr: 0.000003 loss_cls: 3.0100 (3.0772) grad_norm: 2.6451 (3.0816) time: 1.2990 data: 0.0002 max mem: 13912 +[2024-12-06 20:59:23 root] (utils.py 283): INFO Epoch: [13] [1510/2502] eta: 0:21:31 lr: 0.000003 loss_cls: 2.9933 (3.0767) grad_norm: 2.6341 (3.0803) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 20:59:36 root] (utils.py 283): INFO Epoch: [13] [1520/2502] eta: 0:21:18 lr: 0.000003 loss_cls: 3.2308 (3.0770) grad_norm: 2.7469 (3.0795) time: 1.2948 data: 0.0003 max mem: 13912 +[2024-12-06 20:59:49 root] (utils.py 283): INFO Epoch: [13] [1530/2502] eta: 0:21:05 lr: 0.000003 loss_cls: 3.1202 (3.0760) grad_norm: 2.7192 (3.0771) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 21:00:02 root] (utils.py 283): INFO Epoch: [13] [1540/2502] eta: 0:20:52 lr: 0.000003 loss_cls: 3.1202 (3.0768) grad_norm: 2.6651 (3.0782) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 21:00:15 root] (utils.py 283): INFO Epoch: [13] [1550/2502] eta: 0:20:39 lr: 0.000003 loss_cls: 3.2788 (3.0775) grad_norm: 2.6686 (3.0786) time: 1.2934 data: 0.0003 max mem: 13912 +[2024-12-06 21:00:28 root] (utils.py 283): INFO Epoch: [13] [1560/2502] eta: 0:20:26 lr: 0.000003 loss_cls: 2.9187 (3.0757) grad_norm: 2.6259 (3.0766) time: 1.2920 data: 0.0002 max mem: 13912 +[2024-12-06 21:00:41 root] (utils.py 283): INFO Epoch: [13] [1570/2502] eta: 0:20:13 lr: 0.000003 loss_cls: 2.9811 (3.0763) grad_norm: 2.6259 (3.0753) time: 1.2944 data: 0.0003 max mem: 13912 +[2024-12-06 21:00:53 root] (utils.py 283): INFO Epoch: [13] [1580/2502] eta: 0:20:00 lr: 0.000003 loss_cls: 3.2322 (3.0758) grad_norm: 2.7074 (3.0736) time: 1.2957 data: 0.0003 max mem: 13912 +[2024-12-06 21:01:06 root] (utils.py 283): INFO Epoch: [13] [1590/2502] eta: 0:19:47 lr: 0.000003 loss_cls: 2.9574 (3.0749) grad_norm: 2.7074 (3.0717) time: 1.2933 data: 0.0002 max mem: 13912 +[2024-12-06 21:01:19 root] (utils.py 283): INFO Epoch: [13] [1600/2502] eta: 0:19:34 lr: 0.000003 loss_cls: 3.1158 (3.0757) grad_norm: 2.6850 (3.0697) time: 1.2944 data: 0.0002 max mem: 13912 +[2024-12-06 21:01:32 root] (utils.py 283): INFO Epoch: [13] [1610/2502] eta: 0:19:21 lr: 0.000003 loss_cls: 3.2038 (3.0762) grad_norm: 2.5833 (3.0691) time: 1.2982 data: 0.0003 max mem: 13912 +[2024-12-06 21:01:45 root] (utils.py 283): INFO Epoch: [13] [1620/2502] eta: 0:19:08 lr: 0.000003 loss_cls: 3.1397 (3.0761) grad_norm: 2.7201 (3.0721) time: 1.3005 data: 0.0003 max mem: 13912 +[2024-12-06 21:01:58 root] (utils.py 283): INFO Epoch: [13] [1630/2502] eta: 0:18:55 lr: 0.000003 loss_cls: 3.1288 (3.0757) grad_norm: 2.5839 (3.0686) time: 1.2992 data: 0.0003 max mem: 13912 +[2024-12-06 21:02:11 root] (utils.py 283): INFO Epoch: [13] [1640/2502] eta: 0:18:42 lr: 0.000003 loss_cls: 3.1594 (3.0769) grad_norm: 2.6741 (3.0698) time: 1.2961 data: 0.0003 max mem: 13912 +[2024-12-06 21:02:24 root] (utils.py 283): INFO Epoch: [13] [1650/2502] eta: 0:18:28 lr: 0.000003 loss_cls: 3.1176 (3.0741) grad_norm: 2.8789 (3.0723) time: 1.2959 data: 0.0003 max mem: 13912 +[2024-12-06 21:02:37 root] (utils.py 283): INFO Epoch: [13] [1660/2502] eta: 0:18:15 lr: 0.000003 loss_cls: 3.1603 (3.0756) grad_norm: 2.7037 (3.0705) time: 1.2974 data: 0.0003 max mem: 13912 +[2024-12-06 21:02:50 root] (utils.py 283): INFO Epoch: [13] [1670/2502] eta: 0:18:02 lr: 0.000003 loss_cls: 3.1645 (3.0736) grad_norm: 2.7132 (3.0695) time: 1.2992 data: 0.0002 max mem: 13912 +[2024-12-06 21:03:03 root] (utils.py 283): INFO Epoch: [13] [1680/2502] eta: 0:17:49 lr: 0.000003 loss_cls: 2.6537 (3.0724) grad_norm: 2.5885 (3.0669) time: 1.2974 data: 0.0003 max mem: 13912 +[2024-12-06 21:03:16 root] (utils.py 283): INFO Epoch: [13] [1690/2502] eta: 0:17:36 lr: 0.000003 loss_cls: 2.8027 (3.0714) grad_norm: 2.5885 (3.0649) time: 1.2964 data: 0.0002 max mem: 13912 +[2024-12-06 21:03:29 root] (utils.py 283): INFO Epoch: [13] [1700/2502] eta: 0:17:23 lr: 0.000003 loss_cls: 2.7168 (3.0692) grad_norm: 2.6082 (3.0629) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 21:03:42 root] (utils.py 283): INFO Epoch: [13] [1710/2502] eta: 0:17:10 lr: 0.000003 loss_cls: 3.0380 (3.0695) grad_norm: 2.6398 (3.0632) time: 1.3039 data: 0.0003 max mem: 13912 +[2024-12-06 21:03:55 root] (utils.py 283): INFO Epoch: [13] [1720/2502] eta: 0:16:57 lr: 0.000003 loss_cls: 3.1921 (3.0705) grad_norm: 2.6463 (3.0615) time: 1.3061 data: 0.0003 max mem: 13912 +[2024-12-06 21:04:08 root] (utils.py 283): INFO Epoch: [13] [1730/2502] eta: 0:16:44 lr: 0.000003 loss_cls: 3.2894 (3.0698) grad_norm: 2.5850 (3.0587) time: 1.3055 data: 0.0003 max mem: 13912 +[2024-12-06 21:04:21 root] (utils.py 283): INFO Epoch: [13] [1740/2502] eta: 0:16:31 lr: 0.000003 loss_cls: 3.0034 (3.0687) grad_norm: 2.5850 (3.0563) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 21:04:34 root] (utils.py 283): INFO Epoch: [13] [1750/2502] eta: 0:16:18 lr: 0.000003 loss_cls: 3.2259 (3.0704) grad_norm: 2.7212 (3.0545) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 21:04:47 root] (utils.py 283): INFO Epoch: [13] [1760/2502] eta: 0:16:05 lr: 0.000003 loss_cls: 3.2259 (3.0707) grad_norm: 2.5610 (3.0511) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 21:05:00 root] (utils.py 283): INFO Epoch: [13] [1770/2502] eta: 0:15:52 lr: 0.000003 loss_cls: 3.0213 (3.0704) grad_norm: 2.5624 (3.0523) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 21:05:14 root] (utils.py 283): INFO Epoch: [13] [1780/2502] eta: 0:15:39 lr: 0.000003 loss_cls: 3.2474 (3.0718) grad_norm: 2.8658 (3.0522) time: 1.3042 data: 0.0002 max mem: 13912 +[2024-12-06 21:05:27 root] (utils.py 283): INFO Epoch: [13] [1790/2502] eta: 0:15:26 lr: 0.000003 loss_cls: 3.2374 (3.0716) grad_norm: 2.7769 (3.0519) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 21:05:40 root] (utils.py 283): INFO Epoch: [13] [1800/2502] eta: 0:15:13 lr: 0.000003 loss_cls: 3.2393 (3.0726) grad_norm: 2.7325 (3.0552) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 21:05:53 root] (utils.py 283): INFO Epoch: [13] [1810/2502] eta: 0:15:00 lr: 0.000003 loss_cls: 3.3184 (3.0730) grad_norm: 2.6825 (3.0526) time: 1.3024 data: 0.0002 max mem: 13912 +[2024-12-06 21:06:06 root] (utils.py 283): INFO Epoch: [13] [1820/2502] eta: 0:14:47 lr: 0.000003 loss_cls: 2.8805 (3.0708) grad_norm: 2.5737 (3.0511) time: 1.2987 data: 0.0002 max mem: 13912 +[2024-12-06 21:06:18 root] (utils.py 283): INFO Epoch: [13] [1830/2502] eta: 0:14:34 lr: 0.000003 loss_cls: 2.9550 (3.0707) grad_norm: 2.7144 (3.0492) time: 1.2927 data: 0.0003 max mem: 13912 +[2024-12-06 21:06:31 root] (utils.py 283): INFO Epoch: [13] [1840/2502] eta: 0:14:21 lr: 0.000003 loss_cls: 3.1243 (3.0718) grad_norm: 2.7144 (3.0474) time: 1.2940 data: 0.0003 max mem: 13912 +[2024-12-06 21:06:45 root] (utils.py 283): INFO Epoch: [13] [1850/2502] eta: 0:14:08 lr: 0.000003 loss_cls: 3.3574 (3.0734) grad_norm: 2.6961 (3.0476) time: 1.3306 data: 0.0004 max mem: 13912 +[2024-12-06 21:06:58 root] (utils.py 283): INFO Epoch: [13] [1860/2502] eta: 0:13:55 lr: 0.000003 loss_cls: 3.2445 (3.0730) grad_norm: 2.6230 (3.0454) time: 1.3310 data: 0.0004 max mem: 13912 +[2024-12-06 21:07:11 root] (utils.py 283): INFO Epoch: [13] [1870/2502] eta: 0:13:42 lr: 0.000003 loss_cls: 3.0956 (3.0735) grad_norm: 2.7818 (3.0482) time: 1.2968 data: 0.0002 max mem: 13912 +[2024-12-06 21:07:24 root] (utils.py 283): INFO Epoch: [13] [1880/2502] eta: 0:13:29 lr: 0.000003 loss_cls: 3.1447 (3.0737) grad_norm: 2.8540 (3.0480) time: 1.2940 data: 0.0002 max mem: 13912 +[2024-12-06 21:07:37 root] (utils.py 283): INFO Epoch: [13] [1890/2502] eta: 0:13:16 lr: 0.000003 loss_cls: 3.1048 (3.0734) grad_norm: 2.5863 (3.0478) time: 1.2937 data: 0.0003 max mem: 13912 +[2024-12-06 21:07:50 root] (utils.py 283): INFO Epoch: [13] [1900/2502] eta: 0:13:03 lr: 0.000003 loss_cls: 2.8720 (3.0722) grad_norm: 2.6266 (3.0478) time: 1.2959 data: 0.0003 max mem: 13912 +[2024-12-06 21:08:03 root] (utils.py 283): INFO Epoch: [13] [1910/2502] eta: 0:12:50 lr: 0.000003 loss_cls: 2.8720 (3.0719) grad_norm: 2.6964 (3.0482) time: 1.2940 data: 0.0003 max mem: 13912 +[2024-12-06 21:08:16 root] (utils.py 283): INFO Epoch: [13] [1920/2502] eta: 0:12:37 lr: 0.000003 loss_cls: 3.0634 (3.0721) grad_norm: 2.6964 (3.0469) time: 1.2989 data: 0.0003 max mem: 13912 +[2024-12-06 21:08:29 root] (utils.py 283): INFO Epoch: [13] [1930/2502] eta: 0:12:24 lr: 0.000003 loss_cls: 3.1068 (3.0724) grad_norm: 2.5367 (3.0443) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 21:08:42 root] (utils.py 283): INFO Epoch: [13] [1940/2502] eta: 0:12:11 lr: 0.000003 loss_cls: 3.1068 (3.0720) grad_norm: 2.6208 (3.0439) time: 1.2984 data: 0.0002 max mem: 13912 +[2024-12-06 21:08:55 root] (utils.py 283): INFO Epoch: [13] [1950/2502] eta: 0:11:58 lr: 0.000003 loss_cls: 3.0460 (3.0720) grad_norm: 2.6723 (3.0430) time: 1.2998 data: 0.0003 max mem: 13912 +[2024-12-06 21:09:08 root] (utils.py 283): INFO Epoch: [13] [1960/2502] eta: 0:11:45 lr: 0.000003 loss_cls: 3.3497 (3.0734) grad_norm: 2.7852 (3.0421) time: 1.2972 data: 0.0003 max mem: 13912 +[2024-12-06 21:09:21 root] (utils.py 283): INFO Epoch: [13] [1970/2502] eta: 0:11:32 lr: 0.000003 loss_cls: 3.3777 (3.0736) grad_norm: 2.6461 (3.0411) time: 1.2920 data: 0.0003 max mem: 13912 +[2024-12-06 21:09:34 root] (utils.py 283): INFO Epoch: [13] [1980/2502] eta: 0:11:19 lr: 0.000003 loss_cls: 3.1654 (3.0735) grad_norm: 2.6494 (3.0402) time: 1.2943 data: 0.0003 max mem: 13912 +[2024-12-06 21:09:47 root] (utils.py 283): INFO Epoch: [13] [1990/2502] eta: 0:11:06 lr: 0.000003 loss_cls: 3.1593 (3.0736) grad_norm: 2.8965 (3.0396) time: 1.2956 data: 0.0002 max mem: 13912 +[2024-12-06 21:10:00 root] (utils.py 283): INFO Epoch: [13] [2000/2502] eta: 0:10:53 lr: 0.000003 loss_cls: 2.9791 (3.0730) grad_norm: 2.7180 (3.0388) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 21:10:12 root] (utils.py 283): INFO Epoch: [13] [2010/2502] eta: 0:10:40 lr: 0.000003 loss_cls: 2.8766 (3.0726) grad_norm: 2.7151 (3.0414) time: 1.2942 data: 0.0003 max mem: 13912 +[2024-12-06 21:10:25 root] (utils.py 283): INFO Epoch: [13] [2020/2502] eta: 0:10:27 lr: 0.000003 loss_cls: 2.8497 (3.0718) grad_norm: 2.8891 (3.0410) time: 1.2931 data: 0.0003 max mem: 13912 +[2024-12-06 21:10:38 root] (utils.py 283): INFO Epoch: [13] [2030/2502] eta: 0:10:14 lr: 0.000003 loss_cls: 3.2729 (3.0720) grad_norm: 2.7657 (3.0432) time: 1.2930 data: 0.0003 max mem: 13912 +[2024-12-06 21:10:51 root] (utils.py 283): INFO Epoch: [13] [2040/2502] eta: 0:10:01 lr: 0.000003 loss_cls: 3.2729 (3.0720) grad_norm: 2.6483 (3.0419) time: 1.2927 data: 0.0002 max mem: 13912 +[2024-12-06 21:11:04 root] (utils.py 283): INFO Epoch: [13] [2050/2502] eta: 0:09:48 lr: 0.000003 loss_cls: 3.1414 (3.0718) grad_norm: 2.6409 (3.0410) time: 1.2951 data: 0.0003 max mem: 13912 +[2024-12-06 21:11:17 root] (utils.py 283): INFO Epoch: [13] [2060/2502] eta: 0:09:35 lr: 0.000003 loss_cls: 3.1414 (3.0720) grad_norm: 2.6409 (3.0395) time: 1.2949 data: 0.0003 max mem: 13912 +[2024-12-06 21:11:30 root] (utils.py 283): INFO Epoch: [13] [2070/2502] eta: 0:09:22 lr: 0.000003 loss_cls: 3.0369 (3.0711) grad_norm: 2.6483 (3.0434) time: 1.2909 data: 0.0003 max mem: 13912 +[2024-12-06 21:11:43 root] (utils.py 283): INFO Epoch: [13] [2080/2502] eta: 0:09:09 lr: 0.000003 loss_cls: 3.0470 (3.0711) grad_norm: 2.9568 (3.0449) time: 1.2906 data: 0.0003 max mem: 13912 +[2024-12-06 21:11:56 root] (utils.py 283): INFO Epoch: [13] [2090/2502] eta: 0:08:56 lr: 0.000003 loss_cls: 3.1631 (3.0698) grad_norm: 2.9568 (3.0461) time: 1.2931 data: 0.0003 max mem: 13912 +[2024-12-06 21:12:09 root] (utils.py 283): INFO Epoch: [13] [2100/2502] eta: 0:08:43 lr: 0.000003 loss_cls: 2.9083 (3.0690) grad_norm: 2.9523 (3.0460) time: 1.2950 data: 0.0002 max mem: 13912 +[2024-12-06 21:12:22 root] (utils.py 283): INFO Epoch: [13] [2110/2502] eta: 0:08:30 lr: 0.000003 loss_cls: 3.1191 (3.0689) grad_norm: 2.8532 (3.0466) time: 1.2967 data: 0.0003 max mem: 13912 +[2024-12-06 21:12:35 root] (utils.py 283): INFO Epoch: [13] [2120/2502] eta: 0:08:16 lr: 0.000003 loss_cls: 2.8458 (3.0673) grad_norm: 2.6183 (3.0448) time: 1.2971 data: 0.0003 max mem: 13912 +[2024-12-06 21:12:48 root] (utils.py 283): INFO Epoch: [13] [2130/2502] eta: 0:08:03 lr: 0.000003 loss_cls: 2.8372 (3.0670) grad_norm: 2.5536 (3.0427) time: 1.2955 data: 0.0002 max mem: 13912 +[2024-12-06 21:13:01 root] (utils.py 283): INFO Epoch: [13] [2140/2502] eta: 0:07:50 lr: 0.000003 loss_cls: 3.1247 (3.0675) grad_norm: 2.6619 (3.0410) time: 1.2946 data: 0.0003 max mem: 13912 +[2024-12-06 21:13:14 root] (utils.py 283): INFO Epoch: [13] [2150/2502] eta: 0:07:37 lr: 0.000003 loss_cls: 3.1362 (3.0673) grad_norm: 2.6789 (3.0395) time: 1.2925 data: 0.0003 max mem: 13912 +[2024-12-06 21:13:26 root] (utils.py 283): INFO Epoch: [13] [2160/2502] eta: 0:07:24 lr: 0.000003 loss_cls: 3.2209 (3.0678) grad_norm: 2.6779 (3.0381) time: 1.2907 data: 0.0002 max mem: 13912 +[2024-12-06 21:13:39 root] (utils.py 283): INFO Epoch: [13] [2170/2502] eta: 0:07:11 lr: 0.000003 loss_cls: 3.3538 (3.0694) grad_norm: 2.7598 (3.0391) time: 1.2907 data: 0.0003 max mem: 13912 +[2024-12-06 21:13:52 root] (utils.py 283): INFO Epoch: [13] [2180/2502] eta: 0:06:58 lr: 0.000003 loss_cls: 3.4082 (3.0713) grad_norm: 2.9955 (3.0403) time: 1.2901 data: 0.0003 max mem: 13912 +[2024-12-06 21:14:05 root] (utils.py 283): INFO Epoch: [13] [2190/2502] eta: 0:06:45 lr: 0.000003 loss_cls: 3.3587 (3.0719) grad_norm: 2.9951 (3.0458) time: 1.2928 data: 0.0002 max mem: 13912 +[2024-12-06 21:14:18 root] (utils.py 283): INFO Epoch: [13] [2200/2502] eta: 0:06:32 lr: 0.000003 loss_cls: 3.1749 (3.0719) grad_norm: 2.7524 (3.0439) time: 1.2953 data: 0.0002 max mem: 13912 +[2024-12-06 21:14:31 root] (utils.py 283): INFO Epoch: [13] [2210/2502] eta: 0:06:19 lr: 0.000003 loss_cls: 3.1579 (3.0713) grad_norm: 2.6775 (3.0455) time: 1.2955 data: 0.0003 max mem: 13912 +[2024-12-06 21:14:44 root] (utils.py 283): INFO Epoch: [13] [2220/2502] eta: 0:06:06 lr: 0.000003 loss_cls: 2.9794 (3.0713) grad_norm: 2.6135 (3.0492) time: 1.2947 data: 0.0003 max mem: 13912 +[2024-12-06 21:14:57 root] (utils.py 283): INFO Epoch: [13] [2230/2502] eta: 0:05:53 lr: 0.000003 loss_cls: 3.2301 (3.0723) grad_norm: 2.8284 (3.0495) time: 1.2936 data: 0.0002 max mem: 13912 +[2024-12-06 21:15:10 root] (utils.py 283): INFO Epoch: [13] [2240/2502] eta: 0:05:40 lr: 0.000003 loss_cls: 3.2464 (3.0727) grad_norm: 2.8526 (3.0500) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 21:15:23 root] (utils.py 283): INFO Epoch: [13] [2250/2502] eta: 0:05:27 lr: 0.000003 loss_cls: 3.1868 (3.0733) grad_norm: 2.7752 (3.0494) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 21:15:36 root] (utils.py 283): INFO Epoch: [13] [2260/2502] eta: 0:05:14 lr: 0.000003 loss_cls: 3.1235 (3.0731) grad_norm: 2.7630 (3.0509) time: 1.2952 data: 0.0002 max mem: 13912 +[2024-12-06 21:15:49 root] (utils.py 283): INFO Epoch: [13] [2270/2502] eta: 0:05:01 lr: 0.000003 loss_cls: 2.9447 (3.0727) grad_norm: 2.7539 (3.0504) time: 1.2930 data: 0.0003 max mem: 13912 +[2024-12-06 21:16:02 root] (utils.py 283): INFO Epoch: [13] [2280/2502] eta: 0:04:48 lr: 0.000003 loss_cls: 3.0138 (3.0719) grad_norm: 2.8194 (3.0495) time: 1.2909 data: 0.0003 max mem: 13912 +[2024-12-06 21:16:15 root] (utils.py 283): INFO Epoch: [13] [2290/2502] eta: 0:04:35 lr: 0.000003 loss_cls: 3.1544 (3.0721) grad_norm: 2.9097 (3.0509) time: 1.2898 data: 0.0003 max mem: 13912 +[2024-12-06 21:16:28 root] (utils.py 283): INFO Epoch: [13] [2300/2502] eta: 0:04:22 lr: 0.000003 loss_cls: 3.0838 (3.0716) grad_norm: 2.5863 (3.0488) time: 1.2924 data: 0.0003 max mem: 13912 +[2024-12-06 21:16:41 root] (utils.py 283): INFO Epoch: [13] [2310/2502] eta: 0:04:09 lr: 0.000003 loss_cls: 3.1079 (3.0728) grad_norm: 2.5969 (3.0500) time: 1.2938 data: 0.0003 max mem: 13912 +[2024-12-06 21:16:54 root] (utils.py 283): INFO Epoch: [13] [2320/2502] eta: 0:03:56 lr: 0.000003 loss_cls: 3.1772 (3.0727) grad_norm: 2.7064 (3.0528) time: 1.2926 data: 0.0003 max mem: 13912 +[2024-12-06 21:17:06 root] (utils.py 283): INFO Epoch: [13] [2330/2502] eta: 0:03:43 lr: 0.000003 loss_cls: 3.2728 (3.0737) grad_norm: 2.5528 (3.0508) time: 1.2933 data: 0.0003 max mem: 13912 +[2024-12-06 21:17:19 root] (utils.py 283): INFO Epoch: [13] [2340/2502] eta: 0:03:30 lr: 0.000003 loss_cls: 3.2339 (3.0737) grad_norm: 2.7660 (3.0501) time: 1.2926 data: 0.0002 max mem: 13912 +[2024-12-06 21:17:32 root] (utils.py 283): INFO Epoch: [13] [2350/2502] eta: 0:03:17 lr: 0.000003 loss_cls: 3.2339 (3.0746) grad_norm: 2.7114 (3.0485) time: 1.2932 data: 0.0002 max mem: 13912 +[2024-12-06 21:17:45 root] (utils.py 283): INFO Epoch: [13] [2360/2502] eta: 0:03:04 lr: 0.000003 loss_cls: 3.1713 (3.0737) grad_norm: 2.6130 (3.0489) time: 1.2951 data: 0.0002 max mem: 13912 +[2024-12-06 21:17:58 root] (utils.py 283): INFO Epoch: [13] [2370/2502] eta: 0:02:51 lr: 0.000003 loss_cls: 2.9061 (3.0731) grad_norm: 2.6130 (3.0469) time: 1.2945 data: 0.0002 max mem: 13912 +[2024-12-06 21:18:11 root] (utils.py 283): INFO Epoch: [13] [2380/2502] eta: 0:02:38 lr: 0.000003 loss_cls: 3.2138 (3.0733) grad_norm: 2.5588 (3.0467) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 21:18:24 root] (utils.py 283): INFO Epoch: [13] [2390/2502] eta: 0:02:25 lr: 0.000003 loss_cls: 3.2496 (3.0740) grad_norm: 2.5618 (3.0511) time: 1.3045 data: 0.0002 max mem: 13912 +[2024-12-06 21:18:37 root] (utils.py 283): INFO Epoch: [13] [2400/2502] eta: 0:02:12 lr: 0.000003 loss_cls: 3.1594 (3.0740) grad_norm: 2.5618 (3.0505) time: 1.3050 data: 0.0003 max mem: 13912 +[2024-12-06 21:18:50 root] (utils.py 283): INFO Epoch: [13] [2410/2502] eta: 0:01:59 lr: 0.000003 loss_cls: 3.0138 (3.0740) grad_norm: 2.8769 (3.0556) time: 1.2984 data: 0.0003 max mem: 13912 +[2024-12-06 21:19:03 root] (utils.py 283): INFO Epoch: [13] [2420/2502] eta: 0:01:46 lr: 0.000003 loss_cls: 3.1948 (3.0746) grad_norm: 2.8949 (3.0543) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 21:19:16 root] (utils.py 283): INFO Epoch: [13] [2430/2502] eta: 0:01:33 lr: 0.000003 loss_cls: 3.2647 (3.0755) grad_norm: 2.8027 (3.0542) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 21:19:29 root] (utils.py 283): INFO Epoch: [13] [2440/2502] eta: 0:01:20 lr: 0.000003 loss_cls: 3.2497 (3.0756) grad_norm: 2.9002 (3.0565) time: 1.2988 data: 0.0002 max mem: 13912 +[2024-12-06 21:19:42 root] (utils.py 283): INFO Epoch: [13] [2450/2502] eta: 0:01:07 lr: 0.000003 loss_cls: 3.2601 (3.0761) grad_norm: 2.9163 (3.0584) time: 1.2967 data: 0.0002 max mem: 13912 +[2024-12-06 21:19:55 root] (utils.py 283): INFO Epoch: [13] [2460/2502] eta: 0:00:54 lr: 0.000003 loss_cls: 3.1642 (3.0747) grad_norm: 2.8881 (3.0577) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 21:20:08 root] (utils.py 283): INFO Epoch: [13] [2470/2502] eta: 0:00:41 lr: 0.000003 loss_cls: 2.8762 (3.0744) grad_norm: 2.8819 (3.0572) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 21:20:21 root] (utils.py 283): INFO Epoch: [13] [2480/2502] eta: 0:00:28 lr: 0.000003 loss_cls: 3.2612 (3.0747) grad_norm: 2.7361 (3.0595) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 21:20:35 root] (utils.py 283): INFO Epoch: [13] [2490/2502] eta: 0:00:15 lr: 0.000003 loss_cls: 3.0982 (3.0742) grad_norm: 2.7723 (3.0591) time: 1.3255 data: 0.0242 max mem: 13912 +[2024-12-06 21:20:48 root] (utils.py 283): INFO Epoch: [13] [2500/2502] eta: 0:00:02 lr: 0.000003 loss_cls: 3.0691 (3.0743) grad_norm: 2.7723 (3.0579) time: 1.3233 data: 0.0242 max mem: 13912 +[2024-12-06 21:20:49 root] (utils.py 283): INFO Epoch: [13] [2501/2502] eta: 0:00:01 lr: 0.000003 loss_cls: 2.8990 (3.0739) grad_norm: 2.7774 (3.0579) time: 1.3241 data: 0.0241 max mem: 13912 +[2024-12-06 21:20:49 root] (utils.py 297): INFO Epoch: [13] Total time: 0:54:14 (1.3006 s / it) +[2024-12-06 21:20:49 root] (engine.py 179): INFO Averaged stats:lr: 0.000003 loss_cls: 2.8990 (3.0723) grad_norm: 2.7774 (3.0579) +[2024-12-06 21:20:50 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:21 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4350 (0.4350) acc1: 89.8438 (89.8438) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.2244 data: 0.0004 max mem: 13912 +[2024-12-06 21:20:52 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:19 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6681 (0.6469) acc1: 85.9375 (86.0795) acc3: 96.8750 (95.6676) acc5: 97.6562 (97.2301) time: 0.2272 data: 0.0006 max mem: 13912 +[2024-12-06 21:20:55 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6681 (0.6861) acc1: 84.3750 (85.3423) acc3: 96.0938 (95.2753) acc5: 97.6562 (96.8378) time: 0.2276 data: 0.0005 max mem: 13912 +[2024-12-06 21:20:57 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7405 (0.7146) acc1: 83.5938 (84.5262) acc3: 94.5312 (94.9597) acc5: 96.8750 (96.7994) time: 0.2277 data: 0.0005 max mem: 13912 +[2024-12-06 21:20:59 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7405 (0.7177) acc1: 83.5938 (84.3941) acc3: 94.5312 (95.1029) acc5: 96.8750 (96.7988) time: 0.2279 data: 0.0005 max mem: 13912 +[2024-12-06 21:21:02 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7722 (0.8011) acc1: 79.6875 (82.3070) acc3: 89.8438 (93.7960) acc5: 94.5312 (95.8027) time: 0.2280 data: 0.0005 max mem: 13912 +[2024-12-06 21:21:04 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0940 (0.8333) acc1: 75.0000 (81.8391) acc3: 89.0625 (93.1096) acc5: 92.1875 (95.2741) time: 0.2293 data: 0.0005 max mem: 13912 +[2024-12-06 21:21:06 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0369 (0.8608) acc1: 79.6875 (81.1840) acc3: 89.8438 (92.7707) acc5: 92.1875 (94.9824) time: 0.2292 data: 0.0005 max mem: 13912 +[2024-12-06 21:21:08 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0369 (0.8910) acc1: 76.5625 (80.5652) acc3: 89.8438 (92.2550) acc5: 92.1875 (94.6566) time: 0.2282 data: 0.0008 max mem: 13912 +[2024-12-06 21:21:11 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0800 (0.9143) acc1: 74.2188 (79.8764) acc3: 89.0625 (91.8870) acc5: 92.1875 (94.4111) time: 0.2284 data: 0.0007 max mem: 13912 +[2024-12-06 21:21:12 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.9844 (0.9096) acc1: 76.5625 (79.8960) acc3: 90.6250 (91.9920) acc5: 93.7500 (94.5040) time: 0.2246 data: 0.0006 max mem: 13912 +[2024-12-06 21:21:12 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2277 s / it) +[2024-12-06 21:21:12 root] (engine.py 264): INFO * Acc@1 79.952 Acc@3 92.240 Acc@5 94.780 loss 0.904 flops 3.584 layer_flops 3.536 +[2024-12-06 21:21:12 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 80.0% +[2024-12-06 21:21:13 root] (main.py 551): INFO Max accuracy: 79.95% +[2024-12-06 21:21:14 root] (utils.py 283): INFO Epoch: [14] [ 0/2502] eta: 0:53:45 lr: 0.000002 loss_cls: 3.4524 (3.4524) grad_norm: 3.2652 (3.2652) time: 1.2892 data: 0.0005 max mem: 13912 +[2024-12-06 21:21:27 root] (utils.py 283): INFO Epoch: [14] [ 10/2502] eta: 0:53:47 lr: 0.000002 loss_cls: 3.4281 (3.2301) grad_norm: 3.0435 (3.1247) time: 1.2952 data: 0.0003 max mem: 13912 +[2024-12-06 21:21:40 root] (utils.py 283): INFO Epoch: [14] [ 20/2502] eta: 0:53:40 lr: 0.000002 loss_cls: 2.8736 (3.0193) grad_norm: 2.7068 (3.3423) time: 1.2980 data: 0.0002 max mem: 13912 +[2024-12-06 21:21:53 root] (utils.py 283): INFO Epoch: [14] [ 30/2502] eta: 0:53:31 lr: 0.000002 loss_cls: 2.8736 (3.0335) grad_norm: 2.7809 (3.2783) time: 1.3016 data: 0.0002 max mem: 13912 +[2024-12-06 21:22:06 root] (utils.py 283): INFO Epoch: [14] [ 40/2502] eta: 0:53:22 lr: 0.000002 loss_cls: 3.1777 (3.0401) grad_norm: 2.6967 (3.1770) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 21:22:19 root] (utils.py 283): INFO Epoch: [14] [ 50/2502] eta: 0:53:15 lr: 0.000002 loss_cls: 3.0426 (2.9954) grad_norm: 2.5484 (3.1389) time: 1.3096 data: 0.0002 max mem: 13912 +[2024-12-06 21:22:32 root] (utils.py 283): INFO Epoch: [14] [ 60/2502] eta: 0:53:00 lr: 0.000002 loss_cls: 3.2002 (3.0481) grad_norm: 2.7893 (3.3067) time: 1.3058 data: 0.0002 max mem: 13912 +[2024-12-06 21:22:45 root] (utils.py 283): INFO Epoch: [14] [ 70/2502] eta: 0:52:48 lr: 0.000002 loss_cls: 3.4166 (3.0520) grad_norm: 2.8060 (3.2196) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 21:22:58 root] (utils.py 283): INFO Epoch: [14] [ 80/2502] eta: 0:52:33 lr: 0.000002 loss_cls: 3.1137 (3.0336) grad_norm: 2.6860 (3.1702) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 21:23:11 root] (utils.py 283): INFO Epoch: [14] [ 90/2502] eta: 0:52:21 lr: 0.000002 loss_cls: 3.2003 (3.0607) grad_norm: 2.7317 (3.1303) time: 1.3011 data: 0.0002 max mem: 13912 +[2024-12-06 21:23:24 root] (utils.py 283): INFO Epoch: [14] [ 100/2502] eta: 0:52:08 lr: 0.000002 loss_cls: 3.3150 (3.0861) grad_norm: 2.6907 (3.1353) time: 1.3046 data: 0.0002 max mem: 13912 +[2024-12-06 21:23:37 root] (utils.py 283): INFO Epoch: [14] [ 110/2502] eta: 0:51:56 lr: 0.000002 loss_cls: 3.3913 (3.1052) grad_norm: 2.6967 (3.1156) time: 1.3047 data: 0.0002 max mem: 13912 +[2024-12-06 21:23:50 root] (utils.py 283): INFO Epoch: [14] [ 120/2502] eta: 0:51:43 lr: 0.000002 loss_cls: 3.2079 (3.0992) grad_norm: 2.8069 (3.0911) time: 1.3054 data: 0.0002 max mem: 13912 +[2024-12-06 21:24:03 root] (utils.py 283): INFO Epoch: [14] [ 130/2502] eta: 0:51:29 lr: 0.000002 loss_cls: 3.1794 (3.1019) grad_norm: 2.6741 (3.0603) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 21:24:17 root] (utils.py 283): INFO Epoch: [14] [ 140/2502] eta: 0:51:16 lr: 0.000002 loss_cls: 3.2766 (3.1100) grad_norm: 2.6726 (3.0613) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 21:24:30 root] (utils.py 283): INFO Epoch: [14] [ 150/2502] eta: 0:51:03 lr: 0.000002 loss_cls: 3.2766 (3.1150) grad_norm: 2.9226 (3.0775) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 21:24:43 root] (utils.py 283): INFO Epoch: [14] [ 160/2502] eta: 0:50:50 lr: 0.000002 loss_cls: 3.2866 (3.1314) grad_norm: 2.9006 (3.0587) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 21:24:56 root] (utils.py 283): INFO Epoch: [14] [ 170/2502] eta: 0:50:37 lr: 0.000002 loss_cls: 3.2866 (3.1326) grad_norm: 2.8116 (3.0474) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 21:25:09 root] (utils.py 283): INFO Epoch: [14] [ 180/2502] eta: 0:50:25 lr: 0.000002 loss_cls: 3.2660 (3.1395) grad_norm: 2.5611 (3.0269) time: 1.3047 data: 0.0002 max mem: 13912 +[2024-12-06 21:25:22 root] (utils.py 283): INFO Epoch: [14] [ 190/2502] eta: 0:50:12 lr: 0.000002 loss_cls: 3.2660 (3.1391) grad_norm: 2.4777 (3.0282) time: 1.3051 data: 0.0002 max mem: 13912 +[2024-12-06 21:25:35 root] (utils.py 283): INFO Epoch: [14] [ 200/2502] eta: 0:50:02 lr: 0.000002 loss_cls: 3.0930 (3.1380) grad_norm: 2.6574 (3.0155) time: 1.3168 data: 0.0002 max mem: 13912 +[2024-12-06 21:25:48 root] (utils.py 283): INFO Epoch: [14] [ 210/2502] eta: 0:49:49 lr: 0.000002 loss_cls: 3.0598 (3.1324) grad_norm: 2.6994 (3.0137) time: 1.3150 data: 0.0002 max mem: 13912 +[2024-12-06 21:26:01 root] (utils.py 283): INFO Epoch: [14] [ 220/2502] eta: 0:49:35 lr: 0.000002 loss_cls: 3.2461 (3.1470) grad_norm: 2.7783 (3.0114) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 21:26:14 root] (utils.py 283): INFO Epoch: [14] [ 230/2502] eta: 0:49:22 lr: 0.000002 loss_cls: 3.2974 (3.1499) grad_norm: 2.6820 (3.0004) time: 1.3034 data: 0.0002 max mem: 13912 +[2024-12-06 21:26:27 root] (utils.py 283): INFO Epoch: [14] [ 240/2502] eta: 0:49:09 lr: 0.000002 loss_cls: 3.2563 (3.1452) grad_norm: 2.6143 (3.0412) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 21:26:40 root] (utils.py 283): INFO Epoch: [14] [ 250/2502] eta: 0:48:56 lr: 0.000002 loss_cls: 2.9503 (3.1325) grad_norm: 2.7605 (3.0620) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 21:26:53 root] (utils.py 283): INFO Epoch: [14] [ 260/2502] eta: 0:48:42 lr: 0.000002 loss_cls: 2.8724 (3.1184) grad_norm: 2.7155 (3.0500) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 21:27:06 root] (utils.py 283): INFO Epoch: [14] [ 270/2502] eta: 0:48:29 lr: 0.000002 loss_cls: 2.9758 (3.1160) grad_norm: 2.6576 (3.0662) time: 1.2971 data: 0.0002 max mem: 13912 +[2024-12-06 21:27:19 root] (utils.py 283): INFO Epoch: [14] [ 280/2502] eta: 0:48:16 lr: 0.000002 loss_cls: 3.0814 (3.1151) grad_norm: 2.8540 (3.0962) time: 1.2985 data: 0.0002 max mem: 13912 +[2024-12-06 21:27:32 root] (utils.py 283): INFO Epoch: [14] [ 290/2502] eta: 0:48:02 lr: 0.000002 loss_cls: 2.9603 (3.1094) grad_norm: 2.6981 (3.0814) time: 1.2982 data: 0.0002 max mem: 13912 +[2024-12-06 21:27:45 root] (utils.py 283): INFO Epoch: [14] [ 300/2502] eta: 0:47:49 lr: 0.000002 loss_cls: 2.9603 (3.1056) grad_norm: 2.5896 (3.0781) time: 1.2993 data: 0.0002 max mem: 13912 +[2024-12-06 21:27:58 root] (utils.py 283): INFO Epoch: [14] [ 310/2502] eta: 0:47:36 lr: 0.000002 loss_cls: 3.1937 (3.1034) grad_norm: 2.6750 (3.0742) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 21:28:11 root] (utils.py 283): INFO Epoch: [14] [ 320/2502] eta: 0:47:23 lr: 0.000002 loss_cls: 2.9985 (3.0989) grad_norm: 2.6750 (3.0695) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 21:28:24 root] (utils.py 283): INFO Epoch: [14] [ 330/2502] eta: 0:47:10 lr: 0.000002 loss_cls: 2.8627 (3.0932) grad_norm: 2.5684 (3.0710) time: 1.3024 data: 0.0003 max mem: 13912 +[2024-12-06 21:28:37 root] (utils.py 283): INFO Epoch: [14] [ 340/2502] eta: 0:46:56 lr: 0.000002 loss_cls: 3.0670 (3.0931) grad_norm: 2.7029 (3.0733) time: 1.3003 data: 0.0002 max mem: 13912 +[2024-12-06 21:28:50 root] (utils.py 283): INFO Epoch: [14] [ 350/2502] eta: 0:46:43 lr: 0.000002 loss_cls: 2.9436 (3.0831) grad_norm: 2.7029 (3.0619) time: 1.2974 data: 0.0002 max mem: 13912 +[2024-12-06 21:29:03 root] (utils.py 283): INFO Epoch: [14] [ 360/2502] eta: 0:46:30 lr: 0.000002 loss_cls: 2.9436 (3.0850) grad_norm: 2.6152 (3.0571) time: 1.2996 data: 0.0003 max mem: 13912 +[2024-12-06 21:29:16 root] (utils.py 283): INFO Epoch: [14] [ 370/2502] eta: 0:46:17 lr: 0.000002 loss_cls: 2.8484 (3.0715) grad_norm: 2.6152 (3.0490) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 21:29:29 root] (utils.py 283): INFO Epoch: [14] [ 380/2502] eta: 0:46:04 lr: 0.000002 loss_cls: 2.7048 (3.0659) grad_norm: 2.6015 (3.0385) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 21:29:42 root] (utils.py 283): INFO Epoch: [14] [ 390/2502] eta: 0:45:51 lr: 0.000002 loss_cls: 3.1452 (3.0696) grad_norm: 2.8557 (3.0432) time: 1.3001 data: 0.0003 max mem: 13912 +[2024-12-06 21:29:55 root] (utils.py 283): INFO Epoch: [14] [ 400/2502] eta: 0:45:38 lr: 0.000002 loss_cls: 3.0916 (3.0687) grad_norm: 3.0485 (3.0720) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 21:30:08 root] (utils.py 283): INFO Epoch: [14] [ 410/2502] eta: 0:45:24 lr: 0.000002 loss_cls: 3.0906 (3.0709) grad_norm: 2.8208 (3.0832) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 21:30:21 root] (utils.py 283): INFO Epoch: [14] [ 420/2502] eta: 0:45:11 lr: 0.000002 loss_cls: 3.1374 (3.0680) grad_norm: 2.8750 (3.0806) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 21:30:34 root] (utils.py 283): INFO Epoch: [14] [ 430/2502] eta: 0:44:58 lr: 0.000002 loss_cls: 2.9765 (3.0667) grad_norm: 2.8939 (3.0776) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 21:30:47 root] (utils.py 283): INFO Epoch: [14] [ 440/2502] eta: 0:44:45 lr: 0.000002 loss_cls: 3.3014 (3.0715) grad_norm: 2.7161 (3.0702) time: 1.2999 data: 0.0002 max mem: 13912 +[2024-12-06 21:31:00 root] (utils.py 283): INFO Epoch: [14] [ 450/2502] eta: 0:44:33 lr: 0.000002 loss_cls: 3.2849 (3.0734) grad_norm: 2.6664 (3.0658) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 21:31:13 root] (utils.py 283): INFO Epoch: [14] [ 460/2502] eta: 0:44:19 lr: 0.000002 loss_cls: 3.1770 (3.0736) grad_norm: 2.6236 (3.0590) time: 1.3026 data: 0.0003 max mem: 13912 +[2024-12-06 21:31:26 root] (utils.py 283): INFO Epoch: [14] [ 470/2502] eta: 0:44:06 lr: 0.000002 loss_cls: 3.2053 (3.0733) grad_norm: 2.5867 (3.0574) time: 1.2924 data: 0.0002 max mem: 13912 +[2024-12-06 21:31:39 root] (utils.py 283): INFO Epoch: [14] [ 480/2502] eta: 0:43:52 lr: 0.000002 loss_cls: 3.1828 (3.0723) grad_norm: 2.5621 (3.0509) time: 1.2955 data: 0.0002 max mem: 13912 +[2024-12-06 21:31:52 root] (utils.py 283): INFO Epoch: [14] [ 490/2502] eta: 0:43:39 lr: 0.000002 loss_cls: 3.2924 (3.0756) grad_norm: 2.5911 (3.0460) time: 1.2985 data: 0.0003 max mem: 13912 +[2024-12-06 21:32:05 root] (utils.py 283): INFO Epoch: [14] [ 500/2502] eta: 0:43:26 lr: 0.000002 loss_cls: 3.2926 (3.0740) grad_norm: 2.7114 (3.0552) time: 1.2973 data: 0.0003 max mem: 13912 +[2024-12-06 21:32:18 root] (utils.py 283): INFO Epoch: [14] [ 510/2502] eta: 0:43:13 lr: 0.000002 loss_cls: 2.7675 (3.0686) grad_norm: 2.7689 (3.0495) time: 1.2932 data: 0.0002 max mem: 13912 +[2024-12-06 21:32:31 root] (utils.py 283): INFO Epoch: [14] [ 520/2502] eta: 0:42:59 lr: 0.000002 loss_cls: 3.0063 (3.0695) grad_norm: 2.6391 (3.0541) time: 1.2910 data: 0.0003 max mem: 13912 +[2024-12-06 21:32:44 root] (utils.py 283): INFO Epoch: [14] [ 530/2502] eta: 0:42:46 lr: 0.000002 loss_cls: 3.1390 (3.0685) grad_norm: 2.7230 (3.0473) time: 1.2932 data: 0.0002 max mem: 13912 +[2024-12-06 21:32:57 root] (utils.py 283): INFO Epoch: [14] [ 540/2502] eta: 0:42:32 lr: 0.000002 loss_cls: 3.1390 (3.0705) grad_norm: 2.7595 (3.0666) time: 1.2929 data: 0.0003 max mem: 13912 +[2024-12-06 21:33:10 root] (utils.py 283): INFO Epoch: [14] [ 550/2502] eta: 0:42:19 lr: 0.000002 loss_cls: 3.2637 (3.0716) grad_norm: 2.9417 (3.1041) time: 1.2908 data: 0.0003 max mem: 13912 +[2024-12-06 21:33:23 root] (utils.py 283): INFO Epoch: [14] [ 560/2502] eta: 0:42:06 lr: 0.000002 loss_cls: 3.2637 (3.0723) grad_norm: 2.9417 (3.1011) time: 1.2930 data: 0.0002 max mem: 13912 +[2024-12-06 21:33:36 root] (utils.py 283): INFO Epoch: [14] [ 570/2502] eta: 0:41:53 lr: 0.000002 loss_cls: 3.3754 (3.0755) grad_norm: 2.6069 (3.0984) time: 1.2929 data: 0.0002 max mem: 13912 +[2024-12-06 21:33:49 root] (utils.py 283): INFO Epoch: [14] [ 580/2502] eta: 0:41:39 lr: 0.000002 loss_cls: 3.3371 (3.0810) grad_norm: 2.5720 (3.0884) time: 1.2936 data: 0.0003 max mem: 13912 +[2024-12-06 21:34:01 root] (utils.py 283): INFO Epoch: [14] [ 590/2502] eta: 0:41:26 lr: 0.000002 loss_cls: 3.3316 (3.0806) grad_norm: 2.6273 (3.0856) time: 1.2937 data: 0.0003 max mem: 13912 +[2024-12-06 21:34:14 root] (utils.py 283): INFO Epoch: [14] [ 600/2502] eta: 0:41:13 lr: 0.000002 loss_cls: 3.3431 (3.0864) grad_norm: 2.8537 (3.0849) time: 1.2919 data: 0.0003 max mem: 13912 +[2024-12-06 21:34:27 root] (utils.py 283): INFO Epoch: [14] [ 610/2502] eta: 0:41:00 lr: 0.000002 loss_cls: 3.3431 (3.0859) grad_norm: 2.6266 (3.0743) time: 1.2918 data: 0.0002 max mem: 13912 +[2024-12-06 21:34:40 root] (utils.py 283): INFO Epoch: [14] [ 620/2502] eta: 0:40:46 lr: 0.000002 loss_cls: 2.9198 (3.0803) grad_norm: 2.5157 (3.0671) time: 1.2941 data: 0.0003 max mem: 13912 +[2024-12-06 21:34:53 root] (utils.py 283): INFO Epoch: [14] [ 630/2502] eta: 0:40:33 lr: 0.000002 loss_cls: 3.1302 (3.0835) grad_norm: 2.5807 (3.0616) time: 1.2926 data: 0.0002 max mem: 13912 +[2024-12-06 21:35:06 root] (utils.py 283): INFO Epoch: [14] [ 640/2502] eta: 0:40:20 lr: 0.000002 loss_cls: 3.0608 (3.0780) grad_norm: 2.3792 (3.0502) time: 1.2913 data: 0.0002 max mem: 13912 +[2024-12-06 21:35:19 root] (utils.py 283): INFO Epoch: [14] [ 650/2502] eta: 0:40:07 lr: 0.000002 loss_cls: 2.6391 (3.0759) grad_norm: 2.3465 (3.0558) time: 1.2948 data: 0.0002 max mem: 13912 +[2024-12-06 21:35:32 root] (utils.py 283): INFO Epoch: [14] [ 660/2502] eta: 0:39:53 lr: 0.000002 loss_cls: 3.1648 (3.0735) grad_norm: 2.7229 (3.0535) time: 1.2923 data: 0.0002 max mem: 13912 +[2024-12-06 21:35:45 root] (utils.py 283): INFO Epoch: [14] [ 670/2502] eta: 0:39:40 lr: 0.000002 loss_cls: 3.1683 (3.0742) grad_norm: 2.6569 (3.0493) time: 1.2912 data: 0.0002 max mem: 13912 +[2024-12-06 21:35:58 root] (utils.py 283): INFO Epoch: [14] [ 680/2502] eta: 0:39:27 lr: 0.000002 loss_cls: 3.2224 (3.0730) grad_norm: 2.8172 (3.0565) time: 1.2933 data: 0.0003 max mem: 13912 +[2024-12-06 21:36:11 root] (utils.py 283): INFO Epoch: [14] [ 690/2502] eta: 0:39:14 lr: 0.000002 loss_cls: 2.8145 (3.0665) grad_norm: 2.9293 (3.0545) time: 1.2938 data: 0.0003 max mem: 13912 +[2024-12-06 21:36:24 root] (utils.py 283): INFO Epoch: [14] [ 700/2502] eta: 0:39:01 lr: 0.000002 loss_cls: 2.8378 (3.0691) grad_norm: 2.7424 (3.0510) time: 1.2925 data: 0.0002 max mem: 13912 +[2024-12-06 21:36:37 root] (utils.py 283): INFO Epoch: [14] [ 710/2502] eta: 0:38:48 lr: 0.000002 loss_cls: 3.1798 (3.0664) grad_norm: 2.5850 (3.0450) time: 1.2933 data: 0.0002 max mem: 13912 +[2024-12-06 21:36:50 root] (utils.py 283): INFO Epoch: [14] [ 720/2502] eta: 0:38:35 lr: 0.000002 loss_cls: 3.1890 (3.0685) grad_norm: 2.5850 (3.0400) time: 1.2966 data: 0.0002 max mem: 13912 +[2024-12-06 21:37:03 root] (utils.py 283): INFO Epoch: [14] [ 730/2502] eta: 0:38:22 lr: 0.000002 loss_cls: 3.2440 (3.0703) grad_norm: 2.7384 (3.0380) time: 1.3000 data: 0.0002 max mem: 13912 +[2024-12-06 21:37:16 root] (utils.py 283): INFO Epoch: [14] [ 740/2502] eta: 0:38:09 lr: 0.000002 loss_cls: 3.0891 (3.0643) grad_norm: 2.7255 (3.0321) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 21:37:29 root] (utils.py 283): INFO Epoch: [14] [ 750/2502] eta: 0:37:56 lr: 0.000002 loss_cls: 2.9767 (3.0636) grad_norm: 2.5123 (3.0350) time: 1.3017 data: 0.0003 max mem: 13912 +[2024-12-06 21:37:42 root] (utils.py 283): INFO Epoch: [14] [ 760/2502] eta: 0:37:43 lr: 0.000002 loss_cls: 3.1102 (3.0652) grad_norm: 2.7035 (3.0325) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 21:37:55 root] (utils.py 283): INFO Epoch: [14] [ 770/2502] eta: 0:37:30 lr: 0.000002 loss_cls: 3.3741 (3.0681) grad_norm: 2.7489 (3.0325) time: 1.3018 data: 0.0003 max mem: 13912 +[2024-12-06 21:38:08 root] (utils.py 283): INFO Epoch: [14] [ 780/2502] eta: 0:37:17 lr: 0.000002 loss_cls: 3.2496 (3.0667) grad_norm: 2.8921 (3.0339) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 21:38:21 root] (utils.py 283): INFO Epoch: [14] [ 790/2502] eta: 0:37:04 lr: 0.000002 loss_cls: 2.9006 (3.0657) grad_norm: 2.7811 (3.0301) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 21:38:34 root] (utils.py 283): INFO Epoch: [14] [ 800/2502] eta: 0:36:51 lr: 0.000002 loss_cls: 3.0805 (3.0672) grad_norm: 2.6204 (3.0285) time: 1.3038 data: 0.0002 max mem: 13912 +[2024-12-06 21:38:47 root] (utils.py 283): INFO Epoch: [14] [ 810/2502] eta: 0:36:38 lr: 0.000002 loss_cls: 3.1838 (3.0679) grad_norm: 2.7870 (3.0262) time: 1.3046 data: 0.0002 max mem: 13912 +[2024-12-06 21:39:00 root] (utils.py 283): INFO Epoch: [14] [ 820/2502] eta: 0:36:26 lr: 0.000002 loss_cls: 3.3349 (3.0689) grad_norm: 2.5826 (3.0221) time: 1.3056 data: 0.0003 max mem: 13912 +[2024-12-06 21:39:13 root] (utils.py 283): INFO Epoch: [14] [ 830/2502] eta: 0:36:13 lr: 0.000002 loss_cls: 3.0340 (3.0664) grad_norm: 2.7411 (3.0224) time: 1.3064 data: 0.0003 max mem: 13912 +[2024-12-06 21:39:26 root] (utils.py 283): INFO Epoch: [14] [ 840/2502] eta: 0:36:00 lr: 0.000002 loss_cls: 2.7736 (3.0645) grad_norm: 2.6240 (3.0168) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 21:39:39 root] (utils.py 283): INFO Epoch: [14] [ 850/2502] eta: 0:35:47 lr: 0.000002 loss_cls: 3.1370 (3.0650) grad_norm: 2.6906 (3.0172) time: 1.3135 data: 0.0003 max mem: 13912 +[2024-12-06 21:39:53 root] (utils.py 283): INFO Epoch: [14] [ 860/2502] eta: 0:35:35 lr: 0.000002 loss_cls: 2.8900 (3.0615) grad_norm: 2.9878 (3.0215) time: 1.3322 data: 0.0004 max mem: 13912 +[2024-12-06 21:40:06 root] (utils.py 283): INFO Epoch: [14] [ 870/2502] eta: 0:35:22 lr: 0.000002 loss_cls: 2.7341 (3.0582) grad_norm: 2.6020 (3.0187) time: 1.3195 data: 0.0003 max mem: 13912 +[2024-12-06 21:40:19 root] (utils.py 283): INFO Epoch: [14] [ 880/2502] eta: 0:35:09 lr: 0.000002 loss_cls: 3.1902 (3.0608) grad_norm: 2.5044 (3.0162) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 21:40:32 root] (utils.py 283): INFO Epoch: [14] [ 890/2502] eta: 0:34:56 lr: 0.000002 loss_cls: 3.0997 (3.0589) grad_norm: 2.7732 (3.0187) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 21:40:45 root] (utils.py 283): INFO Epoch: [14] [ 900/2502] eta: 0:34:43 lr: 0.000002 loss_cls: 3.0626 (3.0589) grad_norm: 2.9145 (3.0183) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 21:40:58 root] (utils.py 283): INFO Epoch: [14] [ 910/2502] eta: 0:34:30 lr: 0.000002 loss_cls: 3.1734 (3.0589) grad_norm: 2.8162 (3.0238) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 21:41:11 root] (utils.py 283): INFO Epoch: [14] [ 920/2502] eta: 0:34:17 lr: 0.000002 loss_cls: 3.1088 (3.0597) grad_norm: 2.6220 (3.0205) time: 1.3093 data: 0.0003 max mem: 13912 +[2024-12-06 21:41:24 root] (utils.py 283): INFO Epoch: [14] [ 930/2502] eta: 0:34:04 lr: 0.000002 loss_cls: 3.1656 (3.0612) grad_norm: 2.6042 (3.0209) time: 1.3091 data: 0.0002 max mem: 13912 +[2024-12-06 21:41:37 root] (utils.py 283): INFO Epoch: [14] [ 940/2502] eta: 0:33:51 lr: 0.000002 loss_cls: 3.2649 (3.0595) grad_norm: 2.6125 (3.0255) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 21:41:50 root] (utils.py 283): INFO Epoch: [14] [ 950/2502] eta: 0:33:38 lr: 0.000002 loss_cls: 2.8716 (3.0565) grad_norm: 2.7553 (3.0244) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 21:42:03 root] (utils.py 283): INFO Epoch: [14] [ 960/2502] eta: 0:33:25 lr: 0.000002 loss_cls: 2.9411 (3.0556) grad_norm: 2.7638 (3.0216) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 21:42:16 root] (utils.py 283): INFO Epoch: [14] [ 970/2502] eta: 0:33:12 lr: 0.000002 loss_cls: 2.9442 (3.0553) grad_norm: 2.5430 (3.0223) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 21:42:29 root] (utils.py 283): INFO Epoch: [14] [ 980/2502] eta: 0:32:59 lr: 0.000002 loss_cls: 3.0685 (3.0537) grad_norm: 2.4032 (3.0191) time: 1.3044 data: 0.0003 max mem: 13912 +[2024-12-06 21:42:42 root] (utils.py 283): INFO Epoch: [14] [ 990/2502] eta: 0:32:46 lr: 0.000002 loss_cls: 3.0851 (3.0542) grad_norm: 2.5442 (3.0169) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 21:42:55 root] (utils.py 283): INFO Epoch: [14] [1000/2502] eta: 0:32:33 lr: 0.000002 loss_cls: 3.1212 (3.0541) grad_norm: 2.7014 (3.0253) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 21:43:08 root] (utils.py 283): INFO Epoch: [14] [1010/2502] eta: 0:32:20 lr: 0.000002 loss_cls: 3.1083 (3.0539) grad_norm: 2.9501 (3.0260) time: 1.3022 data: 0.0003 max mem: 13912 +[2024-12-06 21:43:21 root] (utils.py 283): INFO Epoch: [14] [1020/2502] eta: 0:32:07 lr: 0.000002 loss_cls: 3.1395 (3.0552) grad_norm: 2.9009 (3.0249) time: 1.3021 data: 0.0002 max mem: 13912 +[2024-12-06 21:43:34 root] (utils.py 283): INFO Epoch: [14] [1030/2502] eta: 0:31:54 lr: 0.000002 loss_cls: 3.0930 (3.0539) grad_norm: 2.6015 (3.0217) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 21:43:47 root] (utils.py 283): INFO Epoch: [14] [1040/2502] eta: 0:31:41 lr: 0.000002 loss_cls: 2.9609 (3.0543) grad_norm: 2.6015 (3.0235) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 21:44:00 root] (utils.py 283): INFO Epoch: [14] [1050/2502] eta: 0:31:28 lr: 0.000002 loss_cls: 3.0251 (3.0525) grad_norm: 2.7749 (3.0276) time: 1.2995 data: 0.0002 max mem: 13912 +[2024-12-06 21:44:14 root] (utils.py 283): INFO Epoch: [14] [1060/2502] eta: 0:31:16 lr: 0.000002 loss_cls: 3.0376 (3.0526) grad_norm: 2.7702 (3.0256) time: 1.3174 data: 0.0002 max mem: 13912 +[2024-12-06 21:44:27 root] (utils.py 283): INFO Epoch: [14] [1070/2502] eta: 0:31:03 lr: 0.000002 loss_cls: 2.9536 (3.0512) grad_norm: 2.7797 (3.0231) time: 1.3173 data: 0.0003 max mem: 13912 +[2024-12-06 21:44:40 root] (utils.py 283): INFO Epoch: [14] [1080/2502] eta: 0:30:50 lr: 0.000002 loss_cls: 2.8996 (3.0512) grad_norm: 2.7537 (3.0210) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 21:44:53 root] (utils.py 283): INFO Epoch: [14] [1090/2502] eta: 0:30:37 lr: 0.000002 loss_cls: 3.0775 (3.0493) grad_norm: 2.7537 (3.0222) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 21:45:06 root] (utils.py 283): INFO Epoch: [14] [1100/2502] eta: 0:30:24 lr: 0.000002 loss_cls: 2.9687 (3.0488) grad_norm: 2.6328 (3.0198) time: 1.3008 data: 0.0002 max mem: 13912 +[2024-12-06 21:45:19 root] (utils.py 283): INFO Epoch: [14] [1110/2502] eta: 0:30:11 lr: 0.000002 loss_cls: 3.1027 (3.0480) grad_norm: 2.6728 (3.0178) time: 1.2997 data: 0.0002 max mem: 13912 +[2024-12-06 21:45:32 root] (utils.py 283): INFO Epoch: [14] [1120/2502] eta: 0:29:58 lr: 0.000002 loss_cls: 2.9669 (3.0476) grad_norm: 2.7412 (3.0150) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 21:45:45 root] (utils.py 283): INFO Epoch: [14] [1130/2502] eta: 0:29:45 lr: 0.000002 loss_cls: 3.1482 (3.0497) grad_norm: 2.7635 (3.0167) time: 1.3019 data: 0.0002 max mem: 13912 +[2024-12-06 21:45:58 root] (utils.py 283): INFO Epoch: [14] [1140/2502] eta: 0:29:32 lr: 0.000002 loss_cls: 3.3256 (3.0498) grad_norm: 2.6480 (3.0130) time: 1.3003 data: 0.0002 max mem: 13912 +[2024-12-06 21:46:11 root] (utils.py 283): INFO Epoch: [14] [1150/2502] eta: 0:29:19 lr: 0.000002 loss_cls: 3.1748 (3.0511) grad_norm: 2.5125 (3.0100) time: 1.2993 data: 0.0002 max mem: 13912 +[2024-12-06 21:46:24 root] (utils.py 283): INFO Epoch: [14] [1160/2502] eta: 0:29:06 lr: 0.000002 loss_cls: 3.0539 (3.0512) grad_norm: 2.7098 (3.0225) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 21:46:37 root] (utils.py 283): INFO Epoch: [14] [1170/2502] eta: 0:28:53 lr: 0.000002 loss_cls: 3.0539 (3.0504) grad_norm: 3.0102 (3.0271) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 21:46:50 root] (utils.py 283): INFO Epoch: [14] [1180/2502] eta: 0:28:40 lr: 0.000002 loss_cls: 3.0405 (3.0505) grad_norm: 2.6562 (3.0250) time: 1.2991 data: 0.0002 max mem: 13912 +[2024-12-06 21:47:03 root] (utils.py 283): INFO Epoch: [14] [1190/2502] eta: 0:28:27 lr: 0.000002 loss_cls: 3.0950 (3.0503) grad_norm: 2.6003 (3.0244) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 21:47:16 root] (utils.py 283): INFO Epoch: [14] [1200/2502] eta: 0:28:14 lr: 0.000002 loss_cls: 3.0950 (3.0500) grad_norm: 2.6003 (3.0213) time: 1.3017 data: 0.0002 max mem: 13912 +[2024-12-06 21:47:29 root] (utils.py 283): INFO Epoch: [14] [1210/2502] eta: 0:28:01 lr: 0.000002 loss_cls: 3.0754 (3.0499) grad_norm: 2.5226 (3.0239) time: 1.2994 data: 0.0003 max mem: 13912 +[2024-12-06 21:47:42 root] (utils.py 283): INFO Epoch: [14] [1220/2502] eta: 0:27:48 lr: 0.000002 loss_cls: 3.1710 (3.0504) grad_norm: 2.6013 (3.0210) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 21:47:55 root] (utils.py 283): INFO Epoch: [14] [1230/2502] eta: 0:27:35 lr: 0.000002 loss_cls: 3.1657 (3.0508) grad_norm: 2.6917 (3.0210) time: 1.2985 data: 0.0002 max mem: 13912 +[2024-12-06 21:48:08 root] (utils.py 283): INFO Epoch: [14] [1240/2502] eta: 0:27:21 lr: 0.000002 loss_cls: 3.1657 (3.0526) grad_norm: 2.6883 (3.0233) time: 1.2973 data: 0.0002 max mem: 13912 +[2024-12-06 21:48:21 root] (utils.py 283): INFO Epoch: [14] [1250/2502] eta: 0:27:08 lr: 0.000002 loss_cls: 3.4284 (3.0546) grad_norm: 2.6152 (3.0198) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 21:48:34 root] (utils.py 283): INFO Epoch: [14] [1260/2502] eta: 0:26:55 lr: 0.000002 loss_cls: 3.2398 (3.0557) grad_norm: 2.6808 (3.0186) time: 1.2997 data: 0.0003 max mem: 13912 +[2024-12-06 21:48:47 root] (utils.py 283): INFO Epoch: [14] [1270/2502] eta: 0:26:42 lr: 0.000002 loss_cls: 3.1099 (3.0540) grad_norm: 2.9016 (3.0192) time: 1.2981 data: 0.0003 max mem: 13912 +[2024-12-06 21:49:00 root] (utils.py 283): INFO Epoch: [14] [1280/2502] eta: 0:26:29 lr: 0.000002 loss_cls: 3.1069 (3.0546) grad_norm: 3.1580 (3.0219) time: 1.2991 data: 0.0003 max mem: 13912 +[2024-12-06 21:49:13 root] (utils.py 283): INFO Epoch: [14] [1290/2502] eta: 0:26:16 lr: 0.000002 loss_cls: 2.7958 (3.0528) grad_norm: 2.7894 (3.0231) time: 1.3010 data: 0.0003 max mem: 13912 +[2024-12-06 21:49:26 root] (utils.py 283): INFO Epoch: [14] [1300/2502] eta: 0:26:03 lr: 0.000002 loss_cls: 2.7958 (3.0538) grad_norm: 2.6860 (3.0240) time: 1.3029 data: 0.0002 max mem: 13912 +[2024-12-06 21:49:39 root] (utils.py 283): INFO Epoch: [14] [1310/2502] eta: 0:25:50 lr: 0.000002 loss_cls: 2.6694 (3.0502) grad_norm: 2.6088 (3.0209) time: 1.3009 data: 0.0003 max mem: 13912 +[2024-12-06 21:49:52 root] (utils.py 283): INFO Epoch: [14] [1320/2502] eta: 0:25:37 lr: 0.000002 loss_cls: 2.7712 (3.0504) grad_norm: 2.6088 (3.0223) time: 1.3046 data: 0.0003 max mem: 13912 +[2024-12-06 21:50:05 root] (utils.py 283): INFO Epoch: [14] [1330/2502] eta: 0:25:24 lr: 0.000002 loss_cls: 3.2618 (3.0506) grad_norm: 2.9845 (3.0308) time: 1.3059 data: 0.0002 max mem: 13912 +[2024-12-06 21:50:18 root] (utils.py 283): INFO Epoch: [14] [1340/2502] eta: 0:25:11 lr: 0.000002 loss_cls: 3.2447 (3.0509) grad_norm: 2.9895 (3.0339) time: 1.2992 data: 0.0002 max mem: 13912 +[2024-12-06 21:50:31 root] (utils.py 283): INFO Epoch: [14] [1350/2502] eta: 0:24:58 lr: 0.000002 loss_cls: 3.1081 (3.0506) grad_norm: 2.8728 (3.0459) time: 1.3029 data: 0.0003 max mem: 13912 +[2024-12-06 21:50:44 root] (utils.py 283): INFO Epoch: [14] [1360/2502] eta: 0:24:45 lr: 0.000002 loss_cls: 3.0687 (3.0493) grad_norm: 2.7485 (3.0450) time: 1.3030 data: 0.0002 max mem: 13912 +[2024-12-06 21:50:57 root] (utils.py 283): INFO Epoch: [14] [1370/2502] eta: 0:24:32 lr: 0.000002 loss_cls: 3.2227 (3.0511) grad_norm: 2.6923 (3.0444) time: 1.3015 data: 0.0002 max mem: 13912 +[2024-12-06 21:51:10 root] (utils.py 283): INFO Epoch: [14] [1380/2502] eta: 0:24:19 lr: 0.000002 loss_cls: 3.3592 (3.0521) grad_norm: 2.7372 (3.0451) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 21:51:23 root] (utils.py 283): INFO Epoch: [14] [1390/2502] eta: 0:24:06 lr: 0.000002 loss_cls: 3.1573 (3.0504) grad_norm: 2.8822 (3.0459) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 21:51:36 root] (utils.py 283): INFO Epoch: [14] [1400/2502] eta: 0:23:53 lr: 0.000002 loss_cls: 3.0947 (3.0502) grad_norm: 2.7084 (3.0444) time: 1.2982 data: 0.0002 max mem: 13912 +[2024-12-06 21:51:49 root] (utils.py 283): INFO Epoch: [14] [1410/2502] eta: 0:23:40 lr: 0.000002 loss_cls: 3.3109 (3.0533) grad_norm: 2.7084 (3.0435) time: 1.3008 data: 0.0003 max mem: 13912 +[2024-12-06 21:52:02 root] (utils.py 283): INFO Epoch: [14] [1420/2502] eta: 0:23:27 lr: 0.000002 loss_cls: 3.4143 (3.0544) grad_norm: 2.6869 (3.0414) time: 1.3020 data: 0.0002 max mem: 13912 +[2024-12-06 21:52:15 root] (utils.py 283): INFO Epoch: [14] [1430/2502] eta: 0:23:14 lr: 0.000002 loss_cls: 3.1804 (3.0543) grad_norm: 2.6057 (3.0391) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 21:52:28 root] (utils.py 283): INFO Epoch: [14] [1440/2502] eta: 0:23:01 lr: 0.000002 loss_cls: 2.9559 (3.0527) grad_norm: 2.6810 (3.0366) time: 1.3005 data: 0.0002 max mem: 13912 +[2024-12-06 21:52:41 root] (utils.py 283): INFO Epoch: [14] [1450/2502] eta: 0:22:48 lr: 0.000002 loss_cls: 2.9256 (3.0510) grad_norm: 2.8106 (3.0366) time: 1.3010 data: 0.0002 max mem: 13912 +[2024-12-06 21:52:54 root] (utils.py 283): INFO Epoch: [14] [1460/2502] eta: 0:22:35 lr: 0.000002 loss_cls: 3.0695 (3.0510) grad_norm: 2.8871 (3.0358) time: 1.2989 data: 0.0002 max mem: 13912 +[2024-12-06 21:53:07 root] (utils.py 283): INFO Epoch: [14] [1470/2502] eta: 0:22:22 lr: 0.000002 loss_cls: 3.1983 (3.0519) grad_norm: 2.8544 (3.0354) time: 1.2956 data: 0.0002 max mem: 13912 +[2024-12-06 21:53:20 root] (utils.py 283): INFO Epoch: [14] [1480/2502] eta: 0:22:09 lr: 0.000002 loss_cls: 3.2669 (3.0525) grad_norm: 2.6698 (3.0322) time: 1.2944 data: 0.0002 max mem: 13912 +[2024-12-06 21:53:33 root] (utils.py 283): INFO Epoch: [14] [1490/2502] eta: 0:21:56 lr: 0.000002 loss_cls: 3.2291 (3.0527) grad_norm: 2.6093 (3.0331) time: 1.2957 data: 0.0003 max mem: 13912 +[2024-12-06 21:53:46 root] (utils.py 283): INFO Epoch: [14] [1500/2502] eta: 0:21:43 lr: 0.000002 loss_cls: 3.2291 (3.0525) grad_norm: 2.9172 (3.0326) time: 1.2957 data: 0.0003 max mem: 13912 +[2024-12-06 21:53:59 root] (utils.py 283): INFO Epoch: [14] [1510/2502] eta: 0:21:30 lr: 0.000002 loss_cls: 3.0616 (3.0521) grad_norm: 2.8297 (3.0322) time: 1.2923 data: 0.0002 max mem: 13912 +[2024-12-06 21:54:12 root] (utils.py 283): INFO Epoch: [14] [1520/2502] eta: 0:21:17 lr: 0.000002 loss_cls: 3.1476 (3.0533) grad_norm: 2.8297 (3.0341) time: 1.2954 data: 0.0002 max mem: 13912 +[2024-12-06 21:54:25 root] (utils.py 283): INFO Epoch: [14] [1530/2502] eta: 0:21:04 lr: 0.000002 loss_cls: 3.1928 (3.0536) grad_norm: 2.6974 (3.0329) time: 1.2973 data: 0.0002 max mem: 13912 +[2024-12-06 21:54:37 root] (utils.py 283): INFO Epoch: [14] [1540/2502] eta: 0:20:51 lr: 0.000002 loss_cls: 3.0565 (3.0524) grad_norm: 2.7015 (3.0352) time: 1.2942 data: 0.0003 max mem: 13912 +[2024-12-06 21:54:50 root] (utils.py 283): INFO Epoch: [14] [1550/2502] eta: 0:20:38 lr: 0.000002 loss_cls: 3.0771 (3.0524) grad_norm: 2.7642 (3.0338) time: 1.2944 data: 0.0003 max mem: 13912 +[2024-12-06 21:55:03 root] (utils.py 283): INFO Epoch: [14] [1560/2502] eta: 0:20:25 lr: 0.000002 loss_cls: 3.3539 (3.0537) grad_norm: 2.7754 (3.0325) time: 1.2932 data: 0.0003 max mem: 13912 +[2024-12-06 21:55:16 root] (utils.py 283): INFO Epoch: [14] [1570/2502] eta: 0:20:12 lr: 0.000002 loss_cls: 3.3182 (3.0538) grad_norm: 2.5923 (3.0292) time: 1.2934 data: 0.0002 max mem: 13912 +[2024-12-06 21:55:29 root] (utils.py 283): INFO Epoch: [14] [1580/2502] eta: 0:19:59 lr: 0.000002 loss_cls: 3.1533 (3.0548) grad_norm: 2.5923 (3.0293) time: 1.2944 data: 0.0003 max mem: 13912 +[2024-12-06 21:55:42 root] (utils.py 283): INFO Epoch: [14] [1590/2502] eta: 0:19:46 lr: 0.000002 loss_cls: 3.1797 (3.0550) grad_norm: 2.8627 (3.0306) time: 1.2929 data: 0.0002 max mem: 13912 +[2024-12-06 21:55:55 root] (utils.py 283): INFO Epoch: [14] [1600/2502] eta: 0:19:33 lr: 0.000002 loss_cls: 3.1208 (3.0538) grad_norm: 2.7564 (3.0325) time: 1.2943 data: 0.0003 max mem: 13912 +[2024-12-06 21:56:08 root] (utils.py 283): INFO Epoch: [14] [1610/2502] eta: 0:19:20 lr: 0.000002 loss_cls: 2.9824 (3.0535) grad_norm: 2.7345 (3.0323) time: 1.2931 data: 0.0003 max mem: 13912 +[2024-12-06 21:56:21 root] (utils.py 283): INFO Epoch: [14] [1620/2502] eta: 0:19:06 lr: 0.000002 loss_cls: 2.9824 (3.0527) grad_norm: 2.7108 (3.0408) time: 1.2920 data: 0.0003 max mem: 13912 +[2024-12-06 21:56:34 root] (utils.py 283): INFO Epoch: [14] [1630/2502] eta: 0:18:53 lr: 0.000002 loss_cls: 3.3668 (3.0548) grad_norm: 2.5133 (3.0392) time: 1.2926 data: 0.0003 max mem: 13912 +[2024-12-06 21:56:47 root] (utils.py 283): INFO Epoch: [14] [1640/2502] eta: 0:18:40 lr: 0.000002 loss_cls: 3.2918 (3.0559) grad_norm: 2.5912 (3.0376) time: 1.2942 data: 0.0003 max mem: 13912 +[2024-12-06 21:57:00 root] (utils.py 283): INFO Epoch: [14] [1650/2502] eta: 0:18:27 lr: 0.000002 loss_cls: 3.1180 (3.0548) grad_norm: 2.6819 (3.0375) time: 1.3058 data: 0.0003 max mem: 13912 +[2024-12-06 21:57:13 root] (utils.py 283): INFO Epoch: [14] [1660/2502] eta: 0:18:14 lr: 0.000002 loss_cls: 3.0830 (3.0544) grad_norm: 2.6819 (3.0363) time: 1.3089 data: 0.0003 max mem: 13912 +[2024-12-06 21:57:26 root] (utils.py 283): INFO Epoch: [14] [1670/2502] eta: 0:18:01 lr: 0.000002 loss_cls: 2.9192 (3.0535) grad_norm: 2.7649 (3.0375) time: 1.3033 data: 0.0003 max mem: 13912 +[2024-12-06 21:57:39 root] (utils.py 283): INFO Epoch: [14] [1680/2502] eta: 0:17:48 lr: 0.000002 loss_cls: 2.9618 (3.0536) grad_norm: 2.8728 (3.0374) time: 1.3048 data: 0.0003 max mem: 13912 +[2024-12-06 21:57:52 root] (utils.py 283): INFO Epoch: [14] [1690/2502] eta: 0:17:36 lr: 0.000002 loss_cls: 3.2755 (3.0533) grad_norm: 2.7205 (3.0348) time: 1.3039 data: 0.0002 max mem: 13912 +[2024-12-06 21:58:05 root] (utils.py 283): INFO Epoch: [14] [1700/2502] eta: 0:17:23 lr: 0.000002 loss_cls: 3.1564 (3.0536) grad_norm: 2.6250 (3.0341) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 21:58:18 root] (utils.py 283): INFO Epoch: [14] [1710/2502] eta: 0:17:10 lr: 0.000002 loss_cls: 3.0792 (3.0524) grad_norm: 2.7126 (3.0333) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 21:58:31 root] (utils.py 283): INFO Epoch: [14] [1720/2502] eta: 0:16:57 lr: 0.000002 loss_cls: 3.0959 (3.0525) grad_norm: 2.7662 (3.0330) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 21:58:44 root] (utils.py 283): INFO Epoch: [14] [1730/2502] eta: 0:16:44 lr: 0.000002 loss_cls: 3.2914 (3.0545) grad_norm: 2.9304 (3.0346) time: 1.3063 data: 0.0003 max mem: 13912 +[2024-12-06 21:58:57 root] (utils.py 283): INFO Epoch: [14] [1740/2502] eta: 0:16:31 lr: 0.000002 loss_cls: 3.3302 (3.0549) grad_norm: 2.7162 (3.0321) time: 1.3053 data: 0.0003 max mem: 13912 +[2024-12-06 21:59:10 root] (utils.py 283): INFO Epoch: [14] [1750/2502] eta: 0:16:18 lr: 0.000002 loss_cls: 3.1062 (3.0546) grad_norm: 2.5957 (3.0300) time: 1.3038 data: 0.0003 max mem: 13912 +[2024-12-06 21:59:23 root] (utils.py 283): INFO Epoch: [14] [1760/2502] eta: 0:16:05 lr: 0.000002 loss_cls: 2.8417 (3.0537) grad_norm: 2.6143 (3.0284) time: 1.3027 data: 0.0003 max mem: 13912 +[2024-12-06 21:59:36 root] (utils.py 283): INFO Epoch: [14] [1770/2502] eta: 0:15:52 lr: 0.000002 loss_cls: 2.7731 (3.0527) grad_norm: 2.7984 (3.0287) time: 1.2989 data: 0.0003 max mem: 13912 +[2024-12-06 21:59:49 root] (utils.py 283): INFO Epoch: [14] [1780/2502] eta: 0:15:39 lr: 0.000002 loss_cls: 3.0237 (3.0522) grad_norm: 2.6001 (3.0266) time: 1.2971 data: 0.0002 max mem: 13912 +[2024-12-06 22:00:03 root] (utils.py 283): INFO Epoch: [14] [1790/2502] eta: 0:15:26 lr: 0.000002 loss_cls: 3.0617 (3.0503) grad_norm: 2.6046 (3.0244) time: 1.3072 data: 0.0003 max mem: 13912 +[2024-12-06 22:00:16 root] (utils.py 283): INFO Epoch: [14] [1800/2502] eta: 0:15:13 lr: 0.000002 loss_cls: 3.1165 (3.0515) grad_norm: 2.6324 (3.0241) time: 1.3154 data: 0.0002 max mem: 13912 +[2024-12-06 22:00:29 root] (utils.py 283): INFO Epoch: [14] [1810/2502] eta: 0:15:00 lr: 0.000002 loss_cls: 3.1165 (3.0503) grad_norm: 2.8600 (3.0265) time: 1.3072 data: 0.0003 max mem: 13912 +[2024-12-06 22:00:42 root] (utils.py 283): INFO Epoch: [14] [1820/2502] eta: 0:14:47 lr: 0.000002 loss_cls: 3.1582 (3.0508) grad_norm: 2.8914 (3.0265) time: 1.3035 data: 0.0003 max mem: 13912 +[2024-12-06 22:00:55 root] (utils.py 283): INFO Epoch: [14] [1830/2502] eta: 0:14:34 lr: 0.000002 loss_cls: 3.2491 (3.0509) grad_norm: 2.6924 (3.0248) time: 1.3047 data: 0.0003 max mem: 13912 +[2024-12-06 22:01:08 root] (utils.py 283): INFO Epoch: [14] [1840/2502] eta: 0:14:21 lr: 0.000002 loss_cls: 3.1998 (3.0515) grad_norm: 2.7148 (3.0258) time: 1.3016 data: 0.0003 max mem: 13912 +[2024-12-06 22:01:21 root] (utils.py 283): INFO Epoch: [14] [1850/2502] eta: 0:14:08 lr: 0.000002 loss_cls: 3.0656 (3.0512) grad_norm: 2.8098 (3.0356) time: 1.2998 data: 0.0002 max mem: 13912 +[2024-12-06 22:01:34 root] (utils.py 283): INFO Epoch: [14] [1860/2502] eta: 0:13:55 lr: 0.000002 loss_cls: 3.1535 (3.0519) grad_norm: 2.8908 (3.0362) time: 1.3001 data: 0.0002 max mem: 13912 +[2024-12-06 22:01:47 root] (utils.py 283): INFO Epoch: [14] [1870/2502] eta: 0:13:42 lr: 0.000002 loss_cls: 3.1928 (3.0518) grad_norm: 2.9002 (3.0367) time: 1.3014 data: 0.0003 max mem: 13912 +[2024-12-06 22:02:00 root] (utils.py 283): INFO Epoch: [14] [1880/2502] eta: 0:13:29 lr: 0.000002 loss_cls: 3.2069 (3.0514) grad_norm: 2.8981 (3.0379) time: 1.3028 data: 0.0003 max mem: 13912 +[2024-12-06 22:02:13 root] (utils.py 283): INFO Epoch: [14] [1890/2502] eta: 0:13:16 lr: 0.000002 loss_cls: 3.2371 (3.0523) grad_norm: 2.6116 (3.0356) time: 1.3040 data: 0.0003 max mem: 13912 +[2024-12-06 22:02:26 root] (utils.py 283): INFO Epoch: [14] [1900/2502] eta: 0:13:03 lr: 0.000002 loss_cls: 3.1983 (3.0512) grad_norm: 2.5739 (3.0347) time: 1.3076 data: 0.0003 max mem: 13912 +[2024-12-06 22:02:39 root] (utils.py 283): INFO Epoch: [14] [1910/2502] eta: 0:12:50 lr: 0.000002 loss_cls: 3.0558 (3.0526) grad_norm: 2.6024 (3.0350) time: 1.3057 data: 0.0003 max mem: 13912 +[2024-12-06 22:02:52 root] (utils.py 283): INFO Epoch: [14] [1920/2502] eta: 0:12:37 lr: 0.000002 loss_cls: 3.2612 (3.0514) grad_norm: 2.8438 (3.0333) time: 1.3036 data: 0.0002 max mem: 13912 +[2024-12-06 22:03:05 root] (utils.py 283): INFO Epoch: [14] [1930/2502] eta: 0:12:24 lr: 0.000002 loss_cls: 3.0999 (3.0523) grad_norm: 2.9107 (3.0368) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 22:03:18 root] (utils.py 283): INFO Epoch: [14] [1940/2502] eta: 0:12:11 lr: 0.000002 loss_cls: 3.2868 (3.0535) grad_norm: 2.9985 (3.0375) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 22:03:31 root] (utils.py 283): INFO Epoch: [14] [1950/2502] eta: 0:11:58 lr: 0.000002 loss_cls: 3.1649 (3.0535) grad_norm: 2.6524 (3.0387) time: 1.3007 data: 0.0002 max mem: 13912 +[2024-12-06 22:03:44 root] (utils.py 283): INFO Epoch: [14] [1960/2502] eta: 0:11:45 lr: 0.000002 loss_cls: 3.1407 (3.0543) grad_norm: 2.6286 (3.0376) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 22:03:57 root] (utils.py 283): INFO Epoch: [14] [1970/2502] eta: 0:11:32 lr: 0.000002 loss_cls: 3.2891 (3.0551) grad_norm: 2.7320 (3.0370) time: 1.3001 data: 0.0002 max mem: 13912 +[2024-12-06 22:04:10 root] (utils.py 283): INFO Epoch: [14] [1980/2502] eta: 0:11:19 lr: 0.000002 loss_cls: 3.2282 (3.0556) grad_norm: 2.7467 (3.0367) time: 1.3012 data: 0.0002 max mem: 13912 +[2024-12-06 22:04:23 root] (utils.py 283): INFO Epoch: [14] [1990/2502] eta: 0:11:06 lr: 0.000002 loss_cls: 3.0625 (3.0546) grad_norm: 2.7336 (3.0358) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 22:04:36 root] (utils.py 283): INFO Epoch: [14] [2000/2502] eta: 0:10:53 lr: 0.000002 loss_cls: 2.8900 (3.0536) grad_norm: 2.6495 (3.0404) time: 1.3037 data: 0.0002 max mem: 13912 +[2024-12-06 22:04:49 root] (utils.py 283): INFO Epoch: [14] [2010/2502] eta: 0:10:40 lr: 0.000002 loss_cls: 3.0518 (3.0543) grad_norm: 2.9025 (3.0408) time: 1.3023 data: 0.0003 max mem: 13912 +[2024-12-06 22:05:02 root] (utils.py 283): INFO Epoch: [14] [2020/2502] eta: 0:10:27 lr: 0.000002 loss_cls: 2.9200 (3.0530) grad_norm: 2.6162 (3.0385) time: 1.2997 data: 0.0002 max mem: 13912 +[2024-12-06 22:05:15 root] (utils.py 283): INFO Epoch: [14] [2030/2502] eta: 0:10:14 lr: 0.000002 loss_cls: 2.9200 (3.0533) grad_norm: 2.7449 (3.0382) time: 1.2982 data: 0.0002 max mem: 13912 +[2024-12-06 22:05:28 root] (utils.py 283): INFO Epoch: [14] [2040/2502] eta: 0:10:01 lr: 0.000002 loss_cls: 3.2929 (3.0526) grad_norm: 2.8450 (3.0376) time: 1.3006 data: 0.0002 max mem: 13912 +[2024-12-06 22:05:41 root] (utils.py 283): INFO Epoch: [14] [2050/2502] eta: 0:09:48 lr: 0.000002 loss_cls: 3.2368 (3.0529) grad_norm: 2.5435 (3.0356) time: 1.3081 data: 0.0003 max mem: 13912 +[2024-12-06 22:05:54 root] (utils.py 283): INFO Epoch: [14] [2060/2502] eta: 0:09:35 lr: 0.000002 loss_cls: 3.0003 (3.0520) grad_norm: 2.4615 (3.0332) time: 1.3075 data: 0.0003 max mem: 13912 +[2024-12-06 22:06:07 root] (utils.py 283): INFO Epoch: [14] [2070/2502] eta: 0:09:22 lr: 0.000002 loss_cls: 2.9989 (3.0521) grad_norm: 2.4506 (3.0336) time: 1.3031 data: 0.0002 max mem: 13912 +[2024-12-06 22:06:20 root] (utils.py 283): INFO Epoch: [14] [2080/2502] eta: 0:09:09 lr: 0.000002 loss_cls: 3.1004 (3.0524) grad_norm: 2.7139 (3.0331) time: 1.3004 data: 0.0003 max mem: 13912 +[2024-12-06 22:06:33 root] (utils.py 283): INFO Epoch: [14] [2090/2502] eta: 0:08:56 lr: 0.000002 loss_cls: 3.1552 (3.0524) grad_norm: 2.7778 (3.0317) time: 1.2982 data: 0.0003 max mem: 13912 +[2024-12-06 22:06:46 root] (utils.py 283): INFO Epoch: [14] [2100/2502] eta: 0:08:42 lr: 0.000002 loss_cls: 3.0940 (3.0519) grad_norm: 2.6632 (3.0313) time: 1.2979 data: 0.0003 max mem: 13912 +[2024-12-06 22:06:59 root] (utils.py 283): INFO Epoch: [14] [2110/2502] eta: 0:08:29 lr: 0.000002 loss_cls: 3.2065 (3.0526) grad_norm: 2.7199 (3.0316) time: 1.2971 data: 0.0003 max mem: 13912 +[2024-12-06 22:07:12 root] (utils.py 283): INFO Epoch: [14] [2120/2502] eta: 0:08:16 lr: 0.000002 loss_cls: 3.1780 (3.0522) grad_norm: 2.8408 (3.0304) time: 1.3004 data: 0.0002 max mem: 13912 +[2024-12-06 22:07:25 root] (utils.py 283): INFO Epoch: [14] [2130/2502] eta: 0:08:03 lr: 0.000002 loss_cls: 3.1584 (3.0527) grad_norm: 2.6789 (3.0308) time: 1.3012 data: 0.0003 max mem: 13912 +[2024-12-06 22:07:38 root] (utils.py 283): INFO Epoch: [14] [2140/2502] eta: 0:07:50 lr: 0.000002 loss_cls: 3.1015 (3.0521) grad_norm: 2.6987 (3.0310) time: 1.2988 data: 0.0003 max mem: 13912 +[2024-12-06 22:07:51 root] (utils.py 283): INFO Epoch: [14] [2150/2502] eta: 0:07:37 lr: 0.000002 loss_cls: 3.1015 (3.0537) grad_norm: 2.8605 (3.0321) time: 1.3003 data: 0.0003 max mem: 13912 +[2024-12-06 22:08:04 root] (utils.py 283): INFO Epoch: [14] [2160/2502] eta: 0:07:24 lr: 0.000002 loss_cls: 3.4076 (3.0539) grad_norm: 2.8741 (3.0400) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 22:08:17 root] (utils.py 283): INFO Epoch: [14] [2170/2502] eta: 0:07:11 lr: 0.000002 loss_cls: 2.9043 (3.0528) grad_norm: 2.6899 (3.0384) time: 1.3049 data: 0.0002 max mem: 13912 +[2024-12-06 22:08:30 root] (utils.py 283): INFO Epoch: [14] [2180/2502] eta: 0:06:58 lr: 0.000002 loss_cls: 2.7614 (3.0517) grad_norm: 2.6834 (3.0380) time: 1.3030 data: 0.0003 max mem: 13912 +[2024-12-06 22:08:43 root] (utils.py 283): INFO Epoch: [14] [2190/2502] eta: 0:06:45 lr: 0.000002 loss_cls: 3.1800 (3.0524) grad_norm: 2.5332 (3.0354) time: 1.3007 data: 0.0003 max mem: 13912 +[2024-12-06 22:08:57 root] (utils.py 283): INFO Epoch: [14] [2200/2502] eta: 0:06:32 lr: 0.000002 loss_cls: 3.1973 (3.0524) grad_norm: 2.5105 (3.0349) time: 1.3041 data: 0.0003 max mem: 13912 +[2024-12-06 22:09:10 root] (utils.py 283): INFO Epoch: [14] [2210/2502] eta: 0:06:19 lr: 0.000002 loss_cls: 3.1476 (3.0525) grad_norm: 2.6462 (3.0367) time: 1.3070 data: 0.0003 max mem: 13912 +[2024-12-06 22:09:23 root] (utils.py 283): INFO Epoch: [14] [2220/2502] eta: 0:06:06 lr: 0.000002 loss_cls: 2.9621 (3.0518) grad_norm: 2.6462 (3.0367) time: 1.3074 data: 0.0003 max mem: 13912 +[2024-12-06 22:09:36 root] (utils.py 283): INFO Epoch: [14] [2230/2502] eta: 0:05:53 lr: 0.000002 loss_cls: 2.8941 (3.0517) grad_norm: 2.6856 (3.0352) time: 1.3014 data: 0.0002 max mem: 13912 +[2024-12-06 22:09:49 root] (utils.py 283): INFO Epoch: [14] [2240/2502] eta: 0:05:40 lr: 0.000002 loss_cls: 3.0930 (3.0519) grad_norm: 2.6856 (3.0353) time: 1.2981 data: 0.0002 max mem: 13912 +[2024-12-06 22:10:02 root] (utils.py 283): INFO Epoch: [14] [2250/2502] eta: 0:05:27 lr: 0.000002 loss_cls: 3.1473 (3.0525) grad_norm: 2.8124 (3.0356) time: 1.2998 data: 0.0002 max mem: 13912 +[2024-12-06 22:10:15 root] (utils.py 283): INFO Epoch: [14] [2260/2502] eta: 0:05:14 lr: 0.000002 loss_cls: 3.3100 (3.0533) grad_norm: 2.6267 (3.0341) time: 1.3004 data: 0.0002 max mem: 13912 +[2024-12-06 22:10:28 root] (utils.py 283): INFO Epoch: [14] [2270/2502] eta: 0:05:01 lr: 0.000002 loss_cls: 3.2887 (3.0537) grad_norm: 2.5610 (3.0335) time: 1.2997 data: 0.0002 max mem: 13912 +[2024-12-06 22:10:41 root] (utils.py 283): INFO Epoch: [14] [2280/2502] eta: 0:04:48 lr: 0.000002 loss_cls: 3.2239 (3.0541) grad_norm: 2.8966 (3.0335) time: 1.3013 data: 0.0003 max mem: 13912 +[2024-12-06 22:10:54 root] (utils.py 283): INFO Epoch: [14] [2290/2502] eta: 0:04:35 lr: 0.000002 loss_cls: 3.1635 (3.0545) grad_norm: 2.8966 (3.0356) time: 1.3025 data: 0.0002 max mem: 13912 +[2024-12-06 22:11:07 root] (utils.py 283): INFO Epoch: [14] [2300/2502] eta: 0:04:22 lr: 0.000002 loss_cls: 3.1485 (3.0551) grad_norm: 2.8926 (3.0362) time: 1.3031 data: 0.0003 max mem: 13912 +[2024-12-06 22:11:20 root] (utils.py 283): INFO Epoch: [14] [2310/2502] eta: 0:04:09 lr: 0.000002 loss_cls: 2.9483 (3.0544) grad_norm: 2.8637 (3.0373) time: 1.3025 data: 0.0003 max mem: 13912 +[2024-12-06 22:11:33 root] (utils.py 283): INFO Epoch: [14] [2320/2502] eta: 0:03:56 lr: 0.000002 loss_cls: 3.1677 (3.0554) grad_norm: 2.7720 (3.0359) time: 1.3014 data: 0.0002 max mem: 13912 +[2024-12-06 22:11:46 root] (utils.py 283): INFO Epoch: [14] [2330/2502] eta: 0:03:43 lr: 0.000002 loss_cls: 3.2298 (3.0555) grad_norm: 2.6943 (3.0355) time: 1.3020 data: 0.0003 max mem: 13912 +[2024-12-06 22:11:59 root] (utils.py 283): INFO Epoch: [14] [2340/2502] eta: 0:03:30 lr: 0.000002 loss_cls: 3.1363 (3.0554) grad_norm: 2.6341 (3.0340) time: 1.3021 data: 0.0003 max mem: 13912 +[2024-12-06 22:12:12 root] (utils.py 283): INFO Epoch: [14] [2350/2502] eta: 0:03:17 lr: 0.000002 loss_cls: 2.8342 (3.0545) grad_norm: 2.6341 (3.0325) time: 1.3011 data: 0.0002 max mem: 13912 +[2024-12-06 22:12:25 root] (utils.py 283): INFO Epoch: [14] [2360/2502] eta: 0:03:04 lr: 0.000002 loss_cls: 3.1751 (3.0552) grad_norm: 2.7479 (3.0344) time: 1.2977 data: 0.0002 max mem: 13912 +[2024-12-06 22:12:38 root] (utils.py 283): INFO Epoch: [14] [2370/2502] eta: 0:02:51 lr: 0.000002 loss_cls: 3.2599 (3.0560) grad_norm: 2.7721 (3.0333) time: 1.3033 data: 0.0002 max mem: 13912 +[2024-12-06 22:12:52 root] (utils.py 283): INFO Epoch: [14] [2380/2502] eta: 0:02:38 lr: 0.000002 loss_cls: 3.1905 (3.0553) grad_norm: 2.6088 (3.0313) time: 1.3408 data: 0.0004 max mem: 13912 +[2024-12-06 22:13:05 root] (utils.py 283): INFO Epoch: [14] [2390/2502] eta: 0:02:25 lr: 0.000002 loss_cls: 2.9795 (3.0556) grad_norm: 2.5159 (3.0296) time: 1.3345 data: 0.0004 max mem: 13912 +[2024-12-06 22:13:18 root] (utils.py 283): INFO Epoch: [14] [2400/2502] eta: 0:02:12 lr: 0.000002 loss_cls: 3.2099 (3.0558) grad_norm: 2.6349 (3.0287) time: 1.2968 data: 0.0003 max mem: 13912 +[2024-12-06 22:13:31 root] (utils.py 283): INFO Epoch: [14] [2410/2502] eta: 0:01:59 lr: 0.000002 loss_cls: 3.1704 (3.0551) grad_norm: 2.7773 (3.0279) time: 1.2996 data: 0.0002 max mem: 13912 +[2024-12-06 22:13:44 root] (utils.py 283): INFO Epoch: [14] [2420/2502] eta: 0:01:46 lr: 0.000002 loss_cls: 2.8977 (3.0555) grad_norm: 2.8087 (3.0303) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 22:13:57 root] (utils.py 283): INFO Epoch: [14] [2430/2502] eta: 0:01:33 lr: 0.000002 loss_cls: 3.2139 (3.0552) grad_norm: 2.7649 (3.0295) time: 1.2994 data: 0.0002 max mem: 13912 +[2024-12-06 22:14:10 root] (utils.py 283): INFO Epoch: [14] [2440/2502] eta: 0:01:20 lr: 0.000002 loss_cls: 3.2139 (3.0555) grad_norm: 2.6675 (3.0287) time: 1.3009 data: 0.0002 max mem: 13912 +[2024-12-06 22:14:23 root] (utils.py 283): INFO Epoch: [14] [2450/2502] eta: 0:01:07 lr: 0.000002 loss_cls: 3.1594 (3.0548) grad_norm: 2.6889 (3.0283) time: 1.3040 data: 0.0002 max mem: 13912 +[2024-12-06 22:14:36 root] (utils.py 283): INFO Epoch: [14] [2460/2502] eta: 0:00:54 lr: 0.000002 loss_cls: 3.0822 (3.0554) grad_norm: 2.5594 (3.0269) time: 1.3044 data: 0.0002 max mem: 13912 +[2024-12-06 22:14:49 root] (utils.py 283): INFO Epoch: [14] [2470/2502] eta: 0:00:41 lr: 0.000002 loss_cls: 3.0389 (3.0544) grad_norm: 2.4958 (3.0254) time: 1.3022 data: 0.0002 max mem: 13912 +[2024-12-06 22:15:02 root] (utils.py 283): INFO Epoch: [14] [2480/2502] eta: 0:00:28 lr: 0.000002 loss_cls: 2.9379 (3.0545) grad_norm: 2.5393 (3.0245) time: 1.2976 data: 0.0002 max mem: 13912 +[2024-12-06 22:15:15 root] (utils.py 283): INFO Epoch: [14] [2490/2502] eta: 0:00:15 lr: 0.000002 loss_cls: 2.9226 (3.0542) grad_norm: 2.7440 (3.0248) time: 1.3176 data: 0.0241 max mem: 13912 +[2024-12-06 22:15:28 root] (utils.py 283): INFO Epoch: [14] [2500/2502] eta: 0:00:02 lr: 0.000002 loss_cls: 3.0919 (3.0545) grad_norm: 2.8501 (3.0250) time: 1.3144 data: 0.0241 max mem: 13912 +[2024-12-06 22:15:29 root] (utils.py 283): INFO Epoch: [14] [2501/2502] eta: 0:00:01 lr: 0.000002 loss_cls: 3.0919 (3.0546) grad_norm: 2.8173 (3.0248) time: 1.3140 data: 0.0241 max mem: 13912 +[2024-12-06 22:15:29 root] (utils.py 297): INFO Epoch: [14] Total time: 0:54:16 (1.3015 s / it) +[2024-12-06 22:15:29 root] (engine.py 179): INFO Averaged stats:lr: 0.000002 loss_cls: 3.0919 (3.0596) grad_norm: 2.8173 (3.0248) +[2024-12-06 22:15:30 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:22 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.4508 (0.4508) acc1: 90.6250 (90.6250) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.2251 data: 0.0006 max mem: 13912 +[2024-12-06 22:15:33 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:20 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6587 (0.6565) acc1: 86.7188 (86.0795) acc3: 96.8750 (95.5966) acc5: 97.6562 (97.3011) time: 0.2274 data: 0.0004 max mem: 13912 +[2024-12-06 22:15:35 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:17 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.6587 (0.6933) acc1: 82.8125 (84.9330) acc3: 96.0938 (95.2381) acc5: 96.8750 (96.9866) time: 0.2278 data: 0.0004 max mem: 13912 +[2024-12-06 22:15:37 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:15 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7538 (0.7195) acc1: 82.8125 (84.1986) acc3: 94.5312 (94.9849) acc5: 96.8750 (96.8498) time: 0.2279 data: 0.0005 max mem: 13912 +[2024-12-06 22:15:39 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:13 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.7493 (0.7229) acc1: 83.5938 (84.0892) acc3: 94.5312 (95.0267) acc5: 96.8750 (96.8941) time: 0.2280 data: 0.0005 max mem: 13912 +[2024-12-06 22:15:42 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:10 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.8054 (0.8050) acc1: 79.6875 (82.2151) acc3: 89.8438 (93.7040) acc5: 95.3125 (95.9559) time: 0.2281 data: 0.0005 max mem: 13912 +[2024-12-06 22:15:44 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:08 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0935 (0.8369) acc1: 75.7812 (81.7879) acc3: 89.0625 (93.0456) acc5: 91.4062 (95.4150) time: 0.2280 data: 0.0005 max mem: 13912 +[2024-12-06 22:15:46 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:06 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0346 (0.8636) acc1: 78.9062 (81.1180) acc3: 89.8438 (92.7707) acc5: 92.9688 (95.2355) time: 0.2285 data: 0.0005 max mem: 13912 +[2024-12-06 22:15:49 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:04 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0346 (0.8928) acc1: 75.7812 (80.4495) acc3: 89.8438 (92.2647) acc5: 93.7500 (94.9074) time: 0.2299 data: 0.0009 max mem: 13912 +[2024-12-06 22:15:51 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 1.0596 (0.9166) acc1: 75.7812 (79.7562) acc3: 89.0625 (91.9471) acc5: 92.9688 (94.6171) time: 0.2298 data: 0.0009 max mem: 13912 +[2024-12-06 22:15:52 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 3.5844 (3.5844) layer_flops: 3.5356 (3.5356) loss: 0.9950 (0.9125) acc1: 75.7812 (79.7520) acc3: 89.8438 (92.0000) acc5: 92.9688 (94.6880) time: 0.2253 data: 0.0008 max mem: 13912 +[2024-12-06 22:15:52 root] (utils.py 297): INFO Test: Total time: 0:00:22 (0.2279 s / it) +[2024-12-06 22:15:52 root] (engine.py 264): INFO * Acc@1 79.874 Acc@3 92.216 Acc@5 94.774 loss 0.907 flops 3.584 layer_flops 3.536 +[2024-12-06 22:15:52 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 79.9% +[2024-12-06 22:15:52 root] (main.py 551): INFO Max accuracy: 79.95% +[2024-12-06 22:15:52 root] (main.py 564): INFO Finetune time 17:29:06 +[2024-12-06 22:22:15 root] (main.py 226): INFO Namespace(batch_size=128, epochs=15, model='RMeeTo_small', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', output_dir='exp/tab2/small/distill', device='cuda', seed=0, autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='11', metric='X', distance='cosine', if_order=True, if_random=False, if_merge_odd=True, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-06 22:22:20 root] (main.py 292): INFO Creating model: RMeeTo_small