diff --git "a/VideoMamba-B_15epochs_81.9/logs/log_rank0.txt" "b/VideoMamba-B_15epochs_81.9/logs/log_rank0.txt" new file mode 100644--- /dev/null +++ "b/VideoMamba-B_15epochs_81.9/logs/log_rank0.txt" @@ -0,0 +1,4040 @@ +[2024-12-11 18:34:03 root] (main.py 238): INFO Namespace(batch_size=128, epochs=15, model='RMeeTo_video_base', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_path='/datasets/imagenet/', data_set='IMNET', inat_category='name', output_dir='exp/tab2/video_base', device='cuda', seed=0, resume='', autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='11', metric='X', distance='cosine', if_order=True, if_random=False, model_pth='/pretrained', if_merge_odd=False, merge_mode='sum', if_shuffle=False, shuffle_rate=0.0, choose='max', compare=11, data_ratio=1.0, data_seed=0, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-11 18:34:08 root] (main.py 304): INFO Creating model: RMeeTo_video_base +[2024-12-11 18:34:18 root] (main.py 393): INFO number of params: 97598440 +[2024-12-11 18:34:21 root] (main.py 513): INFO Start training for 15 epochs +[2024-12-11 18:34:29 root] (utils.py 283): INFO Epoch: [0] [ 0/2502] eta: 5:53:02 lr: 0.000020 loss_cls: 2.8249 (2.8249) grad_norm: 1.5740 (1.5740) time: 8.4663 data: 0.0010 max mem: 27321 +[2024-12-11 18:34:58 root] (utils.py 283): INFO Epoch: [0] [ 10/2502] eta: 2:20:25 lr: 0.000020 loss_cls: 3.0441 (2.9388) grad_norm: 1.5973 (1.7291) time: 3.3810 data: 0.0003 max mem: 28452 +[2024-12-11 18:35:26 root] (utils.py 283): INFO Epoch: [0] [ 20/2502] eta: 2:09:23 lr: 0.000020 loss_cls: 2.9815 (2.8952) grad_norm: 1.4903 (1.5905) time: 2.8609 data: 0.0002 max mem: 28452 +[2024-12-11 18:35:55 root] (utils.py 283): INFO Epoch: [0] [ 30/2502] eta: 2:05:17 lr: 0.000020 loss_cls: 2.9632 (2.9166) grad_norm: 1.3828 (1.5950) time: 2.8541 data: 0.0002 max mem: 28452 +[2024-12-11 18:36:24 root] (utils.py 283): INFO Epoch: [0] [ 40/2502] eta: 2:02:58 lr: 0.000020 loss_cls: 2.8872 (2.8716) grad_norm: 1.2967 (1.5062) time: 2.8598 data: 0.0003 max mem: 28452 +[2024-12-11 18:36:52 root] (utils.py 283): INFO Epoch: [0] [ 50/2502] eta: 2:01:23 lr: 0.000020 loss_cls: 2.8076 (2.8551) grad_norm: 1.1970 (1.4827) time: 2.8611 data: 0.0003 max mem: 28452 +[2024-12-11 18:37:21 root] (utils.py 283): INFO Epoch: [0] [ 60/2502] eta: 2:00:10 lr: 0.000020 loss_cls: 2.8113 (2.8287) grad_norm: 1.1837 (1.4497) time: 2.8621 data: 0.0003 max mem: 28452 +[2024-12-11 18:37:49 root] (utils.py 283): INFO Epoch: [0] [ 70/2502] eta: 1:59:12 lr: 0.000020 loss_cls: 2.6824 (2.7967) grad_norm: 1.1726 (1.4745) time: 2.8652 data: 0.0003 max mem: 28452 +[2024-12-11 18:38:18 root] (utils.py 283): INFO Epoch: [0] [ 80/2502] eta: 1:58:20 lr: 0.000020 loss_cls: 2.6732 (2.7749) grad_norm: 1.1726 (1.4353) time: 2.8670 data: 0.0003 max mem: 28452 +[2024-12-11 18:38:47 root] (utils.py 283): INFO Epoch: [0] [ 90/2502] eta: 1:57:33 lr: 0.000020 loss_cls: 2.7467 (2.7600) grad_norm: 1.2171 (1.4195) time: 2.8656 data: 0.0003 max mem: 28452 +[2024-12-11 18:39:16 root] (utils.py 283): INFO Epoch: [0] [ 100/2502] eta: 1:56:51 lr: 0.000020 loss_cls: 2.8612 (2.7668) grad_norm: 1.2292 (1.4086) time: 2.8672 data: 0.0003 max mem: 28452 +[2024-12-11 18:39:44 root] (utils.py 283): INFO Epoch: [0] [ 110/2502] eta: 1:56:11 lr: 0.000020 loss_cls: 2.8490 (2.7578) grad_norm: 1.2292 (1.4006) time: 2.8700 data: 0.0003 max mem: 28452 +[2024-12-11 18:40:13 root] (utils.py 283): INFO Epoch: [0] [ 120/2502] eta: 1:55:33 lr: 0.000020 loss_cls: 2.8088 (2.7514) grad_norm: 1.2099 (1.3965) time: 2.8694 data: 0.0003 max mem: 28452 +[2024-12-11 18:40:42 root] (utils.py 283): INFO Epoch: [0] [ 130/2502] eta: 1:54:57 lr: 0.000020 loss_cls: 2.7845 (2.7456) grad_norm: 1.1759 (1.4238) time: 2.8700 data: 0.0003 max mem: 28452 +[2024-12-11 18:41:10 root] (utils.py 283): INFO Epoch: [0] [ 140/2502] eta: 1:54:21 lr: 0.000020 loss_cls: 2.6933 (2.7377) grad_norm: 1.3040 (1.5509) time: 2.8708 data: 0.0002 max mem: 28452 +[2024-12-11 18:41:39 root] (utils.py 283): INFO Epoch: [0] [ 150/2502] eta: 1:53:46 lr: 0.000020 loss_cls: 2.6933 (2.7330) grad_norm: 1.3596 (1.6380) time: 2.8688 data: 0.0002 max mem: 28452 +[2024-12-11 18:42:08 root] (utils.py 283): INFO Epoch: [0] [ 160/2502] eta: 1:53:13 lr: 0.000020 loss_cls: 2.6962 (2.7316) grad_norm: 1.1407 (1.6085) time: 2.8713 data: 0.0002 max mem: 28452 +[2024-12-11 18:42:36 root] (utils.py 283): INFO Epoch: [0] [ 170/2502] eta: 1:52:41 lr: 0.000020 loss_cls: 2.6962 (2.7280) grad_norm: 1.1267 (1.5806) time: 2.8739 data: 0.0002 max mem: 28452 +[2024-12-11 18:43:05 root] (utils.py 283): INFO Epoch: [0] [ 180/2502] eta: 1:52:08 lr: 0.000020 loss_cls: 2.6286 (2.7122) grad_norm: 1.1207 (1.5588) time: 2.8707 data: 0.0003 max mem: 28452 +[2024-12-11 18:43:34 root] (utils.py 283): INFO Epoch: [0] [ 190/2502] eta: 1:51:35 lr: 0.000020 loss_cls: 2.5564 (2.7057) grad_norm: 1.1222 (1.5349) time: 2.8688 data: 0.0003 max mem: 28452 +[2024-12-11 18:44:03 root] (utils.py 283): INFO Epoch: [0] [ 200/2502] eta: 1:51:04 lr: 0.000020 loss_cls: 2.5738 (2.6945) grad_norm: 1.1459 (1.5737) time: 2.8724 data: 0.0002 max mem: 28452 +[2024-12-11 18:44:31 root] (utils.py 283): INFO Epoch: [0] [ 210/2502] eta: 1:50:33 lr: 0.000020 loss_cls: 2.5738 (2.6892) grad_norm: 1.1579 (1.5561) time: 2.8745 data: 0.0002 max mem: 28452 +[2024-12-11 18:45:00 root] (utils.py 283): INFO Epoch: [0] [ 220/2502] eta: 1:50:01 lr: 0.000020 loss_cls: 2.5234 (2.6827) grad_norm: 1.1218 (1.5365) time: 2.8731 data: 0.0003 max mem: 28452 +[2024-12-11 18:45:29 root] (utils.py 283): INFO Epoch: [0] [ 230/2502] eta: 1:49:30 lr: 0.000020 loss_cls: 2.6223 (2.6795) grad_norm: 1.1211 (1.5224) time: 2.8714 data: 0.0003 max mem: 28452 +[2024-12-11 18:45:58 root] (utils.py 283): INFO Epoch: [0] [ 240/2502] eta: 1:49:00 lr: 0.000020 loss_cls: 2.6223 (2.6758) grad_norm: 1.1211 (1.5081) time: 2.8739 data: 0.0002 max mem: 28452 +[2024-12-11 18:46:26 root] (utils.py 283): INFO Epoch: [0] [ 250/2502] eta: 1:48:30 lr: 0.000020 loss_cls: 2.5684 (2.6651) grad_norm: 1.1076 (1.4929) time: 2.8780 data: 0.0002 max mem: 28452 +[2024-12-11 18:46:55 root] (utils.py 283): INFO Epoch: [0] [ 260/2502] eta: 1:47:59 lr: 0.000020 loss_cls: 2.5474 (2.6559) grad_norm: 1.1218 (1.4870) time: 2.8743 data: 0.0002 max mem: 28452 +[2024-12-11 18:47:24 root] (utils.py 283): INFO Epoch: [0] [ 270/2502] eta: 1:47:29 lr: 0.000020 loss_cls: 2.5096 (2.6506) grad_norm: 1.2754 (1.4818) time: 2.8723 data: 0.0002 max mem: 28452 +[2024-12-11 18:47:53 root] (utils.py 283): INFO Epoch: [0] [ 280/2502] eta: 1:46:59 lr: 0.000020 loss_cls: 2.7390 (2.6549) grad_norm: 1.2983 (1.4726) time: 2.8742 data: 0.0002 max mem: 28452 +[2024-12-11 18:48:21 root] (utils.py 283): INFO Epoch: [0] [ 290/2502] eta: 1:46:29 lr: 0.000020 loss_cls: 2.5581 (2.6461) grad_norm: 1.2528 (1.4639) time: 2.8749 data: 0.0002 max mem: 28452 +[2024-12-11 18:48:50 root] (utils.py 283): INFO Epoch: [0] [ 300/2502] eta: 1:45:59 lr: 0.000020 loss_cls: 2.3521 (2.6383) grad_norm: 1.1757 (1.4526) time: 2.8740 data: 0.0002 max mem: 28452 +[2024-12-11 18:49:19 root] (utils.py 283): INFO Epoch: [0] [ 310/2502] eta: 1:45:29 lr: 0.000020 loss_cls: 2.4096 (2.6347) grad_norm: 1.1639 (1.4579) time: 2.8730 data: 0.0002 max mem: 28452 +[2024-12-11 18:49:47 root] (utils.py 283): INFO Epoch: [0] [ 320/2502] eta: 1:44:59 lr: 0.000020 loss_cls: 2.7758 (2.6434) grad_norm: 1.2836 (1.4687) time: 2.8732 data: 0.0003 max mem: 28452 +[2024-12-11 18:50:16 root] (utils.py 283): INFO Epoch: [0] [ 330/2502] eta: 1:44:29 lr: 0.000020 loss_cls: 2.7878 (2.6411) grad_norm: 1.2808 (1.4722) time: 2.8733 data: 0.0003 max mem: 28452 +[2024-12-11 18:50:45 root] (utils.py 283): INFO Epoch: [0] [ 340/2502] eta: 1:44:00 lr: 0.000020 loss_cls: 2.7853 (2.6446) grad_norm: 1.2759 (1.4672) time: 2.8734 data: 0.0003 max mem: 28452 +[2024-12-11 18:51:14 root] (utils.py 283): INFO Epoch: [0] [ 350/2502] eta: 1:43:30 lr: 0.000020 loss_cls: 2.7853 (2.6451) grad_norm: 1.2778 (1.4653) time: 2.8754 data: 0.0003 max mem: 28452 +[2024-12-11 18:51:43 root] (utils.py 283): INFO Epoch: [0] [ 360/2502] eta: 1:43:01 lr: 0.000020 loss_cls: 2.6709 (2.6426) grad_norm: 1.1566 (1.5113) time: 2.8777 data: 0.0003 max mem: 28452 +[2024-12-11 18:52:11 root] (utils.py 283): INFO Epoch: [0] [ 370/2502] eta: 1:42:32 lr: 0.000020 loss_cls: 2.7067 (2.6408) grad_norm: 1.2303 (1.5070) time: 2.8789 data: 0.0002 max mem: 28452 +[2024-12-11 18:52:40 root] (utils.py 283): INFO Epoch: [0] [ 380/2502] eta: 1:42:02 lr: 0.000020 loss_cls: 2.7266 (2.6402) grad_norm: 1.2799 (1.5016) time: 2.8778 data: 0.0003 max mem: 28452 +[2024-12-11 18:53:09 root] (utils.py 283): INFO Epoch: [0] [ 390/2502] eta: 1:41:33 lr: 0.000020 loss_cls: 2.7266 (2.6371) grad_norm: 1.2877 (1.4999) time: 2.8757 data: 0.0002 max mem: 28452 +[2024-12-11 18:53:38 root] (utils.py 283): INFO Epoch: [0] [ 400/2502] eta: 1:41:04 lr: 0.000020 loss_cls: 2.6925 (2.6400) grad_norm: 1.1707 (1.4984) time: 2.8767 data: 0.0002 max mem: 28452 +[2024-12-11 18:54:06 root] (utils.py 283): INFO Epoch: [0] [ 410/2502] eta: 1:40:34 lr: 0.000020 loss_cls: 2.7704 (2.6444) grad_norm: 1.2481 (1.4969) time: 2.8767 data: 0.0002 max mem: 28452 +[2024-12-11 18:54:35 root] (utils.py 283): INFO Epoch: [0] [ 420/2502] eta: 1:40:05 lr: 0.000020 loss_cls: 2.7704 (2.6445) grad_norm: 1.2523 (1.4908) time: 2.8746 data: 0.0002 max mem: 28452 +[2024-12-11 18:55:04 root] (utils.py 283): INFO Epoch: [0] [ 430/2502] eta: 1:39:36 lr: 0.000020 loss_cls: 2.7841 (2.6473) grad_norm: 1.1982 (1.4865) time: 2.8739 data: 0.0003 max mem: 28452 +[2024-12-11 18:55:33 root] (utils.py 283): INFO Epoch: [0] [ 440/2502] eta: 1:39:07 lr: 0.000020 loss_cls: 2.7877 (2.6505) grad_norm: 1.1861 (1.4792) time: 2.8764 data: 0.0003 max mem: 28452 +[2024-12-11 18:56:01 root] (utils.py 283): INFO Epoch: [0] [ 450/2502] eta: 1:38:37 lr: 0.000020 loss_cls: 2.7533 (2.6505) grad_norm: 1.1823 (1.4771) time: 2.8791 data: 0.0003 max mem: 28452 +[2024-12-11 18:56:30 root] (utils.py 283): INFO Epoch: [0] [ 460/2502] eta: 1:38:08 lr: 0.000020 loss_cls: 2.5929 (2.6456) grad_norm: 1.2066 (1.4734) time: 2.8778 data: 0.0003 max mem: 28452 +[2024-12-11 18:56:59 root] (utils.py 283): INFO Epoch: [0] [ 470/2502] eta: 1:37:39 lr: 0.000020 loss_cls: 2.4749 (2.6381) grad_norm: 1.1908 (1.4679) time: 2.8772 data: 0.0003 max mem: 28452 +[2024-12-11 18:57:28 root] (utils.py 283): INFO Epoch: [0] [ 480/2502] eta: 1:37:10 lr: 0.000020 loss_cls: 2.5176 (2.6363) grad_norm: 1.1423 (1.5286) time: 2.8775 data: 0.0002 max mem: 28452 +[2024-12-11 18:57:57 root] (utils.py 283): INFO Epoch: [0] [ 490/2502] eta: 1:36:41 lr: 0.000020 loss_cls: 2.7225 (2.6409) grad_norm: 1.1156 (1.5218) time: 2.8772 data: 0.0002 max mem: 28452 +[2024-12-11 18:58:25 root] (utils.py 283): INFO Epoch: [0] [ 500/2502] eta: 1:36:12 lr: 0.000020 loss_cls: 2.7884 (2.6395) grad_norm: 1.2085 (1.5163) time: 2.8775 data: 0.0002 max mem: 28452 +[2024-12-11 18:58:54 root] (utils.py 283): INFO Epoch: [0] [ 510/2502] eta: 1:35:43 lr: 0.000020 loss_cls: 2.7863 (2.6421) grad_norm: 1.1650 (1.5114) time: 2.8760 data: 0.0002 max mem: 28452 +[2024-12-11 18:59:23 root] (utils.py 283): INFO Epoch: [0] [ 520/2502] eta: 1:35:14 lr: 0.000020 loss_cls: 2.7833 (2.6430) grad_norm: 1.1372 (1.5075) time: 2.8758 data: 0.0002 max mem: 28452 +[2024-12-11 18:59:52 root] (utils.py 283): INFO Epoch: [0] [ 530/2502] eta: 1:34:44 lr: 0.000020 loss_cls: 2.6568 (2.6375) grad_norm: 1.1054 (1.5151) time: 2.8749 data: 0.0002 max mem: 28452 +[2024-12-11 19:00:20 root] (utils.py 283): INFO Epoch: [0] [ 540/2502] eta: 1:34:15 lr: 0.000020 loss_cls: 2.7759 (2.6364) grad_norm: 1.1181 (1.5104) time: 2.8733 data: 0.0003 max mem: 28452 +[2024-12-11 19:00:49 root] (utils.py 283): INFO Epoch: [0] [ 550/2502] eta: 1:33:46 lr: 0.000020 loss_cls: 2.7759 (2.6360) grad_norm: 1.2082 (1.5368) time: 2.8753 data: 0.0003 max mem: 28452 +[2024-12-11 19:01:18 root] (utils.py 283): INFO Epoch: [0] [ 560/2502] eta: 1:33:17 lr: 0.000020 loss_cls: 2.8857 (2.6389) grad_norm: 1.2309 (1.5326) time: 2.8780 data: 0.0002 max mem: 28452 +[2024-12-11 19:01:47 root] (utils.py 283): INFO Epoch: [0] [ 570/2502] eta: 1:32:48 lr: 0.000020 loss_cls: 2.9054 (2.6410) grad_norm: 1.2364 (1.5280) time: 2.8762 data: 0.0002 max mem: 28452 +[2024-12-11 19:02:15 root] (utils.py 283): INFO Epoch: [0] [ 580/2502] eta: 1:32:19 lr: 0.000020 loss_cls: 2.7289 (2.6418) grad_norm: 1.1835 (1.5255) time: 2.8721 data: 0.0002 max mem: 28452 +[2024-12-11 19:02:44 root] (utils.py 283): INFO Epoch: [0] [ 590/2502] eta: 1:31:50 lr: 0.000020 loss_cls: 2.7373 (2.6427) grad_norm: 1.1548 (1.5218) time: 2.8726 data: 0.0002 max mem: 28452 +[2024-12-11 19:03:13 root] (utils.py 283): INFO Epoch: [0] [ 600/2502] eta: 1:31:21 lr: 0.000020 loss_cls: 2.7373 (2.6418) grad_norm: 1.1567 (1.5697) time: 2.8750 data: 0.0002 max mem: 28452 +[2024-12-11 19:03:42 root] (utils.py 283): INFO Epoch: [0] [ 610/2502] eta: 1:30:52 lr: 0.000020 loss_cls: 2.6926 (2.6433) grad_norm: 1.4052 (1.5708) time: 2.8793 data: 0.0003 max mem: 28452 +[2024-12-11 19:04:10 root] (utils.py 283): INFO Epoch: [0] [ 620/2502] eta: 1:30:23 lr: 0.000020 loss_cls: 2.7600 (2.6460) grad_norm: 1.4996 (1.6071) time: 2.8789 data: 0.0003 max mem: 28452 +[2024-12-11 19:04:39 root] (utils.py 283): INFO Epoch: [0] [ 630/2502] eta: 1:29:54 lr: 0.000020 loss_cls: 2.7970 (2.6472) grad_norm: 1.3644 (1.6039) time: 2.8755 data: 0.0002 max mem: 28452 +[2024-12-11 19:05:08 root] (utils.py 283): INFO Epoch: [0] [ 640/2502] eta: 1:29:25 lr: 0.000020 loss_cls: 2.7210 (2.6463) grad_norm: 1.2742 (1.6051) time: 2.8743 data: 0.0003 max mem: 28452 +[2024-12-11 19:05:37 root] (utils.py 283): INFO Epoch: [0] [ 650/2502] eta: 1:28:56 lr: 0.000020 loss_cls: 2.6870 (2.6467) grad_norm: 1.2089 (1.5991) time: 2.8725 data: 0.0003 max mem: 28452 +[2024-12-11 19:06:05 root] (utils.py 283): INFO Epoch: [0] [ 660/2502] eta: 1:28:27 lr: 0.000020 loss_cls: 2.5356 (2.6415) grad_norm: 1.1166 (1.5923) time: 2.8722 data: 0.0003 max mem: 28452 +[2024-12-11 19:06:34 root] (utils.py 283): INFO Epoch: [0] [ 670/2502] eta: 1:27:58 lr: 0.000020 loss_cls: 2.6398 (2.6447) grad_norm: 1.1035 (1.5868) time: 2.8724 data: 0.0003 max mem: 28452 +[2024-12-11 19:07:03 root] (utils.py 283): INFO Epoch: [0] [ 680/2502] eta: 1:27:29 lr: 0.000020 loss_cls: 2.8324 (2.6450) grad_norm: 1.1770 (1.5870) time: 2.8739 data: 0.0003 max mem: 28452 +[2024-12-11 19:07:32 root] (utils.py 283): INFO Epoch: [0] [ 690/2502] eta: 1:27:00 lr: 0.000020 loss_cls: 2.4334 (2.6417) grad_norm: 1.1784 (1.5819) time: 2.8759 data: 0.0002 max mem: 28452 +[2024-12-11 19:08:00 root] (utils.py 283): INFO Epoch: [0] [ 700/2502] eta: 1:26:31 lr: 0.000020 loss_cls: 2.4334 (2.6402) grad_norm: 1.1445 (1.5755) time: 2.8758 data: 0.0002 max mem: 28452 +[2024-12-11 19:08:29 root] (utils.py 283): INFO Epoch: [0] [ 710/2502] eta: 1:26:02 lr: 0.000020 loss_cls: 2.6155 (2.6387) grad_norm: 1.1185 (1.5712) time: 2.8763 data: 0.0003 max mem: 28452 +[2024-12-11 19:08:58 root] (utils.py 283): INFO Epoch: [0] [ 720/2502] eta: 1:25:33 lr: 0.000020 loss_cls: 2.6155 (2.6353) grad_norm: 1.1286 (1.5662) time: 2.8770 data: 0.0003 max mem: 28452 +[2024-12-11 19:09:27 root] (utils.py 283): INFO Epoch: [0] [ 730/2502] eta: 1:25:04 lr: 0.000020 loss_cls: 2.6072 (2.6353) grad_norm: 1.1325 (1.5610) time: 2.8754 data: 0.0003 max mem: 28452 +[2024-12-11 19:09:55 root] (utils.py 283): INFO Epoch: [0] [ 740/2502] eta: 1:24:35 lr: 0.000020 loss_cls: 2.8138 (2.6393) grad_norm: 1.1531 (1.5563) time: 2.8734 data: 0.0002 max mem: 28452 +[2024-12-11 19:10:24 root] (utils.py 283): INFO Epoch: [0] [ 750/2502] eta: 1:24:06 lr: 0.000020 loss_cls: 2.8138 (2.6377) grad_norm: 1.1743 (1.5505) time: 2.8749 data: 0.0002 max mem: 28452 +[2024-12-11 19:10:53 root] (utils.py 283): INFO Epoch: [0] [ 760/2502] eta: 1:23:37 lr: 0.000020 loss_cls: 2.7701 (2.6379) grad_norm: 1.1630 (1.5474) time: 2.8775 data: 0.0002 max mem: 28452 +[2024-12-11 19:11:22 root] (utils.py 283): INFO Epoch: [0] [ 770/2502] eta: 1:23:09 lr: 0.000020 loss_cls: 2.7411 (2.6384) grad_norm: 1.1613 (1.5428) time: 2.8776 data: 0.0002 max mem: 28452 +[2024-12-11 19:11:50 root] (utils.py 283): INFO Epoch: [0] [ 780/2502] eta: 1:22:40 lr: 0.000020 loss_cls: 2.8206 (2.6395) grad_norm: 1.0512 (1.5384) time: 2.8777 data: 0.0002 max mem: 28452 +[2024-12-11 19:12:19 root] (utils.py 283): INFO Epoch: [0] [ 790/2502] eta: 1:22:11 lr: 0.000020 loss_cls: 2.6951 (2.6382) grad_norm: 1.0357 (1.5322) time: 2.8771 data: 0.0002 max mem: 28452 +[2024-12-11 19:12:48 root] (utils.py 283): INFO Epoch: [0] [ 800/2502] eta: 1:21:42 lr: 0.000020 loss_cls: 2.5949 (2.6383) grad_norm: 1.0884 (1.5277) time: 2.8742 data: 0.0002 max mem: 28452 +[2024-12-11 19:13:17 root] (utils.py 283): INFO Epoch: [0] [ 810/2502] eta: 1:21:13 lr: 0.000020 loss_cls: 2.4494 (2.6320) grad_norm: 1.0991 (1.5229) time: 2.8714 data: 0.0002 max mem: 28452 +[2024-12-11 19:13:45 root] (utils.py 283): INFO Epoch: [0] [ 820/2502] eta: 1:20:44 lr: 0.000020 loss_cls: 2.4494 (2.6323) grad_norm: 1.0991 (1.5199) time: 2.8721 data: 0.0002 max mem: 28452 +[2024-12-11 19:14:14 root] (utils.py 283): INFO Epoch: [0] [ 830/2502] eta: 1:20:15 lr: 0.000020 loss_cls: 2.6717 (2.6295) grad_norm: 1.2579 (1.6206) time: 2.8814 data: 0.0002 max mem: 28452 +[2024-12-11 19:14:43 root] (utils.py 283): INFO Epoch: [0] [ 840/2502] eta: 1:19:46 lr: 0.000020 loss_cls: 2.5965 (2.6312) grad_norm: 1.7811 (1.6245) time: 2.8809 data: 0.0002 max mem: 28452 +[2024-12-11 19:15:12 root] (utils.py 283): INFO Epoch: [0] [ 850/2502] eta: 1:19:17 lr: 0.000020 loss_cls: 2.8891 (2.6302) grad_norm: 1.8173 (1.6255) time: 2.8720 data: 0.0002 max mem: 28452 +[2024-12-11 19:15:40 root] (utils.py 283): INFO Epoch: [0] [ 860/2502] eta: 1:18:48 lr: 0.000020 loss_cls: 2.7658 (2.6315) grad_norm: 1.3306 (1.6223) time: 2.8721 data: 0.0003 max mem: 28452 +[2024-12-11 19:16:09 root] (utils.py 283): INFO Epoch: [0] [ 870/2502] eta: 1:18:19 lr: 0.000020 loss_cls: 2.7252 (2.6313) grad_norm: 1.3306 (1.6208) time: 2.8717 data: 0.0002 max mem: 28452 +[2024-12-11 19:16:38 root] (utils.py 283): INFO Epoch: [0] [ 880/2502] eta: 1:17:50 lr: 0.000020 loss_cls: 2.6302 (2.6296) grad_norm: 1.2195 (1.6155) time: 2.8726 data: 0.0002 max mem: 28452 +[2024-12-11 19:17:07 root] (utils.py 283): INFO Epoch: [0] [ 890/2502] eta: 1:17:22 lr: 0.000020 loss_cls: 2.5748 (2.6279) grad_norm: 1.2153 (1.6249) time: 2.8738 data: 0.0002 max mem: 28452 +[2024-12-11 19:17:35 root] (utils.py 283): INFO Epoch: [0] [ 900/2502] eta: 1:16:53 lr: 0.000020 loss_cls: 2.6949 (2.6283) grad_norm: 1.2924 (1.6217) time: 2.8729 data: 0.0003 max mem: 28452 +[2024-12-11 19:18:04 root] (utils.py 283): INFO Epoch: [0] [ 910/2502] eta: 1:16:24 lr: 0.000020 loss_cls: 2.8509 (2.6288) grad_norm: 1.1866 (1.6246) time: 2.8744 data: 0.0003 max mem: 28452 +[2024-12-11 19:18:33 root] (utils.py 283): INFO Epoch: [0] [ 920/2502] eta: 1:15:55 lr: 0.000020 loss_cls: 2.9464 (2.6305) grad_norm: 1.2026 (1.6241) time: 2.8755 data: 0.0003 max mem: 28452 +[2024-12-11 19:19:02 root] (utils.py 283): INFO Epoch: [0] [ 930/2502] eta: 1:15:26 lr: 0.000020 loss_cls: 2.8293 (2.6324) grad_norm: 1.1927 (1.6202) time: 2.8740 data: 0.0002 max mem: 28452 +[2024-12-11 19:19:30 root] (utils.py 283): INFO Epoch: [0] [ 940/2502] eta: 1:14:57 lr: 0.000020 loss_cls: 2.7362 (2.6317) grad_norm: 1.1398 (1.6164) time: 2.8748 data: 0.0002 max mem: 28452 +[2024-12-11 19:19:59 root] (utils.py 283): INFO Epoch: [0] [ 950/2502] eta: 1:14:28 lr: 0.000020 loss_cls: 2.3178 (2.6281) grad_norm: 1.1444 (1.6126) time: 2.8785 data: 0.0002 max mem: 28452 +[2024-12-11 19:20:28 root] (utils.py 283): INFO Epoch: [0] [ 960/2502] eta: 1:14:00 lr: 0.000020 loss_cls: 2.5772 (2.6292) grad_norm: 1.1233 (1.6076) time: 2.8776 data: 0.0002 max mem: 28452 +[2024-12-11 19:20:57 root] (utils.py 283): INFO Epoch: [0] [ 970/2502] eta: 1:13:31 lr: 0.000020 loss_cls: 2.8166 (2.6299) grad_norm: 1.0943 (1.6056) time: 2.8713 data: 0.0003 max mem: 28452 +[2024-12-11 19:21:25 root] (utils.py 283): INFO Epoch: [0] [ 980/2502] eta: 1:13:02 lr: 0.000020 loss_cls: 2.6691 (2.6289) grad_norm: 1.1130 (1.6049) time: 2.8685 data: 0.0003 max mem: 28452 +[2024-12-11 19:21:54 root] (utils.py 283): INFO Epoch: [0] [ 990/2502] eta: 1:12:33 lr: 0.000020 loss_cls: 2.8785 (2.6285) grad_norm: 1.1609 (1.6068) time: 2.8697 data: 0.0003 max mem: 28452 +[2024-12-11 19:22:23 root] (utils.py 283): INFO Epoch: [0] [1000/2502] eta: 1:12:04 lr: 0.000020 loss_cls: 2.6907 (2.6280) grad_norm: 1.0937 (1.6018) time: 2.8708 data: 0.0002 max mem: 28452 +[2024-12-11 19:22:51 root] (utils.py 283): INFO Epoch: [0] [1010/2502] eta: 1:11:35 lr: 0.000020 loss_cls: 2.7389 (2.6290) grad_norm: 1.0798 (1.5973) time: 2.8706 data: 0.0002 max mem: 28452 +[2024-12-11 19:23:20 root] (utils.py 283): INFO Epoch: [0] [1020/2502] eta: 1:11:06 lr: 0.000020 loss_cls: 2.7389 (2.6290) grad_norm: 1.1838 (1.5998) time: 2.8688 data: 0.0002 max mem: 28452 +[2024-12-11 19:23:49 root] (utils.py 283): INFO Epoch: [0] [1030/2502] eta: 1:10:37 lr: 0.000020 loss_cls: 2.4800 (2.6263) grad_norm: 1.2317 (1.5959) time: 2.8686 data: 0.0002 max mem: 28452 +[2024-12-11 19:24:17 root] (utils.py 283): INFO Epoch: [0] [1040/2502] eta: 1:10:08 lr: 0.000020 loss_cls: 2.6334 (2.6273) grad_norm: 1.1215 (1.5951) time: 2.8698 data: 0.0002 max mem: 28452 +[2024-12-11 19:24:46 root] (utils.py 283): INFO Epoch: [0] [1050/2502] eta: 1:09:39 lr: 0.000020 loss_cls: 2.6841 (2.6264) grad_norm: 1.1272 (1.5917) time: 2.8720 data: 0.0002 max mem: 28452 +[2024-12-11 19:25:15 root] (utils.py 283): INFO Epoch: [0] [1060/2502] eta: 1:09:10 lr: 0.000020 loss_cls: 2.6525 (2.6272) grad_norm: 1.1612 (1.6316) time: 2.8755 data: 0.0002 max mem: 28452 +[2024-12-11 19:25:44 root] (utils.py 283): INFO Epoch: [0] [1070/2502] eta: 1:08:41 lr: 0.000020 loss_cls: 2.7002 (2.6269) grad_norm: 1.2047 (1.6281) time: 2.8726 data: 0.0002 max mem: 28452 +[2024-12-11 19:26:12 root] (utils.py 283): INFO Epoch: [0] [1080/2502] eta: 1:08:13 lr: 0.000020 loss_cls: 2.6540 (2.6268) grad_norm: 1.1777 (1.6242) time: 2.8699 data: 0.0002 max mem: 28452 +[2024-12-11 19:26:41 root] (utils.py 283): INFO Epoch: [0] [1090/2502] eta: 1:07:44 lr: 0.000020 loss_cls: 2.6358 (2.6260) grad_norm: 1.1697 (1.6206) time: 2.8686 data: 0.0002 max mem: 28452 +[2024-12-11 19:27:10 root] (utils.py 283): INFO Epoch: [0] [1100/2502] eta: 1:07:15 lr: 0.000020 loss_cls: 2.5631 (2.6253) grad_norm: 1.1352 (1.6214) time: 2.8682 data: 0.0002 max mem: 28452 +[2024-12-11 19:27:38 root] (utils.py 283): INFO Epoch: [0] [1110/2502] eta: 1:06:46 lr: 0.000020 loss_cls: 2.6575 (2.6256) grad_norm: 1.2367 (1.6190) time: 2.8687 data: 0.0002 max mem: 28452 +[2024-12-11 19:28:07 root] (utils.py 283): INFO Epoch: [0] [1120/2502] eta: 1:06:17 lr: 0.000020 loss_cls: 2.7581 (2.6266) grad_norm: 1.1269 (1.6147) time: 2.8661 data: 0.0002 max mem: 28452 +[2024-12-11 19:28:36 root] (utils.py 283): INFO Epoch: [0] [1130/2502] eta: 1:05:48 lr: 0.000020 loss_cls: 2.7805 (2.6275) grad_norm: 1.1409 (1.6293) time: 2.8679 data: 0.0002 max mem: 28452 +[2024-12-11 19:29:04 root] (utils.py 283): INFO Epoch: [0] [1140/2502] eta: 1:05:19 lr: 0.000020 loss_cls: 2.7752 (2.6268) grad_norm: 1.2041 (1.6256) time: 2.8682 data: 0.0002 max mem: 28452 +[2024-12-11 19:29:33 root] (utils.py 283): INFO Epoch: [0] [1150/2502] eta: 1:04:50 lr: 0.000020 loss_cls: 2.6407 (2.6265) grad_norm: 1.0901 (1.6207) time: 2.8688 data: 0.0002 max mem: 28452 +[2024-12-11 19:30:02 root] (utils.py 283): INFO Epoch: [0] [1160/2502] eta: 1:04:21 lr: 0.000020 loss_cls: 2.7656 (2.6264) grad_norm: 1.0916 (1.6224) time: 2.8707 data: 0.0002 max mem: 28452 +[2024-12-11 19:30:31 root] (utils.py 283): INFO Epoch: [0] [1170/2502] eta: 1:03:53 lr: 0.000020 loss_cls: 2.6775 (2.6255) grad_norm: 1.1928 (1.6193) time: 2.8717 data: 0.0002 max mem: 28452 +[2024-12-11 19:30:59 root] (utils.py 283): INFO Epoch: [0] [1180/2502] eta: 1:03:24 lr: 0.000020 loss_cls: 2.7901 (2.6277) grad_norm: 1.2034 (1.6180) time: 2.8733 data: 0.0003 max mem: 28452 +[2024-12-11 19:31:28 root] (utils.py 283): INFO Epoch: [0] [1190/2502] eta: 1:02:55 lr: 0.000020 loss_cls: 2.8786 (2.6296) grad_norm: 1.1943 (1.6170) time: 2.8726 data: 0.0002 max mem: 28452 +[2024-12-11 19:31:57 root] (utils.py 283): INFO Epoch: [0] [1200/2502] eta: 1:02:26 lr: 0.000020 loss_cls: 2.7972 (2.6303) grad_norm: 1.1484 (1.6136) time: 2.8712 data: 0.0002 max mem: 28452 +[2024-12-11 19:32:25 root] (utils.py 283): INFO Epoch: [0] [1210/2502] eta: 1:01:57 lr: 0.000020 loss_cls: 2.6925 (2.6301) grad_norm: 1.1487 (1.6105) time: 2.8699 data: 0.0002 max mem: 28452 +[2024-12-11 19:32:54 root] (utils.py 283): INFO Epoch: [0] [1220/2502] eta: 1:01:28 lr: 0.000020 loss_cls: 2.5598 (2.6294) grad_norm: 1.1995 (1.6074) time: 2.8723 data: 0.0002 max mem: 28452 +[2024-12-11 19:33:23 root] (utils.py 283): INFO Epoch: [0] [1230/2502] eta: 1:01:00 lr: 0.000020 loss_cls: 2.6299 (2.6295) grad_norm: 1.1616 (1.6043) time: 2.8723 data: 0.0002 max mem: 28452 +[2024-12-11 19:33:52 root] (utils.py 283): INFO Epoch: [0] [1240/2502] eta: 1:00:31 lr: 0.000020 loss_cls: 2.7441 (2.6299) grad_norm: 1.1616 (1.6004) time: 2.8711 data: 0.0002 max mem: 28452 +[2024-12-11 19:34:20 root] (utils.py 283): INFO Epoch: [0] [1250/2502] eta: 1:00:02 lr: 0.000020 loss_cls: 2.7021 (2.6288) grad_norm: 1.0865 (1.5971) time: 2.8719 data: 0.0003 max mem: 28452 +[2024-12-11 19:34:49 root] (utils.py 283): INFO Epoch: [0] [1260/2502] eta: 0:59:33 lr: 0.000020 loss_cls: 2.7237 (2.6297) grad_norm: 1.1107 (1.5962) time: 2.8718 data: 0.0002 max mem: 28452 +[2024-12-11 19:35:18 root] (utils.py 283): INFO Epoch: [0] [1270/2502] eta: 0:59:04 lr: 0.000020 loss_cls: 2.8071 (2.6306) grad_norm: 1.1824 (1.5937) time: 2.8723 data: 0.0002 max mem: 28452 +[2024-12-11 19:35:47 root] (utils.py 283): INFO Epoch: [0] [1280/2502] eta: 0:58:35 lr: 0.000020 loss_cls: 2.8023 (2.6319) grad_norm: 1.1824 (1.5914) time: 2.8722 data: 0.0002 max mem: 28452 +[2024-12-11 19:36:15 root] (utils.py 283): INFO Epoch: [0] [1290/2502] eta: 0:58:07 lr: 0.000020 loss_cls: 2.7382 (2.6329) grad_norm: 1.1854 (1.5883) time: 2.8726 data: 0.0003 max mem: 28452 +[2024-12-11 19:36:44 root] (utils.py 283): INFO Epoch: [0] [1300/2502] eta: 0:57:38 lr: 0.000020 loss_cls: 2.7093 (2.6332) grad_norm: 1.1854 (1.5851) time: 2.8713 data: 0.0003 max mem: 28452 +[2024-12-11 19:37:13 root] (utils.py 283): INFO Epoch: [0] [1310/2502] eta: 0:57:09 lr: 0.000020 loss_cls: 2.5818 (2.6319) grad_norm: 1.1869 (1.5819) time: 2.8714 data: 0.0002 max mem: 28452 +[2024-12-11 19:37:41 root] (utils.py 283): INFO Epoch: [0] [1320/2502] eta: 0:56:40 lr: 0.000020 loss_cls: 2.5642 (2.6306) grad_norm: 1.1220 (1.5784) time: 2.8713 data: 0.0002 max mem: 28452 +[2024-12-11 19:38:10 root] (utils.py 283): INFO Epoch: [0] [1330/2502] eta: 0:56:11 lr: 0.000020 loss_cls: 2.7575 (2.6315) grad_norm: 1.0984 (1.5752) time: 2.8712 data: 0.0002 max mem: 28452 +[2024-12-11 19:38:39 root] (utils.py 283): INFO Epoch: [0] [1340/2502] eta: 0:55:42 lr: 0.000020 loss_cls: 2.8295 (2.6318) grad_norm: 1.0984 (1.5750) time: 2.8720 data: 0.0002 max mem: 28452 +[2024-12-11 19:39:07 root] (utils.py 283): INFO Epoch: [0] [1350/2502] eta: 0:55:14 lr: 0.000020 loss_cls: 2.6724 (2.6317) grad_norm: 1.0669 (1.5794) time: 2.8690 data: 0.0002 max mem: 28452 +[2024-12-11 19:39:36 root] (utils.py 283): INFO Epoch: [0] [1360/2502] eta: 0:54:45 lr: 0.000020 loss_cls: 2.6682 (2.6330) grad_norm: 1.1627 (1.5807) time: 2.8669 data: 0.0002 max mem: 28452 +[2024-12-11 19:40:05 root] (utils.py 283): INFO Epoch: [0] [1370/2502] eta: 0:54:16 lr: 0.000020 loss_cls: 2.8284 (2.6340) grad_norm: 1.1680 (1.5777) time: 2.8681 data: 0.0003 max mem: 28452 +[2024-12-11 19:40:34 root] (utils.py 283): INFO Epoch: [0] [1380/2502] eta: 0:53:47 lr: 0.000020 loss_cls: 2.8151 (2.6348) grad_norm: 1.1140 (1.5742) time: 2.8687 data: 0.0003 max mem: 28452 +[2024-12-11 19:41:02 root] (utils.py 283): INFO Epoch: [0] [1390/2502] eta: 0:53:18 lr: 0.000020 loss_cls: 2.8084 (2.6347) grad_norm: 1.1846 (1.5761) time: 2.8681 data: 0.0002 max mem: 28452 +[2024-12-11 19:41:31 root] (utils.py 283): INFO Epoch: [0] [1400/2502] eta: 0:52:49 lr: 0.000020 loss_cls: 2.8001 (2.6346) grad_norm: 1.2155 (1.5729) time: 2.8667 data: 0.0002 max mem: 28452 +[2024-12-11 19:42:00 root] (utils.py 283): INFO Epoch: [0] [1410/2502] eta: 0:52:21 lr: 0.000020 loss_cls: 2.8237 (2.6362) grad_norm: 1.0965 (1.5698) time: 2.8671 data: 0.0002 max mem: 28452 +[2024-12-11 19:42:28 root] (utils.py 283): INFO Epoch: [0] [1420/2502] eta: 0:51:52 lr: 0.000020 loss_cls: 2.7227 (2.6354) grad_norm: 1.0559 (1.5662) time: 2.8656 data: 0.0002 max mem: 28452 +[2024-12-11 19:42:57 root] (utils.py 283): INFO Epoch: [0] [1430/2502] eta: 0:51:23 lr: 0.000020 loss_cls: 2.6332 (2.6355) grad_norm: 1.0412 (1.5634) time: 2.8641 data: 0.0002 max mem: 28452 +[2024-12-11 19:43:26 root] (utils.py 283): INFO Epoch: [0] [1440/2502] eta: 0:50:54 lr: 0.000020 loss_cls: 2.7724 (2.6350) grad_norm: 1.0999 (1.5607) time: 2.8655 data: 0.0002 max mem: 28452 +[2024-12-11 19:43:54 root] (utils.py 283): INFO Epoch: [0] [1450/2502] eta: 0:50:25 lr: 0.000020 loss_cls: 2.7724 (2.6353) grad_norm: 1.1931 (1.5618) time: 2.8691 data: 0.0002 max mem: 28452 +[2024-12-11 19:44:23 root] (utils.py 283): INFO Epoch: [0] [1460/2502] eta: 0:49:56 lr: 0.000020 loss_cls: 2.7873 (2.6361) grad_norm: 1.1931 (1.5590) time: 2.8703 data: 0.0002 max mem: 28452 +[2024-12-11 19:44:52 root] (utils.py 283): INFO Epoch: [0] [1470/2502] eta: 0:49:28 lr: 0.000020 loss_cls: 2.7613 (2.6354) grad_norm: 1.1345 (1.5580) time: 2.8676 data: 0.0002 max mem: 28452 +[2024-12-11 19:45:20 root] (utils.py 283): INFO Epoch: [0] [1480/2502] eta: 0:48:59 lr: 0.000020 loss_cls: 2.4478 (2.6335) grad_norm: 1.1057 (1.5552) time: 2.8675 data: 0.0002 max mem: 28452 +[2024-12-11 19:45:49 root] (utils.py 283): INFO Epoch: [0] [1490/2502] eta: 0:48:30 lr: 0.000020 loss_cls: 2.3314 (2.6305) grad_norm: 1.0837 (1.5614) time: 2.8663 data: 0.0002 max mem: 28452 +[2024-12-11 19:46:18 root] (utils.py 283): INFO Epoch: [0] [1500/2502] eta: 0:48:01 lr: 0.000020 loss_cls: 2.3314 (2.6305) grad_norm: 1.1374 (1.5591) time: 2.8667 data: 0.0002 max mem: 28452 +[2024-12-11 19:46:46 root] (utils.py 283): INFO Epoch: [0] [1510/2502] eta: 0:47:32 lr: 0.000020 loss_cls: 2.6979 (2.6308) grad_norm: 1.1541 (1.5574) time: 2.8675 data: 0.0002 max mem: 28452 +[2024-12-11 19:47:15 root] (utils.py 283): INFO Epoch: [0] [1520/2502] eta: 0:47:04 lr: 0.000020 loss_cls: 2.7154 (2.6315) grad_norm: 1.0755 (1.5547) time: 2.8666 data: 0.0002 max mem: 28452 +[2024-12-11 19:47:44 root] (utils.py 283): INFO Epoch: [0] [1530/2502] eta: 0:46:35 lr: 0.000020 loss_cls: 2.7154 (2.6310) grad_norm: 1.0722 (1.5522) time: 2.8652 data: 0.0002 max mem: 28452 +[2024-12-11 19:48:12 root] (utils.py 283): INFO Epoch: [0] [1540/2502] eta: 0:46:06 lr: 0.000020 loss_cls: 2.7098 (2.6307) grad_norm: 1.1814 (1.5502) time: 2.8648 data: 0.0002 max mem: 28452 +[2024-12-11 19:48:41 root] (utils.py 283): INFO Epoch: [0] [1550/2502] eta: 0:45:37 lr: 0.000020 loss_cls: 2.6364 (2.6296) grad_norm: 1.1844 (1.5500) time: 2.8668 data: 0.0002 max mem: 28452 +[2024-12-11 19:49:10 root] (utils.py 283): INFO Epoch: [0] [1560/2502] eta: 0:45:08 lr: 0.000020 loss_cls: 2.5211 (2.6289) grad_norm: 1.1952 (1.5661) time: 2.8670 data: 0.0002 max mem: 28452 +[2024-12-11 19:49:38 root] (utils.py 283): INFO Epoch: [0] [1570/2502] eta: 0:44:39 lr: 0.000020 loss_cls: 2.5316 (2.6272) grad_norm: 1.2265 (1.5638) time: 2.8672 data: 0.0002 max mem: 28452 +[2024-12-11 19:50:07 root] (utils.py 283): INFO Epoch: [0] [1580/2502] eta: 0:44:11 lr: 0.000020 loss_cls: 2.5316 (2.6268) grad_norm: 1.1495 (1.5612) time: 2.8673 data: 0.0002 max mem: 28452 +[2024-12-11 19:50:36 root] (utils.py 283): INFO Epoch: [0] [1590/2502] eta: 0:43:42 lr: 0.000020 loss_cls: 2.7436 (2.6277) grad_norm: 1.1745 (1.5596) time: 2.8688 data: 0.0002 max mem: 28452 +[2024-12-11 19:51:04 root] (utils.py 283): INFO Epoch: [0] [1600/2502] eta: 0:43:13 lr: 0.000020 loss_cls: 2.7955 (2.6290) grad_norm: 1.2239 (1.5585) time: 2.8675 data: 0.0002 max mem: 28452 +[2024-12-11 19:51:33 root] (utils.py 283): INFO Epoch: [0] [1610/2502] eta: 0:42:44 lr: 0.000020 loss_cls: 2.7487 (2.6292) grad_norm: 1.1229 (1.5567) time: 2.8654 data: 0.0002 max mem: 28452 +[2024-12-11 19:52:02 root] (utils.py 283): INFO Epoch: [0] [1620/2502] eta: 0:42:15 lr: 0.000020 loss_cls: 2.6624 (2.6292) grad_norm: 1.1229 (1.5574) time: 2.8666 data: 0.0002 max mem: 28452 +[2024-12-11 19:52:30 root] (utils.py 283): INFO Epoch: [0] [1630/2502] eta: 0:41:47 lr: 0.000020 loss_cls: 2.7473 (2.6296) grad_norm: 1.2251 (1.5559) time: 2.8661 data: 0.0002 max mem: 28452 +[2024-12-11 19:52:59 root] (utils.py 283): INFO Epoch: [0] [1640/2502] eta: 0:41:18 lr: 0.000020 loss_cls: 2.6515 (2.6290) grad_norm: 1.2227 (1.5619) time: 2.8659 data: 0.0002 max mem: 28452 +[2024-12-11 19:53:28 root] (utils.py 283): INFO Epoch: [0] [1650/2502] eta: 0:40:49 lr: 0.000020 loss_cls: 2.5085 (2.6267) grad_norm: 1.1414 (1.5594) time: 2.8681 data: 0.0002 max mem: 28452 +[2024-12-11 19:53:56 root] (utils.py 283): INFO Epoch: [0] [1660/2502] eta: 0:40:20 lr: 0.000020 loss_cls: 2.3021 (2.6248) grad_norm: 1.1010 (1.5567) time: 2.8725 data: 0.0002 max mem: 28452 +[2024-12-11 19:54:25 root] (utils.py 283): INFO Epoch: [0] [1670/2502] eta: 0:39:52 lr: 0.000020 loss_cls: 2.5081 (2.6246) grad_norm: 1.0996 (1.5540) time: 2.8754 data: 0.0002 max mem: 28452 +[2024-12-11 19:54:54 root] (utils.py 283): INFO Epoch: [0] [1680/2502] eta: 0:39:23 lr: 0.000020 loss_cls: 2.7412 (2.6240) grad_norm: 1.1137 (1.5610) time: 2.8750 data: 0.0002 max mem: 28452 +[2024-12-11 19:55:23 root] (utils.py 283): INFO Epoch: [0] [1690/2502] eta: 0:38:54 lr: 0.000020 loss_cls: 2.6833 (2.6241) grad_norm: 1.1502 (1.5587) time: 2.8767 data: 0.0002 max mem: 28452 +[2024-12-11 19:55:51 root] (utils.py 283): INFO Epoch: [0] [1700/2502] eta: 0:38:25 lr: 0.000020 loss_cls: 2.8541 (2.6257) grad_norm: 1.1843 (1.5574) time: 2.8751 data: 0.0002 max mem: 28452 +[2024-12-11 19:56:20 root] (utils.py 283): INFO Epoch: [0] [1710/2502] eta: 0:37:57 lr: 0.000020 loss_cls: 2.7298 (2.6255) grad_norm: 1.1772 (1.5563) time: 2.8745 data: 0.0002 max mem: 28452 +[2024-12-11 19:56:49 root] (utils.py 283): INFO Epoch: [0] [1720/2502] eta: 0:37:28 lr: 0.000020 loss_cls: 2.6441 (2.6251) grad_norm: 1.0586 (1.5539) time: 2.8769 data: 0.0002 max mem: 28452 +[2024-12-11 19:57:18 root] (utils.py 283): INFO Epoch: [0] [1730/2502] eta: 0:36:59 lr: 0.000020 loss_cls: 2.7383 (2.6260) grad_norm: 1.1726 (1.5523) time: 2.8785 data: 0.0002 max mem: 28452 +[2024-12-11 19:57:47 root] (utils.py 283): INFO Epoch: [0] [1740/2502] eta: 0:36:30 lr: 0.000020 loss_cls: 2.7852 (2.6256) grad_norm: 1.1835 (1.5500) time: 2.8791 data: 0.0002 max mem: 28452 +[2024-12-11 19:58:15 root] (utils.py 283): INFO Epoch: [0] [1750/2502] eta: 0:36:02 lr: 0.000020 loss_cls: 2.5577 (2.6248) grad_norm: 1.1574 (1.5483) time: 2.8764 data: 0.0002 max mem: 28452 +[2024-12-11 19:58:44 root] (utils.py 283): INFO Epoch: [0] [1760/2502] eta: 0:35:33 lr: 0.000020 loss_cls: 2.3805 (2.6245) grad_norm: 1.1154 (1.5463) time: 2.8773 data: 0.0002 max mem: 28452 +[2024-12-11 19:59:13 root] (utils.py 283): INFO Epoch: [0] [1770/2502] eta: 0:35:04 lr: 0.000020 loss_cls: 2.7556 (2.6251) grad_norm: 1.0746 (1.5448) time: 2.8762 data: 0.0002 max mem: 28452 +[2024-12-11 19:59:42 root] (utils.py 283): INFO Epoch: [0] [1780/2502] eta: 0:34:35 lr: 0.000020 loss_cls: 2.6690 (2.6243) grad_norm: 1.0764 (1.5423) time: 2.8742 data: 0.0002 max mem: 28452 +[2024-12-11 20:00:10 root] (utils.py 283): INFO Epoch: [0] [1790/2502] eta: 0:34:07 lr: 0.000020 loss_cls: 2.6019 (2.6236) grad_norm: 1.1387 (1.5400) time: 2.8746 data: 0.0002 max mem: 28452 +[2024-12-11 20:00:39 root] (utils.py 283): INFO Epoch: [0] [1800/2502] eta: 0:33:38 lr: 0.000020 loss_cls: 2.6810 (2.6245) grad_norm: 1.1491 (1.5381) time: 2.8746 data: 0.0002 max mem: 28452 +[2024-12-11 20:01:08 root] (utils.py 283): INFO Epoch: [0] [1810/2502] eta: 0:33:09 lr: 0.000020 loss_cls: 2.6663 (2.6239) grad_norm: 1.1311 (1.5369) time: 2.8742 data: 0.0002 max mem: 28452 +[2024-12-11 20:01:36 root] (utils.py 283): INFO Epoch: [0] [1820/2502] eta: 0:32:40 lr: 0.000020 loss_cls: 2.6242 (2.6243) grad_norm: 1.1100 (1.5345) time: 2.8725 data: 0.0002 max mem: 28452 +[2024-12-11 20:02:05 root] (utils.py 283): INFO Epoch: [0] [1830/2502] eta: 0:32:12 lr: 0.000020 loss_cls: 2.7527 (2.6258) grad_norm: 1.1173 (1.5344) time: 2.8707 data: 0.0002 max mem: 28452 +[2024-12-11 20:02:34 root] (utils.py 283): INFO Epoch: [0] [1840/2502] eta: 0:31:43 lr: 0.000020 loss_cls: 2.6786 (2.6250) grad_norm: 1.1441 (1.5330) time: 2.8687 data: 0.0002 max mem: 28452 +[2024-12-11 20:03:03 root] (utils.py 283): INFO Epoch: [0] [1850/2502] eta: 0:31:14 lr: 0.000020 loss_cls: 2.5206 (2.6245) grad_norm: 1.1185 (1.5310) time: 2.8696 data: 0.0002 max mem: 28452 +[2024-12-11 20:03:31 root] (utils.py 283): INFO Epoch: [0] [1860/2502] eta: 0:30:45 lr: 0.000020 loss_cls: 2.6312 (2.6239) grad_norm: 1.1185 (1.5290) time: 2.8744 data: 0.0003 max mem: 28452 +[2024-12-11 20:04:00 root] (utils.py 283): INFO Epoch: [0] [1870/2502] eta: 0:30:17 lr: 0.000020 loss_cls: 2.6780 (2.6230) grad_norm: 1.1460 (1.5284) time: 2.8751 data: 0.0003 max mem: 28452 +[2024-12-11 20:04:29 root] (utils.py 283): INFO Epoch: [0] [1880/2502] eta: 0:29:48 lr: 0.000020 loss_cls: 2.8033 (2.6239) grad_norm: 1.2389 (1.5270) time: 2.8721 data: 0.0002 max mem: 28452 +[2024-12-11 20:04:58 root] (utils.py 283): INFO Epoch: [0] [1890/2502] eta: 0:29:19 lr: 0.000020 loss_cls: 2.8033 (2.6232) grad_norm: 1.2389 (1.5256) time: 2.8735 data: 0.0002 max mem: 28452 +[2024-12-11 20:05:26 root] (utils.py 283): INFO Epoch: [0] [1900/2502] eta: 0:28:50 lr: 0.000020 loss_cls: 2.5118 (2.6222) grad_norm: 1.1197 (1.5245) time: 2.8731 data: 0.0002 max mem: 28452 +[2024-12-11 20:05:55 root] (utils.py 283): INFO Epoch: [0] [1910/2502] eta: 0:28:21 lr: 0.000020 loss_cls: 2.6085 (2.6227) grad_norm: 1.1273 (1.5225) time: 2.8728 data: 0.0002 max mem: 28452 +[2024-12-11 20:06:24 root] (utils.py 283): INFO Epoch: [0] [1920/2502] eta: 0:27:53 lr: 0.000020 loss_cls: 2.5545 (2.6208) grad_norm: 1.0467 (1.5201) time: 2.8719 data: 0.0002 max mem: 28452 +[2024-12-11 20:06:52 root] (utils.py 283): INFO Epoch: [0] [1930/2502] eta: 0:27:24 lr: 0.000020 loss_cls: 2.4384 (2.6206) grad_norm: 1.1482 (1.5191) time: 2.8704 data: 0.0002 max mem: 28452 +[2024-12-11 20:07:21 root] (utils.py 283): INFO Epoch: [0] [1940/2502] eta: 0:26:55 lr: 0.000020 loss_cls: 2.5905 (2.6202) grad_norm: 1.1418 (1.5168) time: 2.8716 data: 0.0002 max mem: 28452 +[2024-12-11 20:07:50 root] (utils.py 283): INFO Epoch: [0] [1950/2502] eta: 0:26:26 lr: 0.000020 loss_cls: 2.5651 (2.6195) grad_norm: 1.0773 (1.5148) time: 2.8719 data: 0.0003 max mem: 28452 +[2024-12-11 20:08:19 root] (utils.py 283): INFO Epoch: [0] [1960/2502] eta: 0:25:58 lr: 0.000020 loss_cls: 2.6804 (2.6188) grad_norm: 1.0714 (1.5125) time: 2.8712 data: 0.0002 max mem: 28452 +[2024-12-11 20:08:47 root] (utils.py 283): INFO Epoch: [0] [1970/2502] eta: 0:25:29 lr: 0.000020 loss_cls: 2.6804 (2.6173) grad_norm: 1.0803 (1.5104) time: 2.8727 data: 0.0002 max mem: 28452 +[2024-12-11 20:09:16 root] (utils.py 283): INFO Epoch: [0] [1980/2502] eta: 0:25:00 lr: 0.000020 loss_cls: 2.5066 (2.6170) grad_norm: 1.1566 (1.5088) time: 2.8735 data: 0.0002 max mem: 28452 +[2024-12-11 20:09:45 root] (utils.py 283): INFO Epoch: [0] [1990/2502] eta: 0:24:31 lr: 0.000020 loss_cls: 2.7566 (2.6166) grad_norm: 1.1787 (1.5087) time: 2.8743 data: 0.0002 max mem: 28452 +[2024-12-11 20:10:14 root] (utils.py 283): INFO Epoch: [0] [2000/2502] eta: 0:24:03 lr: 0.000020 loss_cls: 2.6619 (2.6161) grad_norm: 1.1567 (1.5082) time: 2.8767 data: 0.0002 max mem: 28452 +[2024-12-11 20:10:42 root] (utils.py 283): INFO Epoch: [0] [2010/2502] eta: 0:23:34 lr: 0.000020 loss_cls: 2.6900 (2.6172) grad_norm: 1.1567 (1.5065) time: 2.8765 data: 0.0002 max mem: 28452 +[2024-12-11 20:11:11 root] (utils.py 283): INFO Epoch: [0] [2020/2502] eta: 0:23:05 lr: 0.000020 loss_cls: 2.8164 (2.6170) grad_norm: 1.1647 (1.5051) time: 2.8767 data: 0.0002 max mem: 28452 +[2024-12-11 20:11:40 root] (utils.py 283): INFO Epoch: [0] [2030/2502] eta: 0:22:36 lr: 0.000020 loss_cls: 2.8188 (2.6175) grad_norm: 1.1401 (1.5031) time: 2.8759 data: 0.0002 max mem: 28452 +[2024-12-11 20:12:09 root] (utils.py 283): INFO Epoch: [0] [2040/2502] eta: 0:22:08 lr: 0.000020 loss_cls: 2.8203 (2.6181) grad_norm: 1.1338 (1.5020) time: 2.8746 data: 0.0002 max mem: 28452 +[2024-12-11 20:12:37 root] (utils.py 283): INFO Epoch: [0] [2050/2502] eta: 0:21:39 lr: 0.000020 loss_cls: 2.6751 (2.6176) grad_norm: 1.1352 (1.5043) time: 2.8719 data: 0.0002 max mem: 28452 +[2024-12-11 20:13:06 root] (utils.py 283): INFO Epoch: [0] [2060/2502] eta: 0:21:10 lr: 0.000020 loss_cls: 2.4900 (2.6175) grad_norm: 1.1825 (1.5028) time: 2.8706 data: 0.0002 max mem: 28452 +[2024-12-11 20:13:35 root] (utils.py 283): INFO Epoch: [0] [2070/2502] eta: 0:20:41 lr: 0.000020 loss_cls: 2.7942 (2.6179) grad_norm: 1.1569 (1.5011) time: 2.8702 data: 0.0002 max mem: 28452 +[2024-12-11 20:14:03 root] (utils.py 283): INFO Epoch: [0] [2080/2502] eta: 0:20:13 lr: 0.000020 loss_cls: 2.7566 (2.6177) grad_norm: 1.1335 (1.4994) time: 2.8717 data: 0.0002 max mem: 28452 +[2024-12-11 20:14:32 root] (utils.py 283): INFO Epoch: [0] [2090/2502] eta: 0:19:44 lr: 0.000020 loss_cls: 2.7566 (2.6184) grad_norm: 1.1335 (1.4980) time: 2.8740 data: 0.0002 max mem: 28452 +[2024-12-11 20:15:01 root] (utils.py 283): INFO Epoch: [0] [2100/2502] eta: 0:19:15 lr: 0.000020 loss_cls: 2.8301 (2.6174) grad_norm: 1.1621 (1.4965) time: 2.8732 data: 0.0002 max mem: 28452 +[2024-12-11 20:15:30 root] (utils.py 283): INFO Epoch: [0] [2110/2502] eta: 0:18:46 lr: 0.000020 loss_cls: 2.7200 (2.6178) grad_norm: 1.1006 (1.4958) time: 2.8713 data: 0.0002 max mem: 28452 +[2024-12-11 20:15:58 root] (utils.py 283): INFO Epoch: [0] [2120/2502] eta: 0:18:18 lr: 0.000020 loss_cls: 2.7574 (2.6174) grad_norm: 1.0904 (1.4947) time: 2.8697 data: 0.0002 max mem: 28452 +[2024-12-11 20:16:27 root] (utils.py 283): INFO Epoch: [0] [2130/2502] eta: 0:17:49 lr: 0.000020 loss_cls: 2.6674 (2.6175) grad_norm: 1.1143 (1.4932) time: 2.8714 data: 0.0002 max mem: 28452 +[2024-12-11 20:16:56 root] (utils.py 283): INFO Epoch: [0] [2140/2502] eta: 0:17:20 lr: 0.000020 loss_cls: 2.7772 (2.6185) grad_norm: 1.1520 (1.4920) time: 2.8736 data: 0.0003 max mem: 28452 +[2024-12-11 20:17:25 root] (utils.py 283): INFO Epoch: [0] [2150/2502] eta: 0:16:51 lr: 0.000020 loss_cls: 2.8126 (2.6185) grad_norm: 1.0868 (1.4905) time: 2.8723 data: 0.0002 max mem: 28452 +[2024-12-11 20:17:53 root] (utils.py 283): INFO Epoch: [0] [2160/2502] eta: 0:16:23 lr: 0.000020 loss_cls: 2.7831 (2.6188) grad_norm: 1.0504 (1.4887) time: 2.8701 data: 0.0002 max mem: 28452 +[2024-12-11 20:18:22 root] (utils.py 283): INFO Epoch: [0] [2170/2502] eta: 0:15:54 lr: 0.000020 loss_cls: 2.8496 (2.6194) grad_norm: 1.1129 (1.4879) time: 2.8716 data: 0.0003 max mem: 28452 +[2024-12-11 20:18:51 root] (utils.py 283): INFO Epoch: [0] [2180/2502] eta: 0:15:25 lr: 0.000020 loss_cls: 2.7934 (2.6191) grad_norm: 1.0983 (1.4864) time: 2.8741 data: 0.0002 max mem: 28452 +[2024-12-11 20:19:19 root] (utils.py 283): INFO Epoch: [0] [2190/2502] eta: 0:14:56 lr: 0.000020 loss_cls: 2.8469 (2.6199) grad_norm: 1.0983 (1.4853) time: 2.8740 data: 0.0002 max mem: 28452 +[2024-12-11 20:19:48 root] (utils.py 283): INFO Epoch: [0] [2200/2502] eta: 0:14:28 lr: 0.000020 loss_cls: 2.6742 (2.6188) grad_norm: 1.1374 (1.4848) time: 2.8719 data: 0.0002 max mem: 28452 +[2024-12-11 20:20:17 root] (utils.py 283): INFO Epoch: [0] [2210/2502] eta: 0:13:59 lr: 0.000020 loss_cls: 2.5519 (2.6186) grad_norm: 1.1013 (1.4833) time: 2.8697 data: 0.0002 max mem: 28452 +[2024-12-11 20:20:46 root] (utils.py 283): INFO Epoch: [0] [2220/2502] eta: 0:13:30 lr: 0.000020 loss_cls: 2.6726 (2.6185) grad_norm: 1.1438 (1.4823) time: 2.8700 data: 0.0002 max mem: 28452 +[2024-12-11 20:21:14 root] (utils.py 283): INFO Epoch: [0] [2230/2502] eta: 0:13:01 lr: 0.000020 loss_cls: 2.6598 (2.6180) grad_norm: 1.1451 (1.4810) time: 2.8710 data: 0.0003 max mem: 28452 +[2024-12-11 20:21:43 root] (utils.py 283): INFO Epoch: [0] [2240/2502] eta: 0:12:33 lr: 0.000020 loss_cls: 2.5742 (2.6175) grad_norm: 1.1216 (1.4795) time: 2.8715 data: 0.0003 max mem: 28452 +[2024-12-11 20:22:12 root] (utils.py 283): INFO Epoch: [0] [2250/2502] eta: 0:12:04 lr: 0.000020 loss_cls: 2.4488 (2.6170) grad_norm: 1.1009 (1.4786) time: 2.8748 data: 0.0003 max mem: 28452 +[2024-12-11 20:22:40 root] (utils.py 283): INFO Epoch: [0] [2260/2502] eta: 0:11:35 lr: 0.000020 loss_cls: 2.3337 (2.6165) grad_norm: 1.0773 (1.4774) time: 2.8752 data: 0.0003 max mem: 28452 +[2024-12-11 20:23:09 root] (utils.py 283): INFO Epoch: [0] [2270/2502] eta: 0:11:06 lr: 0.000020 loss_cls: 2.6421 (2.6170) grad_norm: 1.1697 (1.4765) time: 2.8728 data: 0.0003 max mem: 28452 +[2024-12-11 20:23:38 root] (utils.py 283): INFO Epoch: [0] [2280/2502] eta: 0:10:38 lr: 0.000020 loss_cls: 2.7301 (2.6177) grad_norm: 1.2016 (1.4769) time: 2.8740 data: 0.0002 max mem: 28452 +[2024-12-11 20:24:07 root] (utils.py 283): INFO Epoch: [0] [2290/2502] eta: 0:10:09 lr: 0.000020 loss_cls: 2.6526 (2.6175) grad_norm: 1.1060 (1.4754) time: 2.8741 data: 0.0002 max mem: 28452 +[2024-12-11 20:24:35 root] (utils.py 283): INFO Epoch: [0] [2300/2502] eta: 0:09:40 lr: 0.000020 loss_cls: 2.5416 (2.6171) grad_norm: 1.0952 (1.4738) time: 2.8730 data: 0.0002 max mem: 28452 +[2024-12-11 20:25:04 root] (utils.py 283): INFO Epoch: [0] [2310/2502] eta: 0:09:11 lr: 0.000020 loss_cls: 2.5416 (2.6167) grad_norm: 1.2576 (1.4897) time: 2.8730 data: 0.0002 max mem: 28452 +[2024-12-11 20:25:33 root] (utils.py 283): INFO Epoch: [0] [2320/2502] eta: 0:08:43 lr: 0.000020 loss_cls: 2.7107 (2.6175) grad_norm: 1.6154 (1.4897) time: 2.8724 data: 0.0002 max mem: 28452 +[2024-12-11 20:26:02 root] (utils.py 283): INFO Epoch: [0] [2330/2502] eta: 0:08:14 lr: 0.000020 loss_cls: 2.8844 (2.6185) grad_norm: 1.4124 (1.4891) time: 2.8696 data: 0.0002 max mem: 28452 +[2024-12-11 20:26:31 root] (utils.py 283): INFO Epoch: [0] [2340/2502] eta: 0:07:45 lr: 0.000020 loss_cls: 2.8531 (2.6181) grad_norm: 1.2209 (1.4893) time: 2.8834 data: 0.0002 max mem: 28452 +[2024-12-11 20:26:59 root] (utils.py 283): INFO Epoch: [0] [2350/2502] eta: 0:07:16 lr: 0.000020 loss_cls: 2.6119 (2.6183) grad_norm: 1.1318 (1.4876) time: 2.8821 data: 0.0002 max mem: 28452 +[2024-12-11 20:27:28 root] (utils.py 283): INFO Epoch: [0] [2360/2502] eta: 0:06:48 lr: 0.000020 loss_cls: 2.5332 (2.6177) grad_norm: 1.0396 (1.4858) time: 2.8682 data: 0.0002 max mem: 28452 +[2024-12-11 20:27:57 root] (utils.py 283): INFO Epoch: [0] [2370/2502] eta: 0:06:19 lr: 0.000020 loss_cls: 2.5295 (2.6175) grad_norm: 1.0798 (1.4849) time: 2.8725 data: 0.0002 max mem: 28452 +[2024-12-11 20:28:25 root] (utils.py 283): INFO Epoch: [0] [2380/2502] eta: 0:05:50 lr: 0.000020 loss_cls: 2.7558 (2.6179) grad_norm: 1.1404 (1.4840) time: 2.8715 data: 0.0002 max mem: 28452 +[2024-12-11 20:28:54 root] (utils.py 283): INFO Epoch: [0] [2390/2502] eta: 0:05:21 lr: 0.000020 loss_cls: 2.7683 (2.6172) grad_norm: 1.1404 (1.4865) time: 2.8718 data: 0.0002 max mem: 28452 +[2024-12-11 20:29:23 root] (utils.py 283): INFO Epoch: [0] [2400/2502] eta: 0:04:53 lr: 0.000020 loss_cls: 2.5703 (2.6173) grad_norm: 1.1633 (1.4852) time: 2.8711 data: 0.0002 max mem: 28452 +[2024-12-11 20:29:51 root] (utils.py 283): INFO Epoch: [0] [2410/2502] eta: 0:04:24 lr: 0.000020 loss_cls: 2.8068 (2.6179) grad_norm: 1.1453 (1.4839) time: 2.8692 data: 0.0002 max mem: 28452 +[2024-12-11 20:30:20 root] (utils.py 283): INFO Epoch: [0] [2420/2502] eta: 0:03:55 lr: 0.000020 loss_cls: 2.6662 (2.6174) grad_norm: 1.1465 (1.4826) time: 2.8698 data: 0.0002 max mem: 28452 +[2024-12-11 20:30:49 root] (utils.py 283): INFO Epoch: [0] [2430/2502] eta: 0:03:26 lr: 0.000020 loss_cls: 2.4876 (2.6173) grad_norm: 1.1663 (1.4821) time: 2.8718 data: 0.0002 max mem: 28452 +[2024-12-11 20:31:18 root] (utils.py 283): INFO Epoch: [0] [2440/2502] eta: 0:02:58 lr: 0.000020 loss_cls: 2.7710 (2.6176) grad_norm: 1.1090 (1.4807) time: 2.8741 data: 0.0003 max mem: 28452 +[2024-12-11 20:31:46 root] (utils.py 283): INFO Epoch: [0] [2450/2502] eta: 0:02:29 lr: 0.000020 loss_cls: 2.7710 (2.6183) grad_norm: 1.1438 (1.4857) time: 2.8733 data: 0.0003 max mem: 28452 +[2024-12-11 20:32:15 root] (utils.py 283): INFO Epoch: [0] [2460/2502] eta: 0:02:00 lr: 0.000020 loss_cls: 2.7296 (2.6183) grad_norm: 1.2039 (1.4846) time: 2.8747 data: 0.0002 max mem: 28452 +[2024-12-11 20:32:44 root] (utils.py 283): INFO Epoch: [0] [2470/2502] eta: 0:01:31 lr: 0.000020 loss_cls: 2.6777 (2.6176) grad_norm: 1.1553 (1.4909) time: 2.8762 data: 0.0002 max mem: 28452 +[2024-12-11 20:33:13 root] (utils.py 283): INFO Epoch: [0] [2480/2502] eta: 0:01:03 lr: 0.000020 loss_cls: 2.4999 (2.6172) grad_norm: 1.1328 (1.4895) time: 2.8758 data: 0.0002 max mem: 28452 +[2024-12-11 20:33:42 root] (utils.py 283): INFO Epoch: [0] [2490/2502] eta: 0:00:34 lr: 0.000020 loss_cls: 2.6460 (2.6179) grad_norm: 1.0934 (1.4882) time: 2.8949 data: 0.0200 max mem: 28452 +[2024-12-11 20:34:11 root] (utils.py 283): INFO Epoch: [0] [2500/2502] eta: 0:00:05 lr: 0.000020 loss_cls: 2.7169 (2.6178) grad_norm: 1.2132 (1.4872) time: 2.8925 data: 0.0200 max mem: 28452 +[2024-12-11 20:34:13 root] (utils.py 283): INFO Epoch: [0] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 2.7169 (2.6174) grad_norm: 1.1750 (1.4870) time: 2.8920 data: 0.0200 max mem: 28452 +[2024-12-11 20:34:13 root] (utils.py 297): INFO Epoch: [0] Total time: 1:59:52 (2.8748 s / it) +[2024-12-11 20:34:13 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 2.7169 (2.6161) grad_norm: 1.1750 (1.4870) +[2024-12-11 20:34:16 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3774 (0.3774) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5496 data: 0.0003 max mem: 28452 +[2024-12-11 20:34:21 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6152 (0.5920) acc1: 86.7188 (87.3580) acc3: 97.6562 (96.8750) acc5: 98.4375 (98.2244) time: 0.5476 data: 0.0004 max mem: 28452 +[2024-12-11 20:34:26 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6152 (0.6180) acc1: 86.7188 (87.0164) acc3: 96.0938 (96.5402) acc5: 98.4375 (97.9911) time: 0.5472 data: 0.0004 max mem: 28452 +[2024-12-11 20:34:32 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6215 (0.6511) acc1: 86.7188 (85.9879) acc3: 95.3125 (96.2702) acc5: 97.6562 (97.7319) time: 0.5477 data: 0.0004 max mem: 28452 +[2024-12-11 20:34:37 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6982 (0.6590) acc1: 85.9375 (85.6136) acc3: 96.0938 (96.2652) acc5: 97.6562 (97.6562) time: 0.5479 data: 0.0004 max mem: 28452 +[2024-12-11 20:34:43 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8338 (0.7399) acc1: 79.6875 (83.7163) acc3: 93.7500 (95.0980) acc5: 94.5312 (96.8290) time: 0.5476 data: 0.0004 max mem: 28452 +[2024-12-11 20:34:48 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9659 (0.7676) acc1: 79.6875 (83.4144) acc3: 90.6250 (94.4032) acc5: 93.7500 (96.3243) time: 0.5484 data: 0.0004 max mem: 28452 +[2024-12-11 20:34:54 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9145 (0.7952) acc1: 81.2500 (82.7465) acc3: 91.4062 (94.1241) acc5: 94.5312 (96.1818) time: 0.5486 data: 0.0004 max mem: 28452 +[2024-12-11 20:34:59 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9600 (0.8166) acc1: 78.1250 (82.3302) acc3: 91.4062 (93.8272) acc5: 94.5312 (95.8623) time: 0.5486 data: 0.0007 max mem: 28452 +[2024-12-11 20:35:05 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9618 (0.8374) acc1: 76.5625 (81.6707) acc3: 91.4062 (93.5182) acc5: 93.7500 (95.7160) time: 0.5486 data: 0.0007 max mem: 28452 +[2024-12-11 20:35:09 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9255 (0.8364) acc1: 76.5625 (81.6640) acc3: 91.4062 (93.5200) acc5: 94.0476 (95.7440) time: 0.5524 data: 0.0006 max mem: 28452 +[2024-12-11 20:35:09 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5491 s / it) +[2024-12-11 20:35:09 root] (engine.py 264): INFO * Acc@1 81.674 Acc@3 93.502 Acc@5 95.738 loss 0.837 flops 13.207 layer_flops 13.109 +[2024-12-11 20:35:09 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.7% +[2024-12-11 20:35:10 root] (main.py 576): INFO Max accuracy: 81.67% +[2024-12-11 20:35:13 root] (utils.py 283): INFO Epoch: [1] [ 0/2502] eta: 2:19:50 lr: 0.000020 loss_cls: 2.2788 (2.2788) grad_norm: 1.2200 (1.2200) time: 3.3536 data: 0.0003 max mem: 28452 +[2024-12-11 20:35:42 root] (utils.py 283): INFO Epoch: [1] [ 10/2502] eta: 2:00:47 lr: 0.000020 loss_cls: 2.3922 (2.5321) grad_norm: 1.0803 (1.1177) time: 2.9085 data: 0.0003 max mem: 28454 +[2024-12-11 20:36:11 root] (utils.py 283): INFO Epoch: [1] [ 20/2502] eta: 1:59:30 lr: 0.000020 loss_cls: 2.7837 (2.6224) grad_norm: 1.1635 (1.2549) time: 2.8657 data: 0.0002 max mem: 28454 +[2024-12-11 20:36:39 root] (utils.py 283): INFO Epoch: [1] [ 30/2502] eta: 1:58:46 lr: 0.000020 loss_cls: 2.8339 (2.6835) grad_norm: 1.2057 (1.2324) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-11 20:37:08 root] (utils.py 283): INFO Epoch: [1] [ 40/2502] eta: 1:58:11 lr: 0.000020 loss_cls: 2.7207 (2.6584) grad_norm: 1.1420 (1.2073) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-11 20:37:37 root] (utils.py 283): INFO Epoch: [1] [ 50/2502] eta: 1:57:40 lr: 0.000020 loss_cls: 2.6867 (2.6718) grad_norm: 1.0820 (1.1899) time: 2.8739 data: 0.0003 max mem: 28454 +[2024-12-11 20:38:05 root] (utils.py 283): INFO Epoch: [1] [ 60/2502] eta: 1:57:07 lr: 0.000020 loss_cls: 2.8215 (2.6998) grad_norm: 1.1292 (1.1815) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-11 20:38:34 root] (utils.py 283): INFO Epoch: [1] [ 70/2502] eta: 1:56:35 lr: 0.000020 loss_cls: 2.7949 (2.7098) grad_norm: 1.1292 (1.1918) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-11 20:39:03 root] (utils.py 283): INFO Epoch: [1] [ 80/2502] eta: 1:56:03 lr: 0.000020 loss_cls: 2.7083 (2.6971) grad_norm: 1.0938 (1.2019) time: 2.8658 data: 0.0003 max mem: 28454 +[2024-12-11 20:39:31 root] (utils.py 283): INFO Epoch: [1] [ 90/2502] eta: 1:55:32 lr: 0.000020 loss_cls: 2.6900 (2.6762) grad_norm: 1.0824 (1.1916) time: 2.8652 data: 0.0003 max mem: 28454 +[2024-12-11 20:40:00 root] (utils.py 283): INFO Epoch: [1] [ 100/2502] eta: 1:55:03 lr: 0.000020 loss_cls: 2.7642 (2.6748) grad_norm: 1.1725 (1.3179) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-11 20:40:29 root] (utils.py 283): INFO Epoch: [1] [ 110/2502] eta: 1:54:34 lr: 0.000020 loss_cls: 2.6134 (2.6511) grad_norm: 1.2156 (1.3069) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-11 20:40:58 root] (utils.py 283): INFO Epoch: [1] [ 120/2502] eta: 1:54:06 lr: 0.000020 loss_cls: 2.5772 (2.6565) grad_norm: 1.1138 (1.2922) time: 2.8742 data: 0.0003 max mem: 28454 +[2024-12-11 20:41:26 root] (utils.py 283): INFO Epoch: [1] [ 130/2502] eta: 1:53:37 lr: 0.000020 loss_cls: 2.7185 (2.6523) grad_norm: 1.0549 (1.2964) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-11 20:41:55 root] (utils.py 283): INFO Epoch: [1] [ 140/2502] eta: 1:53:08 lr: 0.000020 loss_cls: 2.6706 (2.6536) grad_norm: 1.2601 (1.3154) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-11 20:42:24 root] (utils.py 283): INFO Epoch: [1] [ 150/2502] eta: 1:52:39 lr: 0.000020 loss_cls: 2.6208 (2.6389) grad_norm: 1.2681 (1.3209) time: 2.8740 data: 0.0003 max mem: 28454 +[2024-12-11 20:42:53 root] (utils.py 283): INFO Epoch: [1] [ 160/2502] eta: 1:52:11 lr: 0.000020 loss_cls: 2.7158 (2.6434) grad_norm: 1.1509 (1.3109) time: 2.8753 data: 0.0003 max mem: 28454 +[2024-12-11 20:43:21 root] (utils.py 283): INFO Epoch: [1] [ 170/2502] eta: 1:51:43 lr: 0.000020 loss_cls: 2.7420 (2.6455) grad_norm: 1.0599 (1.3015) time: 2.8782 data: 0.0003 max mem: 28454 +[2024-12-11 20:43:50 root] (utils.py 283): INFO Epoch: [1] [ 180/2502] eta: 1:51:15 lr: 0.000020 loss_cls: 2.7667 (2.6520) grad_norm: 1.1053 (1.2910) time: 2.8797 data: 0.0003 max mem: 28454 +[2024-12-11 20:44:19 root] (utils.py 283): INFO Epoch: [1] [ 190/2502] eta: 1:50:46 lr: 0.000020 loss_cls: 2.7667 (2.6514) grad_norm: 1.1136 (1.2856) time: 2.8779 data: 0.0002 max mem: 28454 +[2024-12-11 20:44:48 root] (utils.py 283): INFO Epoch: [1] [ 200/2502] eta: 1:50:17 lr: 0.000020 loss_cls: 2.7829 (2.6580) grad_norm: 1.1396 (1.2985) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-11 20:45:17 root] (utils.py 283): INFO Epoch: [1] [ 210/2502] eta: 1:49:50 lr: 0.000020 loss_cls: 2.7885 (2.6649) grad_norm: 1.1102 (1.3108) time: 2.8792 data: 0.0002 max mem: 28454 +[2024-12-11 20:45:45 root] (utils.py 283): INFO Epoch: [1] [ 220/2502] eta: 1:49:21 lr: 0.000020 loss_cls: 2.6939 (2.6597) grad_norm: 1.0983 (1.3335) time: 2.8815 data: 0.0003 max mem: 28454 +[2024-12-11 20:46:14 root] (utils.py 283): INFO Epoch: [1] [ 230/2502] eta: 1:48:53 lr: 0.000020 loss_cls: 2.8775 (2.6701) grad_norm: 1.2352 (1.3394) time: 2.8812 data: 0.0003 max mem: 28454 +[2024-12-11 20:46:43 root] (utils.py 283): INFO Epoch: [1] [ 240/2502] eta: 1:48:25 lr: 0.000020 loss_cls: 2.6683 (2.6556) grad_norm: 1.2107 (1.3469) time: 2.8838 data: 0.0003 max mem: 28454 +[2024-12-11 20:47:12 root] (utils.py 283): INFO Epoch: [1] [ 250/2502] eta: 1:47:57 lr: 0.000020 loss_cls: 2.5133 (2.6485) grad_norm: 1.1315 (1.3381) time: 2.8832 data: 0.0002 max mem: 28454 +[2024-12-11 20:47:41 root] (utils.py 283): INFO Epoch: [1] [ 260/2502] eta: 1:47:28 lr: 0.000020 loss_cls: 2.5711 (2.6443) grad_norm: 1.1562 (1.3645) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-11 20:48:09 root] (utils.py 283): INFO Epoch: [1] [ 270/2502] eta: 1:46:59 lr: 0.000020 loss_cls: 2.8362 (2.6502) grad_norm: 1.2688 (1.3708) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-11 20:48:38 root] (utils.py 283): INFO Epoch: [1] [ 280/2502] eta: 1:46:31 lr: 0.000020 loss_cls: 2.7889 (2.6472) grad_norm: 1.1486 (1.3719) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-11 20:49:07 root] (utils.py 283): INFO Epoch: [1] [ 290/2502] eta: 1:46:02 lr: 0.000020 loss_cls: 2.4357 (2.6342) grad_norm: 1.0785 (1.3609) time: 2.8776 data: 0.0003 max mem: 28454 +[2024-12-11 20:49:36 root] (utils.py 283): INFO Epoch: [1] [ 300/2502] eta: 1:45:33 lr: 0.000020 loss_cls: 2.4682 (2.6318) grad_norm: 1.0900 (1.3557) time: 2.8767 data: 0.0003 max mem: 28454 +[2024-12-11 20:50:05 root] (utils.py 283): INFO Epoch: [1] [ 310/2502] eta: 1:45:05 lr: 0.000020 loss_cls: 2.6718 (2.6306) grad_norm: 1.1419 (1.3474) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-11 20:50:33 root] (utils.py 283): INFO Epoch: [1] [ 320/2502] eta: 1:44:36 lr: 0.000020 loss_cls: 2.6096 (2.6285) grad_norm: 1.1297 (1.3474) time: 2.8755 data: 0.0003 max mem: 28454 +[2024-12-11 20:51:02 root] (utils.py 283): INFO Epoch: [1] [ 330/2502] eta: 1:44:07 lr: 0.000020 loss_cls: 2.6096 (2.6268) grad_norm: 1.1133 (1.3450) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-11 20:51:31 root] (utils.py 283): INFO Epoch: [1] [ 340/2502] eta: 1:43:38 lr: 0.000020 loss_cls: 2.5048 (2.6228) grad_norm: 1.0544 (1.3428) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-11 20:51:59 root] (utils.py 283): INFO Epoch: [1] [ 350/2502] eta: 1:43:09 lr: 0.000020 loss_cls: 2.5048 (2.6206) grad_norm: 1.2301 (1.3406) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-11 20:52:28 root] (utils.py 283): INFO Epoch: [1] [ 360/2502] eta: 1:42:41 lr: 0.000020 loss_cls: 2.3150 (2.6079) grad_norm: 1.2301 (1.3373) time: 2.8826 data: 0.0003 max mem: 28454 +[2024-12-11 20:52:57 root] (utils.py 283): INFO Epoch: [1] [ 370/2502] eta: 1:42:12 lr: 0.000020 loss_cls: 2.2568 (2.6050) grad_norm: 1.0559 (1.3782) time: 2.8838 data: 0.0003 max mem: 28454 +[2024-12-11 20:53:26 root] (utils.py 283): INFO Epoch: [1] [ 380/2502] eta: 1:41:44 lr: 0.000020 loss_cls: 2.7037 (2.6047) grad_norm: 1.0891 (1.3704) time: 2.8774 data: 0.0003 max mem: 28454 +[2024-12-11 20:53:55 root] (utils.py 283): INFO Epoch: [1] [ 390/2502] eta: 1:41:15 lr: 0.000020 loss_cls: 2.6361 (2.6022) grad_norm: 1.1237 (1.3658) time: 2.8774 data: 0.0003 max mem: 28454 +[2024-12-11 20:54:23 root] (utils.py 283): INFO Epoch: [1] [ 400/2502] eta: 1:40:46 lr: 0.000020 loss_cls: 2.6730 (2.6030) grad_norm: 1.1653 (1.3622) time: 2.8774 data: 0.0003 max mem: 28454 +[2024-12-11 20:54:52 root] (utils.py 283): INFO Epoch: [1] [ 410/2502] eta: 1:40:18 lr: 0.000020 loss_cls: 2.7101 (2.6019) grad_norm: 1.1337 (1.3589) time: 2.8821 data: 0.0003 max mem: 28454 +[2024-12-11 20:55:21 root] (utils.py 283): INFO Epoch: [1] [ 420/2502] eta: 1:39:49 lr: 0.000020 loss_cls: 2.6920 (2.6032) grad_norm: 1.1183 (1.3588) time: 2.8815 data: 0.0002 max mem: 28454 +[2024-12-11 20:55:50 root] (utils.py 283): INFO Epoch: [1] [ 430/2502] eta: 1:39:20 lr: 0.000020 loss_cls: 2.7794 (2.6064) grad_norm: 1.1186 (1.3556) time: 2.8785 data: 0.0002 max mem: 28454 +[2024-12-11 20:56:19 root] (utils.py 283): INFO Epoch: [1] [ 440/2502] eta: 1:38:52 lr: 0.000020 loss_cls: 2.8326 (2.6097) grad_norm: 1.1547 (1.3567) time: 2.8799 data: 0.0002 max mem: 28454 +[2024-12-11 20:56:47 root] (utils.py 283): INFO Epoch: [1] [ 450/2502] eta: 1:38:23 lr: 0.000020 loss_cls: 2.5996 (2.6041) grad_norm: 1.1547 (1.3527) time: 2.8798 data: 0.0002 max mem: 28454 +[2024-12-11 20:57:16 root] (utils.py 283): INFO Epoch: [1] [ 460/2502] eta: 1:37:54 lr: 0.000020 loss_cls: 2.5731 (2.6026) grad_norm: 1.1241 (1.3533) time: 2.8771 data: 0.0003 max mem: 28454 +[2024-12-11 20:57:45 root] (utils.py 283): INFO Epoch: [1] [ 470/2502] eta: 1:37:26 lr: 0.000020 loss_cls: 2.6654 (2.6048) grad_norm: 1.1359 (1.3529) time: 2.8771 data: 0.0003 max mem: 28454 +[2024-12-11 20:58:14 root] (utils.py 283): INFO Epoch: [1] [ 480/2502] eta: 1:36:57 lr: 0.000020 loss_cls: 2.6814 (2.6020) grad_norm: 1.1094 (1.3477) time: 2.8792 data: 0.0002 max mem: 28454 +[2024-12-11 20:58:43 root] (utils.py 283): INFO Epoch: [1] [ 490/2502] eta: 1:36:28 lr: 0.000020 loss_cls: 2.6980 (2.6037) grad_norm: 1.1118 (1.3458) time: 2.8787 data: 0.0002 max mem: 28454 +[2024-12-11 20:59:11 root] (utils.py 283): INFO Epoch: [1] [ 500/2502] eta: 1:35:59 lr: 0.000020 loss_cls: 2.6980 (2.6015) grad_norm: 1.1244 (1.3419) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-11 20:59:40 root] (utils.py 283): INFO Epoch: [1] [ 510/2502] eta: 1:35:30 lr: 0.000020 loss_cls: 2.5767 (2.6011) grad_norm: 1.1244 (1.3430) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-11 21:00:09 root] (utils.py 283): INFO Epoch: [1] [ 520/2502] eta: 1:35:02 lr: 0.000020 loss_cls: 2.5767 (2.6005) grad_norm: 1.2062 (1.3501) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-11 21:00:38 root] (utils.py 283): INFO Epoch: [1] [ 530/2502] eta: 1:34:33 lr: 0.000020 loss_cls: 2.5579 (2.5996) grad_norm: 1.2132 (1.3501) time: 2.8734 data: 0.0003 max mem: 28454 +[2024-12-11 21:01:06 root] (utils.py 283): INFO Epoch: [1] [ 540/2502] eta: 1:34:04 lr: 0.000020 loss_cls: 2.6299 (2.5990) grad_norm: 1.1601 (1.3474) time: 2.8730 data: 0.0003 max mem: 28454 +[2024-12-11 21:01:35 root] (utils.py 283): INFO Epoch: [1] [ 550/2502] eta: 1:33:35 lr: 0.000020 loss_cls: 2.7642 (2.6012) grad_norm: 1.1647 (1.3445) time: 2.8753 data: 0.0003 max mem: 28454 +[2024-12-11 21:02:04 root] (utils.py 283): INFO Epoch: [1] [ 560/2502] eta: 1:33:06 lr: 0.000020 loss_cls: 2.7086 (2.5996) grad_norm: 1.1647 (1.3510) time: 2.8788 data: 0.0003 max mem: 28454 +[2024-12-11 21:02:33 root] (utils.py 283): INFO Epoch: [1] [ 570/2502] eta: 1:32:38 lr: 0.000020 loss_cls: 2.5506 (2.5999) grad_norm: 1.0738 (1.3500) time: 2.8797 data: 0.0002 max mem: 28454 +[2024-12-11 21:03:01 root] (utils.py 283): INFO Epoch: [1] [ 580/2502] eta: 1:32:09 lr: 0.000020 loss_cls: 2.4541 (2.5967) grad_norm: 1.0667 (1.3531) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-11 21:03:30 root] (utils.py 283): INFO Epoch: [1] [ 590/2502] eta: 1:31:40 lr: 0.000020 loss_cls: 2.7285 (2.5970) grad_norm: 1.1207 (1.3703) time: 2.8793 data: 0.0002 max mem: 28454 +[2024-12-11 21:03:59 root] (utils.py 283): INFO Epoch: [1] [ 600/2502] eta: 1:31:11 lr: 0.000020 loss_cls: 2.7531 (2.5977) grad_norm: 1.3743 (1.3711) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-11 21:04:28 root] (utils.py 283): INFO Epoch: [1] [ 610/2502] eta: 1:30:43 lr: 0.000020 loss_cls: 2.7082 (2.5970) grad_norm: 1.2533 (1.3672) time: 2.8791 data: 0.0002 max mem: 28454 +[2024-12-11 21:04:57 root] (utils.py 283): INFO Epoch: [1] [ 620/2502] eta: 1:30:14 lr: 0.000020 loss_cls: 2.6066 (2.5981) grad_norm: 1.1423 (1.3642) time: 2.8806 data: 0.0003 max mem: 28454 +[2024-12-11 21:05:25 root] (utils.py 283): INFO Epoch: [1] [ 630/2502] eta: 1:29:45 lr: 0.000020 loss_cls: 2.6783 (2.5996) grad_norm: 1.2014 (1.3711) time: 2.8777 data: 0.0003 max mem: 28454 +[2024-12-11 21:05:54 root] (utils.py 283): INFO Epoch: [1] [ 640/2502] eta: 1:29:17 lr: 0.000020 loss_cls: 2.5892 (2.5955) grad_norm: 1.2920 (1.3685) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-11 21:06:23 root] (utils.py 283): INFO Epoch: [1] [ 650/2502] eta: 1:28:48 lr: 0.000020 loss_cls: 2.6299 (2.5961) grad_norm: 1.1609 (1.4427) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-11 21:06:52 root] (utils.py 283): INFO Epoch: [1] [ 660/2502] eta: 1:28:19 lr: 0.000020 loss_cls: 2.7606 (2.5951) grad_norm: 1.2778 (1.4464) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-11 21:07:20 root] (utils.py 283): INFO Epoch: [1] [ 670/2502] eta: 1:27:50 lr: 0.000020 loss_cls: 2.7094 (2.5955) grad_norm: 1.3115 (1.4436) time: 2.8767 data: 0.0003 max mem: 28454 +[2024-12-11 21:07:49 root] (utils.py 283): INFO Epoch: [1] [ 680/2502] eta: 1:27:21 lr: 0.000020 loss_cls: 2.7094 (2.5958) grad_norm: 1.2303 (1.4497) time: 2.8775 data: 0.0003 max mem: 28454 +[2024-12-11 21:08:18 root] (utils.py 283): INFO Epoch: [1] [ 690/2502] eta: 1:26:53 lr: 0.000020 loss_cls: 2.5514 (2.5932) grad_norm: 1.1552 (1.4448) time: 2.8795 data: 0.0003 max mem: 28454 +[2024-12-11 21:08:47 root] (utils.py 283): INFO Epoch: [1] [ 700/2502] eta: 1:26:24 lr: 0.000020 loss_cls: 2.5751 (2.5938) grad_norm: 1.1217 (1.4429) time: 2.8792 data: 0.0003 max mem: 28454 +[2024-12-11 21:09:16 root] (utils.py 283): INFO Epoch: [1] [ 710/2502] eta: 1:25:55 lr: 0.000020 loss_cls: 2.8297 (2.5954) grad_norm: 1.1450 (1.4423) time: 2.8780 data: 0.0003 max mem: 28454 +[2024-12-11 21:09:44 root] (utils.py 283): INFO Epoch: [1] [ 720/2502] eta: 1:25:26 lr: 0.000020 loss_cls: 2.8052 (2.5947) grad_norm: 1.1450 (1.4383) time: 2.8768 data: 0.0003 max mem: 28454 +[2024-12-11 21:10:13 root] (utils.py 283): INFO Epoch: [1] [ 730/2502] eta: 1:24:58 lr: 0.000020 loss_cls: 2.6489 (2.5964) grad_norm: 1.0908 (1.4340) time: 2.8757 data: 0.0003 max mem: 28454 +[2024-12-11 21:10:42 root] (utils.py 283): INFO Epoch: [1] [ 740/2502] eta: 1:24:29 lr: 0.000020 loss_cls: 2.7848 (2.5999) grad_norm: 1.1292 (1.4306) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-11 21:11:11 root] (utils.py 283): INFO Epoch: [1] [ 750/2502] eta: 1:24:00 lr: 0.000020 loss_cls: 2.7648 (2.5994) grad_norm: 1.1598 (1.4336) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-11 21:11:39 root] (utils.py 283): INFO Epoch: [1] [ 760/2502] eta: 1:23:31 lr: 0.000020 loss_cls: 2.6516 (2.6015) grad_norm: 1.1957 (1.4325) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-11 21:12:08 root] (utils.py 283): INFO Epoch: [1] [ 770/2502] eta: 1:23:02 lr: 0.000020 loss_cls: 2.6034 (2.5995) grad_norm: 1.1855 (1.4292) time: 2.8740 data: 0.0003 max mem: 28454 +[2024-12-11 21:12:37 root] (utils.py 283): INFO Epoch: [1] [ 780/2502] eta: 1:22:33 lr: 0.000020 loss_cls: 2.5627 (2.5989) grad_norm: 1.1437 (1.4265) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-11 21:13:06 root] (utils.py 283): INFO Epoch: [1] [ 790/2502] eta: 1:22:05 lr: 0.000020 loss_cls: 2.6180 (2.5988) grad_norm: 1.1828 (1.4256) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-11 21:13:34 root] (utils.py 283): INFO Epoch: [1] [ 800/2502] eta: 1:21:36 lr: 0.000020 loss_cls: 2.6835 (2.5992) grad_norm: 1.2898 (1.4257) time: 2.8750 data: 0.0003 max mem: 28454 +[2024-12-11 21:14:03 root] (utils.py 283): INFO Epoch: [1] [ 810/2502] eta: 1:21:07 lr: 0.000020 loss_cls: 2.7163 (2.5995) grad_norm: 1.1463 (1.4218) time: 2.8770 data: 0.0003 max mem: 28454 +[2024-12-11 21:14:32 root] (utils.py 283): INFO Epoch: [1] [ 820/2502] eta: 1:20:38 lr: 0.000020 loss_cls: 2.7142 (2.5989) grad_norm: 1.1009 (1.4192) time: 2.8778 data: 0.0003 max mem: 28454 +[2024-12-11 21:15:01 root] (utils.py 283): INFO Epoch: [1] [ 830/2502] eta: 1:20:10 lr: 0.000020 loss_cls: 2.6917 (2.5975) grad_norm: 1.1595 (1.4167) time: 2.8771 data: 0.0003 max mem: 28454 +[2024-12-11 21:15:29 root] (utils.py 283): INFO Epoch: [1] [ 840/2502] eta: 1:19:41 lr: 0.000020 loss_cls: 2.5675 (2.5948) grad_norm: 1.1158 (1.4705) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-11 21:15:58 root] (utils.py 283): INFO Epoch: [1] [ 850/2502] eta: 1:19:12 lr: 0.000020 loss_cls: 2.6161 (2.5958) grad_norm: 1.3023 (1.4710) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-11 21:16:27 root] (utils.py 283): INFO Epoch: [1] [ 860/2502] eta: 1:18:43 lr: 0.000020 loss_cls: 2.7800 (2.5956) grad_norm: 1.3886 (1.4705) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-11 21:16:56 root] (utils.py 283): INFO Epoch: [1] [ 870/2502] eta: 1:18:14 lr: 0.000020 loss_cls: 2.6009 (2.5943) grad_norm: 1.1494 (1.4692) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-11 21:17:24 root] (utils.py 283): INFO Epoch: [1] [ 880/2502] eta: 1:17:46 lr: 0.000020 loss_cls: 2.7578 (2.5956) grad_norm: 1.1356 (1.4652) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-11 21:17:53 root] (utils.py 283): INFO Epoch: [1] [ 890/2502] eta: 1:17:17 lr: 0.000020 loss_cls: 2.8092 (2.5971) grad_norm: 1.0987 (1.4616) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-11 21:18:22 root] (utils.py 283): INFO Epoch: [1] [ 900/2502] eta: 1:16:48 lr: 0.000020 loss_cls: 2.6454 (2.5954) grad_norm: 1.1564 (1.4598) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-11 21:18:51 root] (utils.py 283): INFO Epoch: [1] [ 910/2502] eta: 1:16:19 lr: 0.000020 loss_cls: 2.5604 (2.5956) grad_norm: 1.0353 (1.4551) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-11 21:19:20 root] (utils.py 283): INFO Epoch: [1] [ 920/2502] eta: 1:15:51 lr: 0.000020 loss_cls: 2.8097 (2.5971) grad_norm: 1.0353 (1.4518) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-11 21:19:48 root] (utils.py 283): INFO Epoch: [1] [ 930/2502] eta: 1:15:22 lr: 0.000020 loss_cls: 2.6847 (2.5964) grad_norm: 1.0827 (1.4478) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-11 21:20:17 root] (utils.py 283): INFO Epoch: [1] [ 940/2502] eta: 1:14:53 lr: 0.000020 loss_cls: 2.4334 (2.5955) grad_norm: 1.1249 (1.4473) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-11 21:20:46 root] (utils.py 283): INFO Epoch: [1] [ 950/2502] eta: 1:14:24 lr: 0.000020 loss_cls: 2.4496 (2.5952) grad_norm: 1.1452 (1.4448) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-11 21:21:15 root] (utils.py 283): INFO Epoch: [1] [ 960/2502] eta: 1:13:55 lr: 0.000020 loss_cls: 2.5894 (2.5954) grad_norm: 1.1280 (1.4450) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-11 21:21:43 root] (utils.py 283): INFO Epoch: [1] [ 970/2502] eta: 1:13:27 lr: 0.000020 loss_cls: 2.4659 (2.5926) grad_norm: 1.1292 (1.4521) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-11 21:22:12 root] (utils.py 283): INFO Epoch: [1] [ 980/2502] eta: 1:12:58 lr: 0.000020 loss_cls: 2.4855 (2.5923) grad_norm: 1.1548 (1.4778) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-11 21:22:41 root] (utils.py 283): INFO Epoch: [1] [ 990/2502] eta: 1:12:29 lr: 0.000020 loss_cls: 2.6845 (2.5942) grad_norm: 1.5832 (1.4808) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-11 21:23:10 root] (utils.py 283): INFO Epoch: [1] [1000/2502] eta: 1:12:00 lr: 0.000020 loss_cls: 2.6884 (2.5939) grad_norm: 1.5832 (1.4806) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-11 21:23:38 root] (utils.py 283): INFO Epoch: [1] [1010/2502] eta: 1:11:31 lr: 0.000020 loss_cls: 2.7496 (2.5944) grad_norm: 1.3168 (1.4810) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-11 21:24:07 root] (utils.py 283): INFO Epoch: [1] [1020/2502] eta: 1:11:03 lr: 0.000020 loss_cls: 2.6979 (2.5936) grad_norm: 1.1580 (1.4834) time: 2.8763 data: 0.0003 max mem: 28454 +[2024-12-11 21:24:36 root] (utils.py 283): INFO Epoch: [1] [1030/2502] eta: 1:10:34 lr: 0.000020 loss_cls: 2.6557 (2.5938) grad_norm: 1.1242 (1.4856) time: 2.8798 data: 0.0003 max mem: 28454 +[2024-12-11 21:25:05 root] (utils.py 283): INFO Epoch: [1] [1040/2502] eta: 1:10:05 lr: 0.000020 loss_cls: 2.5301 (2.5922) grad_norm: 1.1226 (1.4836) time: 2.8789 data: 0.0002 max mem: 28454 +[2024-12-11 21:25:33 root] (utils.py 283): INFO Epoch: [1] [1050/2502] eta: 1:09:36 lr: 0.000020 loss_cls: 2.5147 (2.5918) grad_norm: 1.1226 (1.4816) time: 2.8751 data: 0.0003 max mem: 28454 +[2024-12-11 21:26:02 root] (utils.py 283): INFO Epoch: [1] [1060/2502] eta: 1:09:08 lr: 0.000020 loss_cls: 2.7053 (2.5936) grad_norm: 1.2007 (1.4785) time: 2.8771 data: 0.0002 max mem: 28454 +[2024-12-11 21:26:31 root] (utils.py 283): INFO Epoch: [1] [1070/2502] eta: 1:08:39 lr: 0.000020 loss_cls: 2.8372 (2.5939) grad_norm: 1.2000 (1.4768) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-11 21:27:00 root] (utils.py 283): INFO Epoch: [1] [1080/2502] eta: 1:08:10 lr: 0.000020 loss_cls: 2.7727 (2.5951) grad_norm: 1.2000 (1.4742) time: 2.8786 data: 0.0002 max mem: 28454 +[2024-12-11 21:27:29 root] (utils.py 283): INFO Epoch: [1] [1090/2502] eta: 1:07:41 lr: 0.000020 loss_cls: 2.7727 (2.5959) grad_norm: 1.1116 (1.4709) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-11 21:27:57 root] (utils.py 283): INFO Epoch: [1] [1100/2502] eta: 1:07:13 lr: 0.000020 loss_cls: 2.7828 (2.5948) grad_norm: 1.1116 (1.4678) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-11 21:28:26 root] (utils.py 283): INFO Epoch: [1] [1110/2502] eta: 1:06:44 lr: 0.000020 loss_cls: 2.8473 (2.5968) grad_norm: 1.1385 (1.4647) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-11 21:28:55 root] (utils.py 283): INFO Epoch: [1] [1120/2502] eta: 1:06:15 lr: 0.000020 loss_cls: 2.8080 (2.5982) grad_norm: 1.1385 (1.4655) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-11 21:29:24 root] (utils.py 283): INFO Epoch: [1] [1130/2502] eta: 1:05:46 lr: 0.000020 loss_cls: 2.8080 (2.6001) grad_norm: 1.1506 (1.4629) time: 2.8716 data: 0.0003 max mem: 28454 +[2024-12-11 21:29:52 root] (utils.py 283): INFO Epoch: [1] [1140/2502] eta: 1:05:17 lr: 0.000020 loss_cls: 2.8633 (2.6005) grad_norm: 1.1406 (1.4604) time: 2.8721 data: 0.0003 max mem: 28454 +[2024-12-11 21:30:21 root] (utils.py 283): INFO Epoch: [1] [1150/2502] eta: 1:04:49 lr: 0.000020 loss_cls: 2.8175 (2.6020) grad_norm: 1.1239 (1.4625) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-11 21:30:50 root] (utils.py 283): INFO Epoch: [1] [1160/2502] eta: 1:04:20 lr: 0.000020 loss_cls: 2.7998 (2.6017) grad_norm: 1.1156 (1.4602) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-11 21:31:19 root] (utils.py 283): INFO Epoch: [1] [1170/2502] eta: 1:03:51 lr: 0.000020 loss_cls: 2.7246 (2.6020) grad_norm: 1.1208 (1.4583) time: 2.8763 data: 0.0003 max mem: 28454 +[2024-12-11 21:31:47 root] (utils.py 283): INFO Epoch: [1] [1180/2502] eta: 1:03:22 lr: 0.000020 loss_cls: 2.7631 (2.6026) grad_norm: 1.1395 (1.4580) time: 2.8768 data: 0.0003 max mem: 28454 +[2024-12-11 21:32:16 root] (utils.py 283): INFO Epoch: [1] [1190/2502] eta: 1:02:54 lr: 0.000020 loss_cls: 2.7039 (2.6022) grad_norm: 1.1320 (1.4557) time: 2.8746 data: 0.0003 max mem: 28454 +[2024-12-11 21:32:45 root] (utils.py 283): INFO Epoch: [1] [1200/2502] eta: 1:02:25 lr: 0.000020 loss_cls: 2.6043 (2.6015) grad_norm: 1.0699 (1.4527) time: 2.8720 data: 0.0003 max mem: 28454 +[2024-12-11 21:33:13 root] (utils.py 283): INFO Epoch: [1] [1210/2502] eta: 1:01:56 lr: 0.000020 loss_cls: 2.5664 (2.6008) grad_norm: 1.1238 (1.4522) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-11 21:33:42 root] (utils.py 283): INFO Epoch: [1] [1220/2502] eta: 1:01:27 lr: 0.000020 loss_cls: 2.6553 (2.6016) grad_norm: 1.2033 (1.4499) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-11 21:34:11 root] (utils.py 283): INFO Epoch: [1] [1230/2502] eta: 1:00:58 lr: 0.000020 loss_cls: 2.6571 (2.6022) grad_norm: 1.1540 (1.4485) time: 2.8731 data: 0.0003 max mem: 28454 +[2024-12-11 21:34:40 root] (utils.py 283): INFO Epoch: [1] [1240/2502] eta: 1:00:29 lr: 0.000020 loss_cls: 2.7054 (2.6031) grad_norm: 1.1540 (1.4466) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-11 21:35:08 root] (utils.py 283): INFO Epoch: [1] [1250/2502] eta: 1:00:01 lr: 0.000020 loss_cls: 2.7035 (2.6021) grad_norm: 1.1654 (1.4454) time: 2.8737 data: 0.0003 max mem: 28454 +[2024-12-11 21:35:37 root] (utils.py 283): INFO Epoch: [1] [1260/2502] eta: 0:59:32 lr: 0.000020 loss_cls: 2.6442 (2.6028) grad_norm: 1.1654 (1.4483) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-11 21:36:06 root] (utils.py 283): INFO Epoch: [1] [1270/2502] eta: 0:59:03 lr: 0.000020 loss_cls: 2.7264 (2.6022) grad_norm: 1.1205 (1.4456) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-11 21:36:35 root] (utils.py 283): INFO Epoch: [1] [1280/2502] eta: 0:58:34 lr: 0.000020 loss_cls: 2.7163 (2.6018) grad_norm: 1.1465 (1.4537) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-11 21:37:03 root] (utils.py 283): INFO Epoch: [1] [1290/2502] eta: 0:58:06 lr: 0.000020 loss_cls: 2.7163 (2.6025) grad_norm: 1.2002 (1.4525) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-11 21:37:32 root] (utils.py 283): INFO Epoch: [1] [1300/2502] eta: 0:57:37 lr: 0.000020 loss_cls: 2.6114 (2.6030) grad_norm: 1.1581 (1.4516) time: 2.8743 data: 0.0003 max mem: 28454 +[2024-12-11 21:38:01 root] (utils.py 283): INFO Epoch: [1] [1310/2502] eta: 0:57:08 lr: 0.000020 loss_cls: 2.5806 (2.6019) grad_norm: 1.0883 (1.4488) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-11 21:38:30 root] (utils.py 283): INFO Epoch: [1] [1320/2502] eta: 0:56:39 lr: 0.000020 loss_cls: 2.5647 (2.6018) grad_norm: 1.0296 (1.4466) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-11 21:38:58 root] (utils.py 283): INFO Epoch: [1] [1330/2502] eta: 0:56:10 lr: 0.000020 loss_cls: 2.7224 (2.6023) grad_norm: 1.0686 (1.4449) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-11 21:39:27 root] (utils.py 283): INFO Epoch: [1] [1340/2502] eta: 0:55:42 lr: 0.000020 loss_cls: 2.7224 (2.6008) grad_norm: 1.1408 (1.4460) time: 2.8780 data: 0.0003 max mem: 28454 +[2024-12-11 21:39:56 root] (utils.py 283): INFO Epoch: [1] [1350/2502] eta: 0:55:13 lr: 0.000020 loss_cls: 2.7313 (2.6019) grad_norm: 1.3182 (1.4447) time: 2.8758 data: 0.0003 max mem: 28454 +[2024-12-11 21:40:24 root] (utils.py 283): INFO Epoch: [1] [1360/2502] eta: 0:54:44 lr: 0.000020 loss_cls: 2.7163 (2.6006) grad_norm: 1.1067 (1.4424) time: 2.8675 data: 0.0003 max mem: 28454 +[2024-12-11 21:40:53 root] (utils.py 283): INFO Epoch: [1] [1370/2502] eta: 0:54:15 lr: 0.000020 loss_cls: 2.5717 (2.5999) grad_norm: 1.0636 (1.4409) time: 2.8673 data: 0.0003 max mem: 28454 +[2024-12-11 21:41:22 root] (utils.py 283): INFO Epoch: [1] [1380/2502] eta: 0:53:46 lr: 0.000020 loss_cls: 2.5596 (2.5982) grad_norm: 1.0930 (1.4579) time: 2.8712 data: 0.0003 max mem: 28454 +[2024-12-11 21:41:51 root] (utils.py 283): INFO Epoch: [1] [1390/2502] eta: 0:53:18 lr: 0.000020 loss_cls: 2.5596 (2.5979) grad_norm: 1.2100 (1.4565) time: 2.8711 data: 0.0003 max mem: 28454 +[2024-12-11 21:42:19 root] (utils.py 283): INFO Epoch: [1] [1400/2502] eta: 0:52:49 lr: 0.000020 loss_cls: 2.7243 (2.5972) grad_norm: 1.2518 (1.4559) time: 2.8679 data: 0.0003 max mem: 28454 +[2024-12-11 21:42:48 root] (utils.py 283): INFO Epoch: [1] [1410/2502] eta: 0:52:20 lr: 0.000020 loss_cls: 2.6642 (2.5973) grad_norm: 1.1614 (1.4538) time: 2.8670 data: 0.0003 max mem: 28454 +[2024-12-11 21:43:17 root] (utils.py 283): INFO Epoch: [1] [1420/2502] eta: 0:51:51 lr: 0.000020 loss_cls: 2.6570 (2.5974) grad_norm: 1.1404 (1.4517) time: 2.8671 data: 0.0003 max mem: 28454 +[2024-12-11 21:43:45 root] (utils.py 283): INFO Epoch: [1] [1430/2502] eta: 0:51:22 lr: 0.000020 loss_cls: 2.8115 (2.5971) grad_norm: 1.1952 (1.4540) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-11 21:44:14 root] (utils.py 283): INFO Epoch: [1] [1440/2502] eta: 0:50:54 lr: 0.000020 loss_cls: 2.5257 (2.5959) grad_norm: 1.2019 (1.4537) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-11 21:44:43 root] (utils.py 283): INFO Epoch: [1] [1450/2502] eta: 0:50:25 lr: 0.000020 loss_cls: 2.5257 (2.5946) grad_norm: 1.1901 (1.4555) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-11 21:45:12 root] (utils.py 283): INFO Epoch: [1] [1460/2502] eta: 0:49:56 lr: 0.000020 loss_cls: 2.6127 (2.5951) grad_norm: 1.1953 (1.4542) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-11 21:45:40 root] (utils.py 283): INFO Epoch: [1] [1470/2502] eta: 0:49:27 lr: 0.000020 loss_cls: 2.7071 (2.5963) grad_norm: 1.2386 (1.4555) time: 2.8689 data: 0.0003 max mem: 28454 +[2024-12-11 21:46:09 root] (utils.py 283): INFO Epoch: [1] [1480/2502] eta: 0:48:58 lr: 0.000020 loss_cls: 2.7201 (2.5959) grad_norm: 1.2645 (1.4558) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-11 21:46:37 root] (utils.py 283): INFO Epoch: [1] [1490/2502] eta: 0:48:29 lr: 0.000020 loss_cls: 2.6783 (2.5959) grad_norm: 1.2075 (1.4549) time: 2.8647 data: 0.0003 max mem: 28454 +[2024-12-11 21:47:06 root] (utils.py 283): INFO Epoch: [1] [1500/2502] eta: 0:48:01 lr: 0.000020 loss_cls: 2.6655 (2.5946) grad_norm: 1.2075 (1.4547) time: 2.8659 data: 0.0003 max mem: 28454 +[2024-12-11 21:47:35 root] (utils.py 283): INFO Epoch: [1] [1510/2502] eta: 0:47:32 lr: 0.000020 loss_cls: 2.4513 (2.5936) grad_norm: 1.1253 (1.4527) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-11 21:48:04 root] (utils.py 283): INFO Epoch: [1] [1520/2502] eta: 0:47:03 lr: 0.000020 loss_cls: 2.6129 (2.5940) grad_norm: 1.0895 (1.4507) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-11 21:48:32 root] (utils.py 283): INFO Epoch: [1] [1530/2502] eta: 0:46:34 lr: 0.000020 loss_cls: 2.6041 (2.5937) grad_norm: 1.1321 (1.4501) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-11 21:49:01 root] (utils.py 283): INFO Epoch: [1] [1540/2502] eta: 0:46:05 lr: 0.000020 loss_cls: 2.6009 (2.5945) grad_norm: 1.1310 (1.4484) time: 2.8645 data: 0.0003 max mem: 28454 +[2024-12-11 21:49:30 root] (utils.py 283): INFO Epoch: [1] [1550/2502] eta: 0:45:37 lr: 0.000020 loss_cls: 2.8302 (2.5960) grad_norm: 1.0943 (1.4484) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-11 21:49:58 root] (utils.py 283): INFO Epoch: [1] [1560/2502] eta: 0:45:08 lr: 0.000020 loss_cls: 2.8438 (2.5967) grad_norm: 1.1179 (1.4494) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-11 21:50:27 root] (utils.py 283): INFO Epoch: [1] [1570/2502] eta: 0:44:39 lr: 0.000020 loss_cls: 2.4843 (2.5948) grad_norm: 1.1733 (1.4480) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-11 21:50:56 root] (utils.py 283): INFO Epoch: [1] [1580/2502] eta: 0:44:10 lr: 0.000020 loss_cls: 2.2307 (2.5932) grad_norm: 1.0671 (1.4456) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-11 21:51:24 root] (utils.py 283): INFO Epoch: [1] [1590/2502] eta: 0:43:42 lr: 0.000020 loss_cls: 2.3637 (2.5923) grad_norm: 1.1053 (1.4452) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-11 21:51:53 root] (utils.py 283): INFO Epoch: [1] [1600/2502] eta: 0:43:13 lr: 0.000020 loss_cls: 2.3939 (2.5909) grad_norm: 1.1092 (1.4435) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-11 21:52:22 root] (utils.py 283): INFO Epoch: [1] [1610/2502] eta: 0:42:44 lr: 0.000020 loss_cls: 2.5771 (2.5908) grad_norm: 1.0846 (1.4427) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-11 21:52:50 root] (utils.py 283): INFO Epoch: [1] [1620/2502] eta: 0:42:15 lr: 0.000020 loss_cls: 2.6391 (2.5905) grad_norm: 1.2082 (1.4462) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-11 21:53:19 root] (utils.py 283): INFO Epoch: [1] [1630/2502] eta: 0:41:46 lr: 0.000020 loss_cls: 2.6899 (2.5908) grad_norm: 1.2461 (1.4454) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-11 21:53:48 root] (utils.py 283): INFO Epoch: [1] [1640/2502] eta: 0:41:18 lr: 0.000020 loss_cls: 2.7229 (2.5912) grad_norm: 1.2085 (1.4438) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-11 21:54:17 root] (utils.py 283): INFO Epoch: [1] [1650/2502] eta: 0:40:49 lr: 0.000020 loss_cls: 2.5910 (2.5906) grad_norm: 1.1553 (1.4431) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-11 21:54:45 root] (utils.py 283): INFO Epoch: [1] [1660/2502] eta: 0:40:20 lr: 0.000020 loss_cls: 2.5313 (2.5896) grad_norm: 1.1507 (1.4427) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 21:55:14 root] (utils.py 283): INFO Epoch: [1] [1670/2502] eta: 0:39:51 lr: 0.000020 loss_cls: 2.5888 (2.5907) grad_norm: 1.1782 (1.4419) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-11 21:55:43 root] (utils.py 283): INFO Epoch: [1] [1680/2502] eta: 0:39:23 lr: 0.000020 loss_cls: 2.7124 (2.5915) grad_norm: 1.1779 (1.4406) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-11 21:56:11 root] (utils.py 283): INFO Epoch: [1] [1690/2502] eta: 0:38:54 lr: 0.000020 loss_cls: 2.5340 (2.5901) grad_norm: 1.1348 (1.4391) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-11 21:56:40 root] (utils.py 283): INFO Epoch: [1] [1700/2502] eta: 0:38:25 lr: 0.000020 loss_cls: 2.6579 (2.5907) grad_norm: 1.1648 (1.4376) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-11 21:57:09 root] (utils.py 283): INFO Epoch: [1] [1710/2502] eta: 0:37:56 lr: 0.000020 loss_cls: 2.6902 (2.5907) grad_norm: 1.1188 (1.4363) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-11 21:57:38 root] (utils.py 283): INFO Epoch: [1] [1720/2502] eta: 0:37:28 lr: 0.000020 loss_cls: 2.7841 (2.5915) grad_norm: 1.1121 (1.4348) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-11 21:58:06 root] (utils.py 283): INFO Epoch: [1] [1730/2502] eta: 0:36:59 lr: 0.000020 loss_cls: 2.7684 (2.5899) grad_norm: 1.1205 (1.4335) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-11 21:58:35 root] (utils.py 283): INFO Epoch: [1] [1740/2502] eta: 0:36:30 lr: 0.000020 loss_cls: 2.5856 (2.5897) grad_norm: 1.1098 (1.4317) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-11 21:59:04 root] (utils.py 283): INFO Epoch: [1] [1750/2502] eta: 0:36:01 lr: 0.000020 loss_cls: 2.5999 (2.5899) grad_norm: 1.1225 (1.4304) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-11 21:59:33 root] (utils.py 283): INFO Epoch: [1] [1760/2502] eta: 0:35:33 lr: 0.000020 loss_cls: 2.5981 (2.5899) grad_norm: 1.2799 (1.4345) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 22:00:01 root] (utils.py 283): INFO Epoch: [1] [1770/2502] eta: 0:35:04 lr: 0.000020 loss_cls: 2.5545 (2.5892) grad_norm: 1.2041 (1.4335) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-11 22:00:30 root] (utils.py 283): INFO Epoch: [1] [1780/2502] eta: 0:34:35 lr: 0.000020 loss_cls: 2.5556 (2.5885) grad_norm: 1.1250 (1.4318) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-11 22:00:59 root] (utils.py 283): INFO Epoch: [1] [1790/2502] eta: 0:34:06 lr: 0.000020 loss_cls: 2.5538 (2.5881) grad_norm: 1.1250 (1.4337) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-11 22:01:27 root] (utils.py 283): INFO Epoch: [1] [1800/2502] eta: 0:33:38 lr: 0.000020 loss_cls: 2.5528 (2.5876) grad_norm: 1.1376 (1.4330) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-11 22:01:56 root] (utils.py 283): INFO Epoch: [1] [1810/2502] eta: 0:33:09 lr: 0.000020 loss_cls: 2.6926 (2.5885) grad_norm: 1.1033 (1.4317) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-11 22:02:25 root] (utils.py 283): INFO Epoch: [1] [1820/2502] eta: 0:32:40 lr: 0.000020 loss_cls: 2.7758 (2.5887) grad_norm: 1.1449 (1.4301) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-11 22:02:54 root] (utils.py 283): INFO Epoch: [1] [1830/2502] eta: 0:32:11 lr: 0.000020 loss_cls: 2.6097 (2.5887) grad_norm: 1.1449 (1.4289) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-11 22:03:22 root] (utils.py 283): INFO Epoch: [1] [1840/2502] eta: 0:31:43 lr: 0.000020 loss_cls: 2.6072 (2.5873) grad_norm: 1.1236 (1.4274) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-11 22:03:51 root] (utils.py 283): INFO Epoch: [1] [1850/2502] eta: 0:31:14 lr: 0.000020 loss_cls: 2.7199 (2.5872) grad_norm: 1.0847 (1.4321) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-11 22:04:20 root] (utils.py 283): INFO Epoch: [1] [1860/2502] eta: 0:30:45 lr: 0.000020 loss_cls: 2.6666 (2.5871) grad_norm: 1.2035 (1.4346) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-11 22:04:49 root] (utils.py 283): INFO Epoch: [1] [1870/2502] eta: 0:30:16 lr: 0.000020 loss_cls: 2.6879 (2.5878) grad_norm: 1.1795 (1.4337) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-11 22:05:17 root] (utils.py 283): INFO Epoch: [1] [1880/2502] eta: 0:29:48 lr: 0.000020 loss_cls: 2.7644 (2.5874) grad_norm: 1.1723 (1.4347) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-11 22:05:46 root] (utils.py 283): INFO Epoch: [1] [1890/2502] eta: 0:29:19 lr: 0.000020 loss_cls: 2.7422 (2.5875) grad_norm: 1.1848 (1.4333) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-11 22:06:15 root] (utils.py 283): INFO Epoch: [1] [1900/2502] eta: 0:28:50 lr: 0.000020 loss_cls: 2.7575 (2.5883) grad_norm: 1.1321 (1.4326) time: 2.8679 data: 0.0003 max mem: 28454 +[2024-12-11 22:06:43 root] (utils.py 283): INFO Epoch: [1] [1910/2502] eta: 0:28:21 lr: 0.000020 loss_cls: 2.7895 (2.5887) grad_norm: 1.1332 (1.4350) time: 2.8684 data: 0.0003 max mem: 28454 +[2024-12-11 22:07:12 root] (utils.py 283): INFO Epoch: [1] [1920/2502] eta: 0:27:52 lr: 0.000020 loss_cls: 2.7481 (2.5890) grad_norm: 1.2909 (1.4376) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-11 22:07:41 root] (utils.py 283): INFO Epoch: [1] [1930/2502] eta: 0:27:24 lr: 0.000020 loss_cls: 2.7481 (2.5878) grad_norm: 1.2480 (1.4363) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-11 22:08:10 root] (utils.py 283): INFO Epoch: [1] [1940/2502] eta: 0:26:55 lr: 0.000020 loss_cls: 2.7482 (2.5879) grad_norm: 1.2199 (1.4356) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-11 22:08:38 root] (utils.py 283): INFO Epoch: [1] [1950/2502] eta: 0:26:26 lr: 0.000020 loss_cls: 2.7482 (2.5880) grad_norm: 1.2180 (1.4347) time: 2.8711 data: 0.0003 max mem: 28454 +[2024-12-11 22:09:07 root] (utils.py 283): INFO Epoch: [1] [1960/2502] eta: 0:25:57 lr: 0.000020 loss_cls: 2.6859 (2.5872) grad_norm: 1.1212 (1.4331) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-11 22:09:36 root] (utils.py 283): INFO Epoch: [1] [1970/2502] eta: 0:25:29 lr: 0.000020 loss_cls: 2.7250 (2.5887) grad_norm: 1.1366 (1.4320) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-11 22:10:04 root] (utils.py 283): INFO Epoch: [1] [1980/2502] eta: 0:25:00 lr: 0.000020 loss_cls: 2.8381 (2.5890) grad_norm: 1.1304 (1.4304) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-11 22:10:33 root] (utils.py 283): INFO Epoch: [1] [1990/2502] eta: 0:24:31 lr: 0.000020 loss_cls: 2.4813 (2.5875) grad_norm: 1.0891 (1.4291) time: 2.8653 data: 0.0002 max mem: 28454 +[2024-12-11 22:11:02 root] (utils.py 283): INFO Epoch: [1] [2000/2502] eta: 0:24:02 lr: 0.000020 loss_cls: 2.5939 (2.5879) grad_norm: 1.1735 (1.4284) time: 2.8632 data: 0.0002 max mem: 28454 +[2024-12-11 22:11:30 root] (utils.py 283): INFO Epoch: [1] [2010/2502] eta: 0:23:34 lr: 0.000020 loss_cls: 2.8193 (2.5883) grad_norm: 1.1791 (1.4299) time: 2.8645 data: 0.0002 max mem: 28454 +[2024-12-11 22:11:59 root] (utils.py 283): INFO Epoch: [1] [2020/2502] eta: 0:23:05 lr: 0.000020 loss_cls: 2.7376 (2.5884) grad_norm: 1.2188 (1.4300) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-11 22:12:28 root] (utils.py 283): INFO Epoch: [1] [2030/2502] eta: 0:22:36 lr: 0.000020 loss_cls: 2.7376 (2.5885) grad_norm: 1.2320 (1.4300) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-11 22:12:56 root] (utils.py 283): INFO Epoch: [1] [2040/2502] eta: 0:22:07 lr: 0.000020 loss_cls: 2.7351 (2.5881) grad_norm: 1.2082 (1.4291) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-11 22:13:25 root] (utils.py 283): INFO Epoch: [1] [2050/2502] eta: 0:21:39 lr: 0.000020 loss_cls: 2.6836 (2.5884) grad_norm: 1.1447 (1.4286) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-11 22:13:54 root] (utils.py 283): INFO Epoch: [1] [2060/2502] eta: 0:21:10 lr: 0.000020 loss_cls: 2.7663 (2.5890) grad_norm: 1.0957 (1.4270) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-11 22:14:23 root] (utils.py 283): INFO Epoch: [1] [2070/2502] eta: 0:20:41 lr: 0.000020 loss_cls: 2.7663 (2.5893) grad_norm: 1.0761 (1.4257) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-11 22:14:51 root] (utils.py 283): INFO Epoch: [1] [2080/2502] eta: 0:20:12 lr: 0.000020 loss_cls: 2.8056 (2.5900) grad_norm: 1.1225 (1.4250) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-11 22:15:20 root] (utils.py 283): INFO Epoch: [1] [2090/2502] eta: 0:19:44 lr: 0.000020 loss_cls: 2.8056 (2.5903) grad_norm: 1.1645 (1.4249) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-11 22:15:49 root] (utils.py 283): INFO Epoch: [1] [2100/2502] eta: 0:19:15 lr: 0.000020 loss_cls: 2.6037 (2.5902) grad_norm: 1.1484 (1.4236) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-11 22:16:18 root] (utils.py 283): INFO Epoch: [1] [2110/2502] eta: 0:18:46 lr: 0.000020 loss_cls: 2.5513 (2.5895) grad_norm: 1.1314 (1.4226) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-11 22:16:46 root] (utils.py 283): INFO Epoch: [1] [2120/2502] eta: 0:18:17 lr: 0.000020 loss_cls: 2.6700 (2.5900) grad_norm: 1.1804 (1.4246) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-11 22:17:15 root] (utils.py 283): INFO Epoch: [1] [2130/2502] eta: 0:17:49 lr: 0.000020 loss_cls: 2.7987 (2.5900) grad_norm: 1.1623 (1.4234) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-11 22:17:44 root] (utils.py 283): INFO Epoch: [1] [2140/2502] eta: 0:17:20 lr: 0.000020 loss_cls: 2.6920 (2.5906) grad_norm: 1.1299 (1.4220) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-11 22:18:12 root] (utils.py 283): INFO Epoch: [1] [2150/2502] eta: 0:16:51 lr: 0.000020 loss_cls: 2.6539 (2.5907) grad_norm: 1.1332 (1.4224) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-11 22:18:41 root] (utils.py 283): INFO Epoch: [1] [2160/2502] eta: 0:16:22 lr: 0.000020 loss_cls: 2.6748 (2.5916) grad_norm: 1.1266 (1.4211) time: 2.8753 data: 0.0003 max mem: 28454 +[2024-12-11 22:19:10 root] (utils.py 283): INFO Epoch: [1] [2170/2502] eta: 0:15:54 lr: 0.000020 loss_cls: 2.6949 (2.5914) grad_norm: 1.1460 (1.4239) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-11 22:19:39 root] (utils.py 283): INFO Epoch: [1] [2180/2502] eta: 0:15:25 lr: 0.000020 loss_cls: 2.6979 (2.5916) grad_norm: 1.1823 (1.4230) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-11 22:20:07 root] (utils.py 283): INFO Epoch: [1] [2190/2502] eta: 0:14:56 lr: 0.000020 loss_cls: 2.7855 (2.5927) grad_norm: 1.1331 (1.4228) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-11 22:20:36 root] (utils.py 283): INFO Epoch: [1] [2200/2502] eta: 0:14:27 lr: 0.000020 loss_cls: 2.7184 (2.5931) grad_norm: 1.1031 (1.4214) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-11 22:21:05 root] (utils.py 283): INFO Epoch: [1] [2210/2502] eta: 0:13:59 lr: 0.000020 loss_cls: 2.7138 (2.5931) grad_norm: 1.1031 (1.4202) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-11 22:21:34 root] (utils.py 283): INFO Epoch: [1] [2220/2502] eta: 0:13:30 lr: 0.000020 loss_cls: 2.7205 (2.5941) grad_norm: 1.0764 (1.4191) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-11 22:22:02 root] (utils.py 283): INFO Epoch: [1] [2230/2502] eta: 0:13:01 lr: 0.000020 loss_cls: 2.7205 (2.5933) grad_norm: 1.0764 (1.4188) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-11 22:22:31 root] (utils.py 283): INFO Epoch: [1] [2240/2502] eta: 0:12:33 lr: 0.000020 loss_cls: 2.7061 (2.5941) grad_norm: 1.0816 (1.4181) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-11 22:23:00 root] (utils.py 283): INFO Epoch: [1] [2250/2502] eta: 0:12:04 lr: 0.000020 loss_cls: 2.7061 (2.5935) grad_norm: 1.0813 (1.4168) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-11 22:23:29 root] (utils.py 283): INFO Epoch: [1] [2260/2502] eta: 0:11:35 lr: 0.000020 loss_cls: 2.5822 (2.5935) grad_norm: 1.0648 (1.4163) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 22:23:57 root] (utils.py 283): INFO Epoch: [1] [2270/2502] eta: 0:11:06 lr: 0.000020 loss_cls: 2.5977 (2.5929) grad_norm: 1.1342 (1.4154) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-11 22:24:26 root] (utils.py 283): INFO Epoch: [1] [2280/2502] eta: 0:10:38 lr: 0.000020 loss_cls: 2.5977 (2.5929) grad_norm: 1.1340 (1.4143) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-11 22:24:55 root] (utils.py 283): INFO Epoch: [1] [2290/2502] eta: 0:10:09 lr: 0.000020 loss_cls: 2.6962 (2.5932) grad_norm: 1.1340 (1.4132) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-11 22:25:23 root] (utils.py 283): INFO Epoch: [1] [2300/2502] eta: 0:09:40 lr: 0.000020 loss_cls: 2.7738 (2.5935) grad_norm: 1.1524 (1.4183) time: 2.8667 data: 0.0002 max mem: 28454 +[2024-12-11 22:25:52 root] (utils.py 283): INFO Epoch: [1] [2310/2502] eta: 0:09:11 lr: 0.000020 loss_cls: 2.7738 (2.5938) grad_norm: 1.1211 (1.4181) time: 2.8695 data: 0.0003 max mem: 28454 +[2024-12-11 22:26:21 root] (utils.py 283): INFO Epoch: [1] [2320/2502] eta: 0:08:43 lr: 0.000020 loss_cls: 2.5109 (2.5930) grad_norm: 1.1030 (1.4191) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-11 22:26:49 root] (utils.py 283): INFO Epoch: [1] [2330/2502] eta: 0:08:14 lr: 0.000020 loss_cls: 2.6879 (2.5937) grad_norm: 1.0603 (1.4180) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-11 22:27:18 root] (utils.py 283): INFO Epoch: [1] [2340/2502] eta: 0:07:45 lr: 0.000020 loss_cls: 2.6959 (2.5939) grad_norm: 1.0603 (1.4179) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-11 22:27:47 root] (utils.py 283): INFO Epoch: [1] [2350/2502] eta: 0:07:16 lr: 0.000020 loss_cls: 2.7386 (2.5934) grad_norm: 1.0888 (1.4169) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-11 22:28:15 root] (utils.py 283): INFO Epoch: [1] [2360/2502] eta: 0:06:48 lr: 0.000020 loss_cls: 2.5549 (2.5923) grad_norm: 1.2485 (1.4161) time: 2.8640 data: 0.0003 max mem: 28454 +[2024-12-11 22:28:44 root] (utils.py 283): INFO Epoch: [1] [2370/2502] eta: 0:06:19 lr: 0.000020 loss_cls: 2.4829 (2.5912) grad_norm: 1.1603 (1.4148) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-11 22:29:13 root] (utils.py 283): INFO Epoch: [1] [2380/2502] eta: 0:05:50 lr: 0.000020 loss_cls: 2.5207 (2.5913) grad_norm: 1.1200 (1.4137) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-11 22:29:41 root] (utils.py 283): INFO Epoch: [1] [2390/2502] eta: 0:05:21 lr: 0.000020 loss_cls: 2.6280 (2.5910) grad_norm: 1.0889 (1.4125) time: 2.8675 data: 0.0003 max mem: 28454 +[2024-12-11 22:30:10 root] (utils.py 283): INFO Epoch: [1] [2400/2502] eta: 0:04:53 lr: 0.000020 loss_cls: 2.5183 (2.5904) grad_norm: 1.0520 (1.4114) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-11 22:30:39 root] (utils.py 283): INFO Epoch: [1] [2410/2502] eta: 0:04:24 lr: 0.000020 loss_cls: 2.5840 (2.5904) grad_norm: 1.0668 (1.4104) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-11 22:31:07 root] (utils.py 283): INFO Epoch: [1] [2420/2502] eta: 0:03:55 lr: 0.000020 loss_cls: 2.7091 (2.5908) grad_norm: 1.1035 (1.4093) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-11 22:31:36 root] (utils.py 283): INFO Epoch: [1] [2430/2502] eta: 0:03:26 lr: 0.000020 loss_cls: 2.7693 (2.5911) grad_norm: 1.1719 (1.4084) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-11 22:32:05 root] (utils.py 283): INFO Epoch: [1] [2440/2502] eta: 0:02:58 lr: 0.000020 loss_cls: 2.7980 (2.5917) grad_norm: 1.1719 (1.4073) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-11 22:32:33 root] (utils.py 283): INFO Epoch: [1] [2450/2502] eta: 0:02:29 lr: 0.000020 loss_cls: 2.7691 (2.5917) grad_norm: 1.1704 (1.4077) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-11 22:33:02 root] (utils.py 283): INFO Epoch: [1] [2460/2502] eta: 0:02:00 lr: 0.000020 loss_cls: 2.6389 (2.5910) grad_norm: 1.1940 (1.4074) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-11 22:33:31 root] (utils.py 283): INFO Epoch: [1] [2470/2502] eta: 0:01:31 lr: 0.000020 loss_cls: 2.6545 (2.5913) grad_norm: 1.1007 (1.4063) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-11 22:33:59 root] (utils.py 283): INFO Epoch: [1] [2480/2502] eta: 0:01:03 lr: 0.000020 loss_cls: 2.8123 (2.5925) grad_norm: 1.0964 (1.4055) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-11 22:34:29 root] (utils.py 283): INFO Epoch: [1] [2490/2502] eta: 0:00:34 lr: 0.000020 loss_cls: 2.7467 (2.5924) grad_norm: 1.0836 (1.4047) time: 2.8856 data: 0.0205 max mem: 28454 +[2024-12-11 22:34:57 root] (utils.py 283): INFO Epoch: [1] [2500/2502] eta: 0:00:05 lr: 0.000020 loss_cls: 2.6807 (2.5930) grad_norm: 1.0836 (1.4039) time: 2.8874 data: 0.0205 max mem: 28454 +[2024-12-11 22:35:00 root] (utils.py 283): INFO Epoch: [1] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 2.7178 (2.5932) grad_norm: 1.1058 (1.4039) time: 2.8879 data: 0.0205 max mem: 28454 +[2024-12-11 22:35:00 root] (utils.py 297): INFO Epoch: [1] Total time: 1:59:50 (2.8738 s / it) +[2024-12-11 22:35:00 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 2.7178 (2.5906) grad_norm: 1.1058 (1.4039) +[2024-12-11 22:35:03 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4091 (0.4091) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5424 data: 0.0006 max mem: 28454 +[2024-12-11 22:35:09 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6198 (0.5966) acc1: 86.7188 (87.3580) acc3: 96.8750 (96.8040) acc5: 98.4375 (98.0114) time: 0.5462 data: 0.0006 max mem: 28454 +[2024-12-11 22:35:14 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6331 (0.6259) acc1: 86.7188 (86.8676) acc3: 96.0938 (96.2798) acc5: 97.6562 (97.7679) time: 0.5465 data: 0.0005 max mem: 28454 +[2024-12-11 22:35:19 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6498 (0.6594) acc1: 85.9375 (85.7359) acc3: 95.3125 (95.9677) acc5: 97.6562 (97.5050) time: 0.5468 data: 0.0005 max mem: 28454 +[2024-12-11 22:35:25 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6921 (0.6670) acc1: 85.9375 (85.5755) acc3: 96.0938 (95.9604) acc5: 97.6562 (97.4848) time: 0.5473 data: 0.0004 max mem: 28454 +[2024-12-11 22:35:30 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8088 (0.7472) acc1: 78.9062 (83.6550) acc3: 93.7500 (94.9295) acc5: 95.3125 (96.6452) time: 0.5474 data: 0.0004 max mem: 28454 +[2024-12-11 22:35:36 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9486 (0.7728) acc1: 78.9062 (83.3120) acc3: 90.6250 (94.3263) acc5: 93.7500 (96.1962) time: 0.5473 data: 0.0004 max mem: 28454 +[2024-12-11 22:35:41 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9464 (0.8043) acc1: 78.9062 (82.4824) acc3: 91.4062 (93.9371) acc5: 94.5312 (96.0167) time: 0.5474 data: 0.0004 max mem: 28454 +[2024-12-11 22:35:47 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9881 (0.8276) acc1: 77.3438 (82.0698) acc3: 91.4062 (93.6343) acc5: 94.5312 (95.7369) time: 0.5479 data: 0.0006 max mem: 28454 +[2024-12-11 22:35:52 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0038 (0.8473) acc1: 77.3438 (81.4818) acc3: 89.8438 (93.3207) acc5: 94.5312 (95.5786) time: 0.5478 data: 0.0006 max mem: 28454 +[2024-12-11 22:35:56 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9109 (0.8466) acc1: 78.1250 (81.4640) acc3: 91.4062 (93.3120) acc5: 94.5312 (95.6000) time: 0.5379 data: 0.0005 max mem: 28454 +[2024-12-11 22:35:56 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5455 s / it) +[2024-12-11 22:35:56 root] (engine.py 264): INFO * Acc@1 81.622 Acc@3 93.376 Acc@5 95.680 loss 0.841 flops 13.207 layer_flops 13.109 +[2024-12-11 22:35:56 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.6% +[2024-12-11 22:35:56 root] (main.py 576): INFO Max accuracy: 81.67% +[2024-12-11 22:35:59 root] (utils.py 283): INFO Epoch: [2] [ 0/2502] eta: 1:58:50 lr: 0.000020 loss_cls: 2.9368 (2.9368) grad_norm: 0.9626 (0.9626) time: 2.8500 data: 0.0002 max mem: 28454 +[2024-12-11 22:36:28 root] (utils.py 283): INFO Epoch: [2] [ 10/2502] eta: 1:59:06 lr: 0.000020 loss_cls: 2.6767 (2.6239) grad_norm: 1.1165 (1.1172) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-11 22:36:56 root] (utils.py 283): INFO Epoch: [2] [ 20/2502] eta: 1:58:44 lr: 0.000020 loss_cls: 2.6767 (2.5608) grad_norm: 1.1165 (1.2489) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-11 22:37:25 root] (utils.py 283): INFO Epoch: [2] [ 30/2502] eta: 1:58:17 lr: 0.000020 loss_cls: 2.4868 (2.5144) grad_norm: 1.1399 (1.2302) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-11 22:37:54 root] (utils.py 283): INFO Epoch: [2] [ 40/2502] eta: 1:57:49 lr: 0.000020 loss_cls: 2.6293 (2.5490) grad_norm: 1.1492 (1.4689) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-11 22:38:22 root] (utils.py 283): INFO Epoch: [2] [ 50/2502] eta: 1:57:22 lr: 0.000020 loss_cls: 2.6654 (2.5462) grad_norm: 1.1492 (1.4938) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-11 22:38:51 root] (utils.py 283): INFO Epoch: [2] [ 60/2502] eta: 1:56:53 lr: 0.000020 loss_cls: 2.7019 (2.5631) grad_norm: 1.1144 (1.4780) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-11 22:39:20 root] (utils.py 283): INFO Epoch: [2] [ 70/2502] eta: 1:56:25 lr: 0.000020 loss_cls: 2.6992 (2.5436) grad_norm: 1.1324 (1.5074) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-11 22:39:49 root] (utils.py 283): INFO Epoch: [2] [ 80/2502] eta: 1:55:57 lr: 0.000020 loss_cls: 2.6577 (2.5466) grad_norm: 1.0696 (1.4586) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-11 22:40:17 root] (utils.py 283): INFO Epoch: [2] [ 90/2502] eta: 1:55:28 lr: 0.000020 loss_cls: 2.6576 (2.5439) grad_norm: 1.1111 (2.3172) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-11 22:40:46 root] (utils.py 283): INFO Epoch: [2] [ 100/2502] eta: 1:55:00 lr: 0.000020 loss_cls: 2.6810 (2.5861) grad_norm: 1.6018 (2.2971) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-11 22:41:15 root] (utils.py 283): INFO Epoch: [2] [ 110/2502] eta: 1:54:31 lr: 0.000020 loss_cls: 2.8714 (2.5918) grad_norm: 1.7305 (2.2345) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-11 22:41:44 root] (utils.py 283): INFO Epoch: [2] [ 120/2502] eta: 1:54:02 lr: 0.000020 loss_cls: 2.5336 (2.5892) grad_norm: 1.3628 (2.1840) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-11 22:42:12 root] (utils.py 283): INFO Epoch: [2] [ 130/2502] eta: 1:53:33 lr: 0.000020 loss_cls: 2.6786 (2.5950) grad_norm: 1.1989 (2.1055) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-11 22:42:41 root] (utils.py 283): INFO Epoch: [2] [ 140/2502] eta: 1:53:04 lr: 0.000020 loss_cls: 2.7462 (2.6045) grad_norm: 1.1391 (2.0373) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-11 22:43:10 root] (utils.py 283): INFO Epoch: [2] [ 150/2502] eta: 1:52:36 lr: 0.000020 loss_cls: 2.7030 (2.5931) grad_norm: 1.1391 (1.9852) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-11 22:43:38 root] (utils.py 283): INFO Epoch: [2] [ 160/2502] eta: 1:52:07 lr: 0.000020 loss_cls: 2.7152 (2.6060) grad_norm: 1.1859 (1.9394) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-11 22:44:07 root] (utils.py 283): INFO Epoch: [2] [ 170/2502] eta: 1:51:38 lr: 0.000020 loss_cls: 2.8024 (2.6017) grad_norm: 1.1889 (1.9067) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-11 22:44:36 root] (utils.py 283): INFO Epoch: [2] [ 180/2502] eta: 1:51:09 lr: 0.000020 loss_cls: 2.7641 (2.6106) grad_norm: 1.1524 (1.8708) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-11 22:45:05 root] (utils.py 283): INFO Epoch: [2] [ 190/2502] eta: 1:50:41 lr: 0.000020 loss_cls: 2.6751 (2.6071) grad_norm: 1.0786 (1.8436) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-11 22:45:33 root] (utils.py 283): INFO Epoch: [2] [ 200/2502] eta: 1:50:12 lr: 0.000020 loss_cls: 2.7588 (2.6117) grad_norm: 1.0968 (1.8092) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-11 22:46:02 root] (utils.py 283): INFO Epoch: [2] [ 210/2502] eta: 1:49:43 lr: 0.000020 loss_cls: 2.8895 (2.6182) grad_norm: 1.1453 (1.7786) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-11 22:46:31 root] (utils.py 283): INFO Epoch: [2] [ 220/2502] eta: 1:49:15 lr: 0.000020 loss_cls: 2.8748 (2.6252) grad_norm: 1.1151 (1.7488) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-11 22:47:00 root] (utils.py 283): INFO Epoch: [2] [ 230/2502] eta: 1:48:46 lr: 0.000020 loss_cls: 2.8235 (2.6199) grad_norm: 1.0794 (1.7218) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-11 22:47:28 root] (utils.py 283): INFO Epoch: [2] [ 240/2502] eta: 1:48:17 lr: 0.000020 loss_cls: 2.8312 (2.6287) grad_norm: 1.0709 (1.6951) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-11 22:47:57 root] (utils.py 283): INFO Epoch: [2] [ 250/2502] eta: 1:47:49 lr: 0.000020 loss_cls: 2.7534 (2.6262) grad_norm: 1.0958 (1.6959) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-11 22:48:26 root] (utils.py 283): INFO Epoch: [2] [ 260/2502] eta: 1:47:20 lr: 0.000020 loss_cls: 2.6950 (2.6269) grad_norm: 1.1717 (1.6768) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-11 22:48:54 root] (utils.py 283): INFO Epoch: [2] [ 270/2502] eta: 1:46:51 lr: 0.000020 loss_cls: 2.7422 (2.6256) grad_norm: 1.1445 (1.6651) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-11 22:49:23 root] (utils.py 283): INFO Epoch: [2] [ 280/2502] eta: 1:46:22 lr: 0.000020 loss_cls: 2.7255 (2.6263) grad_norm: 1.1684 (1.6501) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-11 22:49:52 root] (utils.py 283): INFO Epoch: [2] [ 290/2502] eta: 1:45:54 lr: 0.000020 loss_cls: 2.7496 (2.6262) grad_norm: 1.1684 (1.6390) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-11 22:50:21 root] (utils.py 283): INFO Epoch: [2] [ 300/2502] eta: 1:45:25 lr: 0.000020 loss_cls: 2.7620 (2.6329) grad_norm: 1.1154 (1.6218) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-11 22:50:49 root] (utils.py 283): INFO Epoch: [2] [ 310/2502] eta: 1:44:56 lr: 0.000020 loss_cls: 2.6767 (2.6272) grad_norm: 1.1543 (1.6105) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 22:51:18 root] (utils.py 283): INFO Epoch: [2] [ 320/2502] eta: 1:44:27 lr: 0.000020 loss_cls: 2.5486 (2.6255) grad_norm: 1.1273 (1.6124) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-11 22:51:47 root] (utils.py 283): INFO Epoch: [2] [ 330/2502] eta: 1:43:58 lr: 0.000020 loss_cls: 2.4820 (2.6201) grad_norm: 1.1162 (1.5982) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-11 22:52:15 root] (utils.py 283): INFO Epoch: [2] [ 340/2502] eta: 1:43:30 lr: 0.000020 loss_cls: 2.6177 (2.6230) grad_norm: 1.2280 (1.5948) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 22:52:44 root] (utils.py 283): INFO Epoch: [2] [ 350/2502] eta: 1:43:01 lr: 0.000020 loss_cls: 2.8163 (2.6240) grad_norm: 1.1381 (1.5791) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 22:53:13 root] (utils.py 283): INFO Epoch: [2] [ 360/2502] eta: 1:42:32 lr: 0.000020 loss_cls: 2.6943 (2.6226) grad_norm: 1.0697 (1.5704) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-11 22:53:42 root] (utils.py 283): INFO Epoch: [2] [ 370/2502] eta: 1:42:03 lr: 0.000020 loss_cls: 2.5206 (2.6191) grad_norm: 1.1383 (1.5592) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-11 22:54:10 root] (utils.py 283): INFO Epoch: [2] [ 380/2502] eta: 1:41:35 lr: 0.000020 loss_cls: 2.5206 (2.6167) grad_norm: 1.1307 (1.5492) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-11 22:54:39 root] (utils.py 283): INFO Epoch: [2] [ 390/2502] eta: 1:41:06 lr: 0.000020 loss_cls: 2.6603 (2.6165) grad_norm: 1.1461 (1.5386) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-11 22:55:08 root] (utils.py 283): INFO Epoch: [2] [ 400/2502] eta: 1:40:38 lr: 0.000020 loss_cls: 2.6603 (2.6191) grad_norm: 1.1461 (1.5322) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-11 22:55:37 root] (utils.py 283): INFO Epoch: [2] [ 410/2502] eta: 1:40:09 lr: 0.000020 loss_cls: 2.6297 (2.6190) grad_norm: 1.1733 (1.5291) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-11 22:56:05 root] (utils.py 283): INFO Epoch: [2] [ 420/2502] eta: 1:39:41 lr: 0.000020 loss_cls: 2.5383 (2.6139) grad_norm: 1.1469 (1.5198) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-11 22:56:34 root] (utils.py 283): INFO Epoch: [2] [ 430/2502] eta: 1:39:12 lr: 0.000020 loss_cls: 2.6004 (2.6133) grad_norm: 1.0881 (1.5109) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-11 22:57:03 root] (utils.py 283): INFO Epoch: [2] [ 440/2502] eta: 1:38:44 lr: 0.000020 loss_cls: 2.6779 (2.6139) grad_norm: 1.1133 (1.5045) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-11 22:57:32 root] (utils.py 283): INFO Epoch: [2] [ 450/2502] eta: 1:38:15 lr: 0.000020 loss_cls: 2.6917 (2.6141) grad_norm: 1.1712 (1.5814) time: 2.8793 data: 0.0002 max mem: 28454 +[2024-12-11 22:58:01 root] (utils.py 283): INFO Epoch: [2] [ 460/2502] eta: 1:37:47 lr: 0.000020 loss_cls: 2.8090 (2.6173) grad_norm: 1.3288 (1.5911) time: 2.8797 data: 0.0002 max mem: 28454 +[2024-12-11 22:58:29 root] (utils.py 283): INFO Epoch: [2] [ 470/2502] eta: 1:37:18 lr: 0.000020 loss_cls: 2.8914 (2.6207) grad_norm: 1.4785 (1.5918) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-11 22:58:58 root] (utils.py 283): INFO Epoch: [2] [ 480/2502] eta: 1:36:50 lr: 0.000020 loss_cls: 2.7685 (2.6179) grad_norm: 1.2985 (1.5833) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-11 22:59:27 root] (utils.py 283): INFO Epoch: [2] [ 490/2502] eta: 1:36:21 lr: 0.000020 loss_cls: 2.7994 (2.6206) grad_norm: 1.1581 (1.5782) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-11 22:59:56 root] (utils.py 283): INFO Epoch: [2] [ 500/2502] eta: 1:35:52 lr: 0.000020 loss_cls: 2.8202 (2.6214) grad_norm: 1.1277 (1.5687) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-11 23:00:24 root] (utils.py 283): INFO Epoch: [2] [ 510/2502] eta: 1:35:24 lr: 0.000020 loss_cls: 2.6711 (2.6189) grad_norm: 1.0726 (1.5606) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-11 23:00:53 root] (utils.py 283): INFO Epoch: [2] [ 520/2502] eta: 1:34:55 lr: 0.000020 loss_cls: 2.6994 (2.6221) grad_norm: 1.1616 (1.5549) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-11 23:01:22 root] (utils.py 283): INFO Epoch: [2] [ 530/2502] eta: 1:34:26 lr: 0.000020 loss_cls: 2.5205 (2.6154) grad_norm: 1.1309 (1.5458) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-11 23:01:51 root] (utils.py 283): INFO Epoch: [2] [ 540/2502] eta: 1:33:58 lr: 0.000020 loss_cls: 2.5205 (2.6139) grad_norm: 1.0529 (1.5362) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-11 23:02:19 root] (utils.py 283): INFO Epoch: [2] [ 550/2502] eta: 1:33:29 lr: 0.000020 loss_cls: 2.6956 (2.6126) grad_norm: 1.0529 (1.5295) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-11 23:02:48 root] (utils.py 283): INFO Epoch: [2] [ 560/2502] eta: 1:33:00 lr: 0.000020 loss_cls: 2.8433 (2.6128) grad_norm: 1.0665 (1.5269) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-11 23:03:17 root] (utils.py 283): INFO Epoch: [2] [ 570/2502] eta: 1:32:32 lr: 0.000020 loss_cls: 2.6156 (2.6099) grad_norm: 1.1690 (1.5226) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-11 23:03:46 root] (utils.py 283): INFO Epoch: [2] [ 580/2502] eta: 1:32:03 lr: 0.000020 loss_cls: 2.5483 (2.6083) grad_norm: 1.2295 (1.5297) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-11 23:04:14 root] (utils.py 283): INFO Epoch: [2] [ 590/2502] eta: 1:31:34 lr: 0.000020 loss_cls: 2.7993 (2.6123) grad_norm: 1.2056 (1.5264) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-11 23:04:43 root] (utils.py 283): INFO Epoch: [2] [ 600/2502] eta: 1:31:06 lr: 0.000020 loss_cls: 2.7049 (2.6084) grad_norm: 1.1841 (1.5217) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-11 23:05:12 root] (utils.py 283): INFO Epoch: [2] [ 610/2502] eta: 1:30:37 lr: 0.000020 loss_cls: 2.5298 (2.6085) grad_norm: 1.0944 (1.5165) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-11 23:05:41 root] (utils.py 283): INFO Epoch: [2] [ 620/2502] eta: 1:30:08 lr: 0.000020 loss_cls: 2.7878 (2.6100) grad_norm: 1.0776 (1.5114) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-11 23:06:09 root] (utils.py 283): INFO Epoch: [2] [ 630/2502] eta: 1:29:39 lr: 0.000020 loss_cls: 2.7878 (2.6134) grad_norm: 1.1274 (1.5049) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-11 23:06:38 root] (utils.py 283): INFO Epoch: [2] [ 640/2502] eta: 1:29:11 lr: 0.000020 loss_cls: 2.7167 (2.6109) grad_norm: 1.0894 (1.4986) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-11 23:07:07 root] (utils.py 283): INFO Epoch: [2] [ 650/2502] eta: 1:28:42 lr: 0.000020 loss_cls: 2.5658 (2.6114) grad_norm: 1.0894 (1.4932) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-11 23:07:36 root] (utils.py 283): INFO Epoch: [2] [ 660/2502] eta: 1:28:13 lr: 0.000020 loss_cls: 2.6390 (2.6108) grad_norm: 1.1519 (1.4880) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-11 23:08:04 root] (utils.py 283): INFO Epoch: [2] [ 670/2502] eta: 1:27:45 lr: 0.000020 loss_cls: 2.4963 (2.6113) grad_norm: 1.1614 (1.4842) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-11 23:08:33 root] (utils.py 283): INFO Epoch: [2] [ 680/2502] eta: 1:27:16 lr: 0.000020 loss_cls: 2.6230 (2.6126) grad_norm: 1.1873 (1.4800) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-11 23:09:02 root] (utils.py 283): INFO Epoch: [2] [ 690/2502] eta: 1:26:47 lr: 0.000020 loss_cls: 2.6622 (2.6138) grad_norm: 1.1949 (1.4809) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 23:09:31 root] (utils.py 283): INFO Epoch: [2] [ 700/2502] eta: 1:26:18 lr: 0.000020 loss_cls: 2.8211 (2.6150) grad_norm: 1.1197 (1.4761) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-11 23:09:59 root] (utils.py 283): INFO Epoch: [2] [ 710/2502] eta: 1:25:50 lr: 0.000020 loss_cls: 2.7293 (2.6135) grad_norm: 1.0875 (1.4713) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-11 23:10:28 root] (utils.py 283): INFO Epoch: [2] [ 720/2502] eta: 1:25:21 lr: 0.000020 loss_cls: 2.5205 (2.6120) grad_norm: 1.0705 (1.4661) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-11 23:10:57 root] (utils.py 283): INFO Epoch: [2] [ 730/2502] eta: 1:24:52 lr: 0.000020 loss_cls: 2.5395 (2.6102) grad_norm: 1.0730 (1.4604) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-11 23:11:26 root] (utils.py 283): INFO Epoch: [2] [ 740/2502] eta: 1:24:23 lr: 0.000020 loss_cls: 2.5395 (2.6088) grad_norm: 1.0824 (1.4565) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-11 23:11:54 root] (utils.py 283): INFO Epoch: [2] [ 750/2502] eta: 1:23:55 lr: 0.000020 loss_cls: 2.7077 (2.6106) grad_norm: 1.1102 (1.4544) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-11 23:12:23 root] (utils.py 283): INFO Epoch: [2] [ 760/2502] eta: 1:23:26 lr: 0.000020 loss_cls: 2.7438 (2.6127) grad_norm: 1.1121 (1.4529) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-11 23:12:52 root] (utils.py 283): INFO Epoch: [2] [ 770/2502] eta: 1:22:57 lr: 0.000020 loss_cls: 2.7376 (2.6112) grad_norm: 1.1121 (1.4483) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-11 23:13:21 root] (utils.py 283): INFO Epoch: [2] [ 780/2502] eta: 1:22:28 lr: 0.000020 loss_cls: 2.6278 (2.6113) grad_norm: 1.1315 (1.4514) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-11 23:13:49 root] (utils.py 283): INFO Epoch: [2] [ 790/2502] eta: 1:22:00 lr: 0.000020 loss_cls: 2.6278 (2.6095) grad_norm: 1.1435 (1.4471) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-11 23:14:18 root] (utils.py 283): INFO Epoch: [2] [ 800/2502] eta: 1:21:31 lr: 0.000020 loss_cls: 2.6003 (2.6096) grad_norm: 1.0947 (1.4454) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-11 23:14:47 root] (utils.py 283): INFO Epoch: [2] [ 810/2502] eta: 1:21:02 lr: 0.000020 loss_cls: 2.6003 (2.6091) grad_norm: 1.1208 (1.4423) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-11 23:15:15 root] (utils.py 283): INFO Epoch: [2] [ 820/2502] eta: 1:20:33 lr: 0.000020 loss_cls: 2.6817 (2.6105) grad_norm: 1.1211 (1.4395) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-11 23:15:44 root] (utils.py 283): INFO Epoch: [2] [ 830/2502] eta: 1:20:05 lr: 0.000020 loss_cls: 2.8114 (2.6116) grad_norm: 1.1511 (1.4367) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-11 23:16:13 root] (utils.py 283): INFO Epoch: [2] [ 840/2502] eta: 1:19:36 lr: 0.000020 loss_cls: 2.7334 (2.6116) grad_norm: 1.1439 (1.4330) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-11 23:16:42 root] (utils.py 283): INFO Epoch: [2] [ 850/2502] eta: 1:19:07 lr: 0.000020 loss_cls: 2.6477 (2.6100) grad_norm: 1.0848 (1.4293) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-11 23:17:10 root] (utils.py 283): INFO Epoch: [2] [ 860/2502] eta: 1:18:38 lr: 0.000020 loss_cls: 2.7640 (2.6130) grad_norm: 1.1038 (1.4265) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-11 23:17:39 root] (utils.py 283): INFO Epoch: [2] [ 870/2502] eta: 1:18:10 lr: 0.000020 loss_cls: 2.8945 (2.6161) grad_norm: 1.1450 (1.4455) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-11 23:18:08 root] (utils.py 283): INFO Epoch: [2] [ 880/2502] eta: 1:17:41 lr: 0.000020 loss_cls: 2.8744 (2.6175) grad_norm: 1.2070 (1.4443) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-11 23:18:37 root] (utils.py 283): INFO Epoch: [2] [ 890/2502] eta: 1:17:12 lr: 0.000020 loss_cls: 2.6945 (2.6161) grad_norm: 1.1570 (1.4425) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-11 23:19:05 root] (utils.py 283): INFO Epoch: [2] [ 900/2502] eta: 1:16:43 lr: 0.000020 loss_cls: 2.5043 (2.6132) grad_norm: 1.1464 (1.4408) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-11 23:19:34 root] (utils.py 283): INFO Epoch: [2] [ 910/2502] eta: 1:16:15 lr: 0.000020 loss_cls: 2.6646 (2.6142) grad_norm: 1.1411 (1.4377) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-11 23:20:03 root] (utils.py 283): INFO Epoch: [2] [ 920/2502] eta: 1:15:46 lr: 0.000020 loss_cls: 2.7473 (2.6137) grad_norm: 1.1852 (1.4370) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-11 23:20:31 root] (utils.py 283): INFO Epoch: [2] [ 930/2502] eta: 1:15:17 lr: 0.000020 loss_cls: 2.8063 (2.6143) grad_norm: 1.1742 (1.4350) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-11 23:21:00 root] (utils.py 283): INFO Epoch: [2] [ 940/2502] eta: 1:14:48 lr: 0.000020 loss_cls: 2.8521 (2.6155) grad_norm: 1.0930 (1.4318) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-11 23:21:29 root] (utils.py 283): INFO Epoch: [2] [ 950/2502] eta: 1:14:19 lr: 0.000020 loss_cls: 2.8373 (2.6153) grad_norm: 1.1308 (1.4302) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-11 23:21:58 root] (utils.py 283): INFO Epoch: [2] [ 960/2502] eta: 1:13:51 lr: 0.000020 loss_cls: 2.6210 (2.6136) grad_norm: 1.1339 (1.4297) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-11 23:22:26 root] (utils.py 283): INFO Epoch: [2] [ 970/2502] eta: 1:13:22 lr: 0.000020 loss_cls: 2.6550 (2.6145) grad_norm: 1.1240 (1.4262) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-11 23:22:55 root] (utils.py 283): INFO Epoch: [2] [ 980/2502] eta: 1:12:53 lr: 0.000020 loss_cls: 2.7559 (2.6159) grad_norm: 1.1899 (1.4276) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-11 23:23:24 root] (utils.py 283): INFO Epoch: [2] [ 990/2502] eta: 1:12:24 lr: 0.000020 loss_cls: 2.6834 (2.6160) grad_norm: 1.2578 (1.4280) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-11 23:23:53 root] (utils.py 283): INFO Epoch: [2] [1000/2502] eta: 1:11:56 lr: 0.000020 loss_cls: 2.6749 (2.6166) grad_norm: 1.1707 (1.4261) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-11 23:24:21 root] (utils.py 283): INFO Epoch: [2] [1010/2502] eta: 1:11:27 lr: 0.000020 loss_cls: 2.7342 (2.6176) grad_norm: 1.1707 (1.4230) time: 2.8720 data: 0.0003 max mem: 28454 +[2024-12-11 23:24:50 root] (utils.py 283): INFO Epoch: [2] [1020/2502] eta: 1:10:58 lr: 0.000020 loss_cls: 2.7513 (2.6171) grad_norm: 1.0705 (1.4197) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-11 23:25:19 root] (utils.py 283): INFO Epoch: [2] [1030/2502] eta: 1:10:29 lr: 0.000020 loss_cls: 2.4437 (2.6142) grad_norm: 1.0705 (1.4167) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-11 23:25:47 root] (utils.py 283): INFO Epoch: [2] [1040/2502] eta: 1:10:01 lr: 0.000020 loss_cls: 2.5270 (2.6142) grad_norm: 1.0910 (1.4183) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-11 23:26:16 root] (utils.py 283): INFO Epoch: [2] [1050/2502] eta: 1:09:32 lr: 0.000020 loss_cls: 2.6506 (2.6139) grad_norm: 1.1391 (1.4165) time: 2.8763 data: 0.0003 max mem: 28454 +[2024-12-11 23:26:45 root] (utils.py 283): INFO Epoch: [2] [1060/2502] eta: 1:09:03 lr: 0.000020 loss_cls: 2.6506 (2.6145) grad_norm: 1.1930 (1.4536) time: 2.8767 data: 0.0003 max mem: 28454 +[2024-12-11 23:27:14 root] (utils.py 283): INFO Epoch: [2] [1070/2502] eta: 1:08:35 lr: 0.000020 loss_cls: 2.7647 (2.6161) grad_norm: 1.3950 (1.4535) time: 2.8773 data: 0.0002 max mem: 28454 +[2024-12-11 23:27:43 root] (utils.py 283): INFO Epoch: [2] [1080/2502] eta: 1:08:06 lr: 0.000020 loss_cls: 2.7769 (2.6164) grad_norm: 1.4208 (1.4536) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-11 23:28:11 root] (utils.py 283): INFO Epoch: [2] [1090/2502] eta: 1:07:37 lr: 0.000020 loss_cls: 2.7786 (2.6171) grad_norm: 1.1728 (1.4514) time: 2.8805 data: 0.0002 max mem: 28454 +[2024-12-11 23:28:40 root] (utils.py 283): INFO Epoch: [2] [1100/2502] eta: 1:07:09 lr: 0.000020 loss_cls: 2.6647 (2.6169) grad_norm: 1.1476 (1.4488) time: 2.8808 data: 0.0003 max mem: 28454 +[2024-12-11 23:29:09 root] (utils.py 283): INFO Epoch: [2] [1110/2502] eta: 1:06:40 lr: 0.000020 loss_cls: 2.4458 (2.6146) grad_norm: 1.0816 (1.4457) time: 2.8783 data: 0.0002 max mem: 28454 +[2024-12-11 23:29:38 root] (utils.py 283): INFO Epoch: [2] [1120/2502] eta: 1:06:11 lr: 0.000020 loss_cls: 2.4458 (2.6143) grad_norm: 1.1528 (1.4440) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-11 23:30:07 root] (utils.py 283): INFO Epoch: [2] [1130/2502] eta: 1:05:43 lr: 0.000020 loss_cls: 2.5153 (2.6138) grad_norm: 1.1607 (1.4431) time: 2.8792 data: 0.0002 max mem: 28454 +[2024-12-11 23:30:35 root] (utils.py 283): INFO Epoch: [2] [1140/2502] eta: 1:05:14 lr: 0.000020 loss_cls: 2.5153 (2.6118) grad_norm: 1.1554 (1.4407) time: 2.8811 data: 0.0003 max mem: 28454 +[2024-12-11 23:31:04 root] (utils.py 283): INFO Epoch: [2] [1150/2502] eta: 1:04:45 lr: 0.000020 loss_cls: 2.5578 (2.6123) grad_norm: 1.1715 (1.4385) time: 2.8828 data: 0.0002 max mem: 28454 +[2024-12-11 23:31:33 root] (utils.py 283): INFO Epoch: [2] [1160/2502] eta: 1:04:17 lr: 0.000020 loss_cls: 2.7760 (2.6125) grad_norm: 1.1534 (1.4377) time: 2.8824 data: 0.0002 max mem: 28454 +[2024-12-11 23:32:02 root] (utils.py 283): INFO Epoch: [2] [1170/2502] eta: 1:03:48 lr: 0.000020 loss_cls: 2.8250 (2.6145) grad_norm: 1.2649 (1.4679) time: 2.8813 data: 0.0002 max mem: 28454 +[2024-12-11 23:32:31 root] (utils.py 283): INFO Epoch: [2] [1180/2502] eta: 1:03:19 lr: 0.000020 loss_cls: 2.8887 (2.6147) grad_norm: 1.3873 (1.4719) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-11 23:32:59 root] (utils.py 283): INFO Epoch: [2] [1190/2502] eta: 1:02:51 lr: 0.000020 loss_cls: 2.7317 (2.6149) grad_norm: 1.3873 (1.4710) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-11 23:33:28 root] (utils.py 283): INFO Epoch: [2] [1200/2502] eta: 1:02:22 lr: 0.000020 loss_cls: 2.7452 (2.6164) grad_norm: 1.2581 (1.4709) time: 2.8800 data: 0.0002 max mem: 28454 +[2024-12-11 23:33:57 root] (utils.py 283): INFO Epoch: [2] [1210/2502] eta: 1:01:53 lr: 0.000020 loss_cls: 2.5316 (2.6133) grad_norm: 1.2581 (1.4694) time: 2.8783 data: 0.0002 max mem: 28454 +[2024-12-11 23:34:26 root] (utils.py 283): INFO Epoch: [2] [1220/2502] eta: 1:01:25 lr: 0.000020 loss_cls: 2.5316 (2.6137) grad_norm: 1.1307 (1.4690) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-11 23:34:55 root] (utils.py 283): INFO Epoch: [2] [1230/2502] eta: 1:00:56 lr: 0.000020 loss_cls: 2.6879 (2.6130) grad_norm: 1.0853 (1.4661) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-11 23:35:23 root] (utils.py 283): INFO Epoch: [2] [1240/2502] eta: 1:00:27 lr: 0.000020 loss_cls: 2.7304 (2.6141) grad_norm: 1.1386 (1.4650) time: 2.8778 data: 0.0003 max mem: 28454 +[2024-12-11 23:35:52 root] (utils.py 283): INFO Epoch: [2] [1250/2502] eta: 0:59:58 lr: 0.000020 loss_cls: 2.8621 (2.6159) grad_norm: 1.1561 (1.4626) time: 2.8782 data: 0.0003 max mem: 28454 +[2024-12-11 23:36:21 root] (utils.py 283): INFO Epoch: [2] [1260/2502] eta: 0:59:30 lr: 0.000020 loss_cls: 2.7554 (2.6167) grad_norm: 1.1356 (1.4598) time: 2.8772 data: 0.0002 max mem: 28454 +[2024-12-11 23:36:50 root] (utils.py 283): INFO Epoch: [2] [1270/2502] eta: 0:59:01 lr: 0.000020 loss_cls: 2.7415 (2.6170) grad_norm: 1.1023 (1.4693) time: 2.8793 data: 0.0002 max mem: 28454 +[2024-12-11 23:37:18 root] (utils.py 283): INFO Epoch: [2] [1280/2502] eta: 0:58:32 lr: 0.000020 loss_cls: 2.7415 (2.6169) grad_norm: 1.1650 (1.4673) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-11 23:37:47 root] (utils.py 283): INFO Epoch: [2] [1290/2502] eta: 0:58:04 lr: 0.000020 loss_cls: 2.7842 (2.6189) grad_norm: 1.2345 (1.4690) time: 2.8771 data: 0.0002 max mem: 28454 +[2024-12-11 23:38:16 root] (utils.py 283): INFO Epoch: [2] [1300/2502] eta: 0:57:35 lr: 0.000020 loss_cls: 2.8212 (2.6205) grad_norm: 1.2345 (1.4673) time: 2.8898 data: 0.0002 max mem: 28454 +[2024-12-11 23:38:45 root] (utils.py 283): INFO Epoch: [2] [1310/2502] eta: 0:57:06 lr: 0.000020 loss_cls: 2.8690 (2.6213) grad_norm: 1.1182 (1.4737) time: 2.8864 data: 0.0002 max mem: 28454 +[2024-12-11 23:39:14 root] (utils.py 283): INFO Epoch: [2] [1320/2502] eta: 0:56:38 lr: 0.000020 loss_cls: 2.8709 (2.6224) grad_norm: 1.1787 (1.4725) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-11 23:39:42 root] (utils.py 283): INFO Epoch: [2] [1330/2502] eta: 0:56:09 lr: 0.000020 loss_cls: 2.7663 (2.6211) grad_norm: 1.0571 (1.4696) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-11 23:40:11 root] (utils.py 283): INFO Epoch: [2] [1340/2502] eta: 0:55:40 lr: 0.000020 loss_cls: 2.6898 (2.6216) grad_norm: 1.0797 (1.4682) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-11 23:40:40 root] (utils.py 283): INFO Epoch: [2] [1350/2502] eta: 0:55:11 lr: 0.000020 loss_cls: 2.7978 (2.6211) grad_norm: 1.1573 (1.4660) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-11 23:41:09 root] (utils.py 283): INFO Epoch: [2] [1360/2502] eta: 0:54:43 lr: 0.000020 loss_cls: 2.7209 (2.6216) grad_norm: 1.1365 (1.4633) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-11 23:41:38 root] (utils.py 283): INFO Epoch: [2] [1370/2502] eta: 0:54:14 lr: 0.000020 loss_cls: 2.9094 (2.6226) grad_norm: 1.1396 (1.4693) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-11 23:42:06 root] (utils.py 283): INFO Epoch: [2] [1380/2502] eta: 0:53:45 lr: 0.000020 loss_cls: 2.7482 (2.6229) grad_norm: 1.2849 (1.4682) time: 2.8789 data: 0.0002 max mem: 28454 +[2024-12-11 23:42:35 root] (utils.py 283): INFO Epoch: [2] [1390/2502] eta: 0:53:16 lr: 0.000020 loss_cls: 2.7149 (2.6234) grad_norm: 1.2849 (1.4663) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-11 23:43:04 root] (utils.py 283): INFO Epoch: [2] [1400/2502] eta: 0:52:48 lr: 0.000020 loss_cls: 2.7149 (2.6232) grad_norm: 1.2165 (1.4672) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-11 23:43:33 root] (utils.py 283): INFO Epoch: [2] [1410/2502] eta: 0:52:19 lr: 0.000020 loss_cls: 2.7065 (2.6232) grad_norm: 1.1929 (1.4658) time: 2.8812 data: 0.0002 max mem: 28454 +[2024-12-11 23:44:01 root] (utils.py 283): INFO Epoch: [2] [1420/2502] eta: 0:51:50 lr: 0.000020 loss_cls: 2.7410 (2.6233) grad_norm: 1.1006 (1.4640) time: 2.8826 data: 0.0002 max mem: 28454 +[2024-12-11 23:44:30 root] (utils.py 283): INFO Epoch: [2] [1430/2502] eta: 0:51:22 lr: 0.000020 loss_cls: 2.5149 (2.6219) grad_norm: 1.1972 (1.4626) time: 2.8825 data: 0.0002 max mem: 28454 +[2024-12-11 23:44:59 root] (utils.py 283): INFO Epoch: [2] [1440/2502] eta: 0:50:53 lr: 0.000020 loss_cls: 2.6219 (2.6222) grad_norm: 1.1972 (1.4610) time: 2.8831 data: 0.0002 max mem: 28454 +[2024-12-11 23:45:28 root] (utils.py 283): INFO Epoch: [2] [1450/2502] eta: 0:50:24 lr: 0.000020 loss_cls: 2.7784 (2.6216) grad_norm: 1.1814 (1.4602) time: 2.8843 data: 0.0002 max mem: 28454 +[2024-12-11 23:45:57 root] (utils.py 283): INFO Epoch: [2] [1460/2502] eta: 0:49:56 lr: 0.000020 loss_cls: 2.6776 (2.6210) grad_norm: 1.1814 (1.4623) time: 2.8850 data: 0.0002 max mem: 28454 +[2024-12-11 23:46:26 root] (utils.py 283): INFO Epoch: [2] [1470/2502] eta: 0:49:27 lr: 0.000020 loss_cls: 2.4663 (2.6195) grad_norm: 1.1660 (1.4691) time: 2.8823 data: 0.0002 max mem: 28454 +[2024-12-11 23:46:54 root] (utils.py 283): INFO Epoch: [2] [1480/2502] eta: 0:48:58 lr: 0.000020 loss_cls: 2.4663 (2.6190) grad_norm: 1.2236 (1.4679) time: 2.8803 data: 0.0002 max mem: 28454 +[2024-12-11 23:47:23 root] (utils.py 283): INFO Epoch: [2] [1490/2502] eta: 0:48:29 lr: 0.000020 loss_cls: 2.7273 (2.6195) grad_norm: 1.2369 (1.4684) time: 2.8804 data: 0.0002 max mem: 28454 +[2024-12-11 23:47:52 root] (utils.py 283): INFO Epoch: [2] [1500/2502] eta: 0:48:01 lr: 0.000020 loss_cls: 2.7872 (2.6188) grad_norm: 1.1209 (1.4659) time: 2.8796 data: 0.0002 max mem: 28454 +[2024-12-11 23:48:21 root] (utils.py 283): INFO Epoch: [2] [1510/2502] eta: 0:47:32 lr: 0.000020 loss_cls: 2.7690 (2.6193) grad_norm: 1.1185 (1.4641) time: 2.8808 data: 0.0002 max mem: 28454 +[2024-12-11 23:48:50 root] (utils.py 283): INFO Epoch: [2] [1520/2502] eta: 0:47:03 lr: 0.000020 loss_cls: 2.6440 (2.6184) grad_norm: 1.1442 (1.4620) time: 2.8829 data: 0.0002 max mem: 28454 +[2024-12-11 23:49:19 root] (utils.py 283): INFO Epoch: [2] [1530/2502] eta: 0:46:35 lr: 0.000020 loss_cls: 2.5526 (2.6187) grad_norm: 1.1624 (1.4615) time: 2.8825 data: 0.0002 max mem: 28454 +[2024-12-11 23:49:47 root] (utils.py 283): INFO Epoch: [2] [1540/2502] eta: 0:46:06 lr: 0.000020 loss_cls: 2.7723 (2.6189) grad_norm: 1.1819 (1.4602) time: 2.8822 data: 0.0002 max mem: 28454 +[2024-12-11 23:50:16 root] (utils.py 283): INFO Epoch: [2] [1550/2502] eta: 0:45:37 lr: 0.000020 loss_cls: 2.6565 (2.6191) grad_norm: 1.1915 (1.4583) time: 2.8798 data: 0.0002 max mem: 28454 +[2024-12-11 23:50:45 root] (utils.py 283): INFO Epoch: [2] [1560/2502] eta: 0:45:08 lr: 0.000020 loss_cls: 2.5913 (2.6188) grad_norm: 1.1695 (1.4568) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-11 23:51:14 root] (utils.py 283): INFO Epoch: [2] [1570/2502] eta: 0:44:40 lr: 0.000020 loss_cls: 2.7109 (2.6191) grad_norm: 1.2642 (1.4568) time: 2.8803 data: 0.0002 max mem: 28454 +[2024-12-11 23:51:43 root] (utils.py 283): INFO Epoch: [2] [1580/2502] eta: 0:44:11 lr: 0.000020 loss_cls: 2.7927 (2.6187) grad_norm: 1.1655 (1.4557) time: 2.8812 data: 0.0002 max mem: 28454 +[2024-12-11 23:52:11 root] (utils.py 283): INFO Epoch: [2] [1590/2502] eta: 0:43:42 lr: 0.000020 loss_cls: 2.7565 (2.6190) grad_norm: 1.0978 (1.4537) time: 2.8813 data: 0.0002 max mem: 28454 +[2024-12-11 23:52:40 root] (utils.py 283): INFO Epoch: [2] [1600/2502] eta: 0:43:13 lr: 0.000020 loss_cls: 2.7004 (2.6186) grad_norm: 1.1330 (1.4520) time: 2.8806 data: 0.0002 max mem: 28454 +[2024-12-11 23:53:09 root] (utils.py 283): INFO Epoch: [2] [1610/2502] eta: 0:42:45 lr: 0.000020 loss_cls: 2.7715 (2.6184) grad_norm: 1.1099 (1.4515) time: 2.8787 data: 0.0002 max mem: 28454 +[2024-12-11 23:53:38 root] (utils.py 283): INFO Epoch: [2] [1620/2502] eta: 0:42:16 lr: 0.000020 loss_cls: 2.7818 (2.6192) grad_norm: 1.0885 (1.4505) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-11 23:54:06 root] (utils.py 283): INFO Epoch: [2] [1630/2502] eta: 0:41:47 lr: 0.000020 loss_cls: 2.7773 (2.6184) grad_norm: 1.1306 (1.4528) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-11 23:54:35 root] (utils.py 283): INFO Epoch: [2] [1640/2502] eta: 0:41:18 lr: 0.000020 loss_cls: 2.7773 (2.6189) grad_norm: 1.1133 (1.4518) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-11 23:55:04 root] (utils.py 283): INFO Epoch: [2] [1650/2502] eta: 0:40:50 lr: 0.000020 loss_cls: 2.5881 (2.6182) grad_norm: 1.1158 (1.4499) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-11 23:55:33 root] (utils.py 283): INFO Epoch: [2] [1660/2502] eta: 0:40:21 lr: 0.000020 loss_cls: 2.5822 (2.6176) grad_norm: 1.1118 (1.4476) time: 2.8748 data: 0.0003 max mem: 28454 +[2024-12-11 23:56:01 root] (utils.py 283): INFO Epoch: [2] [1670/2502] eta: 0:39:52 lr: 0.000020 loss_cls: 2.6109 (2.6165) grad_norm: 1.0390 (1.4454) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-11 23:56:30 root] (utils.py 283): INFO Epoch: [2] [1680/2502] eta: 0:39:23 lr: 0.000020 loss_cls: 2.4643 (2.6158) grad_norm: 1.0396 (1.4431) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-11 23:56:59 root] (utils.py 283): INFO Epoch: [2] [1690/2502] eta: 0:38:55 lr: 0.000020 loss_cls: 2.5249 (2.6149) grad_norm: 1.0837 (1.4409) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-11 23:57:28 root] (utils.py 283): INFO Epoch: [2] [1700/2502] eta: 0:38:26 lr: 0.000020 loss_cls: 2.7188 (2.6158) grad_norm: 1.1049 (1.4403) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-11 23:57:56 root] (utils.py 283): INFO Epoch: [2] [1710/2502] eta: 0:37:57 lr: 0.000020 loss_cls: 2.7224 (2.6161) grad_norm: 1.1049 (1.4389) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-11 23:58:25 root] (utils.py 283): INFO Epoch: [2] [1720/2502] eta: 0:37:28 lr: 0.000020 loss_cls: 2.7712 (2.6174) grad_norm: 1.1647 (1.4391) time: 2.8706 data: 0.0003 max mem: 28454 +[2024-12-11 23:58:54 root] (utils.py 283): INFO Epoch: [2] [1730/2502] eta: 0:36:59 lr: 0.000020 loss_cls: 2.7536 (2.6172) grad_norm: 1.1917 (1.4377) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-11 23:59:23 root] (utils.py 283): INFO Epoch: [2] [1740/2502] eta: 0:36:31 lr: 0.000020 loss_cls: 2.4508 (2.6157) grad_norm: 1.1625 (1.4361) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-11 23:59:51 root] (utils.py 283): INFO Epoch: [2] [1750/2502] eta: 0:36:02 lr: 0.000020 loss_cls: 2.3593 (2.6146) grad_norm: 1.1183 (1.4343) time: 2.8700 data: 0.0003 max mem: 28454 +[2024-12-12 00:00:20 root] (utils.py 283): INFO Epoch: [2] [1760/2502] eta: 0:35:33 lr: 0.000020 loss_cls: 2.4517 (2.6137) grad_norm: 1.1204 (1.4334) time: 2.8710 data: 0.0003 max mem: 28454 +[2024-12-12 00:00:49 root] (utils.py 283): INFO Epoch: [2] [1770/2502] eta: 0:35:04 lr: 0.000020 loss_cls: 2.6720 (2.6140) grad_norm: 1.1536 (1.4326) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 00:01:17 root] (utils.py 283): INFO Epoch: [2] [1780/2502] eta: 0:34:36 lr: 0.000020 loss_cls: 2.7503 (2.6140) grad_norm: 1.1280 (1.4307) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 00:01:46 root] (utils.py 283): INFO Epoch: [2] [1790/2502] eta: 0:34:07 lr: 0.000020 loss_cls: 2.7482 (2.6149) grad_norm: 1.0745 (1.4288) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 00:02:15 root] (utils.py 283): INFO Epoch: [2] [1800/2502] eta: 0:33:38 lr: 0.000020 loss_cls: 2.6946 (2.6141) grad_norm: 1.1093 (1.4273) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 00:02:44 root] (utils.py 283): INFO Epoch: [2] [1810/2502] eta: 0:33:09 lr: 0.000020 loss_cls: 2.6946 (2.6137) grad_norm: 1.0460 (1.4259) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 00:03:12 root] (utils.py 283): INFO Epoch: [2] [1820/2502] eta: 0:32:41 lr: 0.000020 loss_cls: 2.6994 (2.6137) grad_norm: 1.0538 (1.4249) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 00:03:41 root] (utils.py 283): INFO Epoch: [2] [1830/2502] eta: 0:32:12 lr: 0.000020 loss_cls: 2.6267 (2.6131) grad_norm: 1.0640 (1.4230) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 00:04:10 root] (utils.py 283): INFO Epoch: [2] [1840/2502] eta: 0:31:43 lr: 0.000020 loss_cls: 2.6267 (2.6128) grad_norm: 1.0679 (1.4213) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 00:04:38 root] (utils.py 283): INFO Epoch: [2] [1850/2502] eta: 0:31:14 lr: 0.000020 loss_cls: 2.3965 (2.6114) grad_norm: 1.1177 (1.4198) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 00:05:07 root] (utils.py 283): INFO Epoch: [2] [1860/2502] eta: 0:30:45 lr: 0.000020 loss_cls: 2.4139 (2.6110) grad_norm: 1.1540 (1.4193) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 00:05:36 root] (utils.py 283): INFO Epoch: [2] [1870/2502] eta: 0:30:17 lr: 0.000020 loss_cls: 2.7088 (2.6110) grad_norm: 1.1837 (1.4182) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 00:06:04 root] (utils.py 283): INFO Epoch: [2] [1880/2502] eta: 0:29:48 lr: 0.000020 loss_cls: 2.7111 (2.6100) grad_norm: 1.1420 (1.4167) time: 2.8642 data: 0.0002 max mem: 28454 +[2024-12-12 00:06:33 root] (utils.py 283): INFO Epoch: [2] [1890/2502] eta: 0:29:19 lr: 0.000020 loss_cls: 2.6477 (2.6098) grad_norm: 1.1091 (1.4150) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 00:07:02 root] (utils.py 283): INFO Epoch: [2] [1900/2502] eta: 0:28:50 lr: 0.000020 loss_cls: 2.7042 (2.6093) grad_norm: 1.1274 (1.4139) time: 2.8710 data: 0.0003 max mem: 28454 +[2024-12-12 00:07:31 root] (utils.py 283): INFO Epoch: [2] [1910/2502] eta: 0:28:22 lr: 0.000020 loss_cls: 2.6344 (2.6085) grad_norm: 1.1274 (1.4124) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 00:07:59 root] (utils.py 283): INFO Epoch: [2] [1920/2502] eta: 0:27:53 lr: 0.000020 loss_cls: 2.5915 (2.6082) grad_norm: 1.0548 (1.4115) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 00:08:28 root] (utils.py 283): INFO Epoch: [2] [1930/2502] eta: 0:27:24 lr: 0.000020 loss_cls: 2.6355 (2.6083) grad_norm: 1.1393 (1.4107) time: 2.8709 data: 0.0003 max mem: 28454 +[2024-12-12 00:08:57 root] (utils.py 283): INFO Epoch: [2] [1940/2502] eta: 0:26:55 lr: 0.000020 loss_cls: 2.6355 (2.6076) grad_norm: 1.2160 (1.4097) time: 2.8685 data: 0.0003 max mem: 28454 +[2024-12-12 00:09:25 root] (utils.py 283): INFO Epoch: [2] [1950/2502] eta: 0:26:27 lr: 0.000020 loss_cls: 2.5825 (2.6063) grad_norm: 1.2192 (1.4087) time: 2.8693 data: 0.0003 max mem: 28454 +[2024-12-12 00:09:54 root] (utils.py 283): INFO Epoch: [2] [1960/2502] eta: 0:25:58 lr: 0.000020 loss_cls: 2.7472 (2.6070) grad_norm: 1.1510 (1.4110) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 00:10:23 root] (utils.py 283): INFO Epoch: [2] [1970/2502] eta: 0:25:29 lr: 0.000020 loss_cls: 2.6933 (2.6065) grad_norm: 1.1045 (1.4101) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 00:10:51 root] (utils.py 283): INFO Epoch: [2] [1980/2502] eta: 0:25:00 lr: 0.000020 loss_cls: 2.5496 (2.6066) grad_norm: 1.1004 (1.4090) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 00:11:20 root] (utils.py 283): INFO Epoch: [2] [1990/2502] eta: 0:24:31 lr: 0.000020 loss_cls: 2.5276 (2.6059) grad_norm: 1.1004 (1.4075) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 00:11:49 root] (utils.py 283): INFO Epoch: [2] [2000/2502] eta: 0:24:03 lr: 0.000020 loss_cls: 2.3707 (2.6045) grad_norm: 1.1298 (1.4065) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 00:12:17 root] (utils.py 283): INFO Epoch: [2] [2010/2502] eta: 0:23:34 lr: 0.000020 loss_cls: 2.3792 (2.6045) grad_norm: 1.1611 (1.4052) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 00:12:46 root] (utils.py 283): INFO Epoch: [2] [2020/2502] eta: 0:23:05 lr: 0.000020 loss_cls: 2.6330 (2.6043) grad_norm: 1.1611 (1.4039) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 00:13:15 root] (utils.py 283): INFO Epoch: [2] [2030/2502] eta: 0:22:36 lr: 0.000020 loss_cls: 2.6602 (2.6045) grad_norm: 1.1089 (1.4025) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 00:13:43 root] (utils.py 283): INFO Epoch: [2] [2040/2502] eta: 0:22:08 lr: 0.000020 loss_cls: 2.6272 (2.6032) grad_norm: 1.0998 (1.4018) time: 2.8619 data: 0.0002 max mem: 28454 +[2024-12-12 00:14:12 root] (utils.py 283): INFO Epoch: [2] [2050/2502] eta: 0:21:39 lr: 0.000020 loss_cls: 2.6447 (2.6030) grad_norm: 1.1044 (1.4127) time: 2.8591 data: 0.0002 max mem: 28454 +[2024-12-12 00:14:41 root] (utils.py 283): INFO Epoch: [2] [2060/2502] eta: 0:21:10 lr: 0.000020 loss_cls: 2.7546 (2.6038) grad_norm: 1.1184 (1.4117) time: 2.8628 data: 0.0002 max mem: 28454 +[2024-12-12 00:15:09 root] (utils.py 283): INFO Epoch: [2] [2070/2502] eta: 0:20:41 lr: 0.000020 loss_cls: 2.6675 (2.6033) grad_norm: 1.1442 (1.4106) time: 2.8647 data: 0.0002 max mem: 28454 +[2024-12-12 00:15:38 root] (utils.py 283): INFO Epoch: [2] [2080/2502] eta: 0:20:13 lr: 0.000020 loss_cls: 2.5560 (2.6023) grad_norm: 1.1108 (1.4095) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-12 00:16:06 root] (utils.py 283): INFO Epoch: [2] [2090/2502] eta: 0:19:44 lr: 0.000020 loss_cls: 2.6099 (2.6024) grad_norm: 1.1177 (1.4133) time: 2.8653 data: 0.0002 max mem: 28454 +[2024-12-12 00:16:35 root] (utils.py 283): INFO Epoch: [2] [2100/2502] eta: 0:19:15 lr: 0.000020 loss_cls: 2.5684 (2.6023) grad_norm: 1.0700 (1.4116) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 00:17:04 root] (utils.py 283): INFO Epoch: [2] [2110/2502] eta: 0:18:46 lr: 0.000020 loss_cls: 2.4459 (2.6013) grad_norm: 1.0531 (1.4102) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 00:17:32 root] (utils.py 283): INFO Epoch: [2] [2120/2502] eta: 0:18:17 lr: 0.000020 loss_cls: 2.4485 (2.6009) grad_norm: 1.1314 (1.4095) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 00:18:01 root] (utils.py 283): INFO Epoch: [2] [2130/2502] eta: 0:17:49 lr: 0.000020 loss_cls: 2.6466 (2.6015) grad_norm: 1.1314 (1.4083) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 00:18:30 root] (utils.py 283): INFO Epoch: [2] [2140/2502] eta: 0:17:20 lr: 0.000020 loss_cls: 2.6953 (2.6012) grad_norm: 1.0743 (1.4068) time: 2.8645 data: 0.0002 max mem: 28454 +[2024-12-12 00:18:58 root] (utils.py 283): INFO Epoch: [2] [2150/2502] eta: 0:16:51 lr: 0.000020 loss_cls: 2.7798 (2.6028) grad_norm: 1.1468 (1.4063) time: 2.8641 data: 0.0002 max mem: 28454 +[2024-12-12 00:19:27 root] (utils.py 283): INFO Epoch: [2] [2160/2502] eta: 0:16:22 lr: 0.000020 loss_cls: 2.9437 (2.6038) grad_norm: 1.1972 (1.4083) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 00:19:56 root] (utils.py 283): INFO Epoch: [2] [2170/2502] eta: 0:15:54 lr: 0.000020 loss_cls: 2.7599 (2.6037) grad_norm: 1.1787 (1.4097) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 00:20:24 root] (utils.py 283): INFO Epoch: [2] [2180/2502] eta: 0:15:25 lr: 0.000020 loss_cls: 2.7081 (2.6029) grad_norm: 1.1123 (1.4084) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 00:20:53 root] (utils.py 283): INFO Epoch: [2] [2190/2502] eta: 0:14:56 lr: 0.000020 loss_cls: 2.5987 (2.6026) grad_norm: 1.1390 (1.4074) time: 2.8623 data: 0.0002 max mem: 28454 +[2024-12-12 00:21:22 root] (utils.py 283): INFO Epoch: [2] [2200/2502] eta: 0:14:27 lr: 0.000020 loss_cls: 2.6962 (2.6031) grad_norm: 1.1639 (1.4102) time: 2.8638 data: 0.0002 max mem: 28454 +[2024-12-12 00:21:50 root] (utils.py 283): INFO Epoch: [2] [2210/2502] eta: 0:13:59 lr: 0.000020 loss_cls: 2.6514 (2.6028) grad_norm: 1.2322 (1.4103) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 00:22:19 root] (utils.py 283): INFO Epoch: [2] [2220/2502] eta: 0:13:30 lr: 0.000020 loss_cls: 2.6247 (2.6032) grad_norm: 1.2322 (1.4093) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 00:22:48 root] (utils.py 283): INFO Epoch: [2] [2230/2502] eta: 0:13:01 lr: 0.000020 loss_cls: 2.6993 (2.6034) grad_norm: 1.2928 (1.4118) time: 2.8641 data: 0.0002 max mem: 28454 +[2024-12-12 00:23:16 root] (utils.py 283): INFO Epoch: [2] [2240/2502] eta: 0:12:32 lr: 0.000020 loss_cls: 2.7232 (2.6039) grad_norm: 1.2695 (1.4109) time: 2.8630 data: 0.0002 max mem: 28454 +[2024-12-12 00:23:45 root] (utils.py 283): INFO Epoch: [2] [2250/2502] eta: 0:12:04 lr: 0.000020 loss_cls: 2.7380 (2.6034) grad_norm: 1.1526 (1.4097) time: 2.8639 data: 0.0002 max mem: 28454 +[2024-12-12 00:24:14 root] (utils.py 283): INFO Epoch: [2] [2260/2502] eta: 0:11:35 lr: 0.000020 loss_cls: 2.7508 (2.6044) grad_norm: 1.1527 (1.4088) time: 2.8621 data: 0.0002 max mem: 28454 +[2024-12-12 00:24:42 root] (utils.py 283): INFO Epoch: [2] [2270/2502] eta: 0:11:06 lr: 0.000020 loss_cls: 2.8247 (2.6049) grad_norm: 1.1778 (1.4085) time: 2.8638 data: 0.0002 max mem: 28454 +[2024-12-12 00:25:11 root] (utils.py 283): INFO Epoch: [2] [2280/2502] eta: 0:10:37 lr: 0.000020 loss_cls: 2.5030 (2.6039) grad_norm: 1.1641 (1.4159) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 00:25:40 root] (utils.py 283): INFO Epoch: [2] [2290/2502] eta: 0:10:09 lr: 0.000020 loss_cls: 2.4609 (2.6035) grad_norm: 1.1319 (1.4149) time: 2.8665 data: 0.0002 max mem: 28454 +[2024-12-12 00:26:08 root] (utils.py 283): INFO Epoch: [2] [2300/2502] eta: 0:09:40 lr: 0.000020 loss_cls: 2.6496 (2.6042) grad_norm: 1.1331 (1.4142) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 00:26:37 root] (utils.py 283): INFO Epoch: [2] [2310/2502] eta: 0:09:11 lr: 0.000020 loss_cls: 2.6675 (2.6043) grad_norm: 1.1782 (1.4131) time: 2.8642 data: 0.0002 max mem: 28454 +[2024-12-12 00:27:05 root] (utils.py 283): INFO Epoch: [2] [2320/2502] eta: 0:08:42 lr: 0.000020 loss_cls: 2.5646 (2.6045) grad_norm: 1.2256 (1.4131) time: 2.8629 data: 0.0002 max mem: 28454 +[2024-12-12 00:27:34 root] (utils.py 283): INFO Epoch: [2] [2330/2502] eta: 0:08:14 lr: 0.000020 loss_cls: 2.4993 (2.6037) grad_norm: 1.2179 (1.4119) time: 2.8655 data: 0.0002 max mem: 28454 +[2024-12-12 00:28:03 root] (utils.py 283): INFO Epoch: [2] [2340/2502] eta: 0:07:45 lr: 0.000020 loss_cls: 2.6804 (2.6043) grad_norm: 1.1136 (1.4106) time: 2.8700 data: 0.0003 max mem: 28454 +[2024-12-12 00:28:31 root] (utils.py 283): INFO Epoch: [2] [2350/2502] eta: 0:07:16 lr: 0.000020 loss_cls: 2.7144 (2.6045) grad_norm: 1.1262 (1.4099) time: 2.8668 data: 0.0003 max mem: 28454 +[2024-12-12 00:29:00 root] (utils.py 283): INFO Epoch: [2] [2360/2502] eta: 0:06:48 lr: 0.000020 loss_cls: 2.7144 (2.6047) grad_norm: 1.1712 (1.4102) time: 2.8636 data: 0.0002 max mem: 28454 +[2024-12-12 00:29:29 root] (utils.py 283): INFO Epoch: [2] [2370/2502] eta: 0:06:19 lr: 0.000020 loss_cls: 2.5262 (2.6042) grad_norm: 1.1503 (1.4089) time: 2.8635 data: 0.0003 max mem: 28454 +[2024-12-12 00:29:57 root] (utils.py 283): INFO Epoch: [2] [2380/2502] eta: 0:05:50 lr: 0.000020 loss_cls: 2.6331 (2.6043) grad_norm: 1.0989 (1.4076) time: 2.8631 data: 0.0003 max mem: 28454 +[2024-12-12 00:30:26 root] (utils.py 283): INFO Epoch: [2] [2390/2502] eta: 0:05:21 lr: 0.000020 loss_cls: 2.7638 (2.6047) grad_norm: 1.1042 (1.4066) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 00:30:55 root] (utils.py 283): INFO Epoch: [2] [2400/2502] eta: 0:04:53 lr: 0.000020 loss_cls: 2.6258 (2.6038) grad_norm: 1.1042 (1.4073) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 00:31:23 root] (utils.py 283): INFO Epoch: [2] [2410/2502] eta: 0:04:24 lr: 0.000020 loss_cls: 2.6411 (2.6037) grad_norm: 1.1009 (1.4063) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 00:31:52 root] (utils.py 283): INFO Epoch: [2] [2420/2502] eta: 0:03:55 lr: 0.000020 loss_cls: 2.7707 (2.6045) grad_norm: 1.1228 (1.4076) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 00:32:21 root] (utils.py 283): INFO Epoch: [2] [2430/2502] eta: 0:03:26 lr: 0.000020 loss_cls: 2.8046 (2.6045) grad_norm: 1.1792 (1.4067) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 00:32:49 root] (utils.py 283): INFO Epoch: [2] [2440/2502] eta: 0:02:58 lr: 0.000020 loss_cls: 2.5056 (2.6034) grad_norm: 1.2041 (1.4060) time: 2.8671 data: 0.0003 max mem: 28454 +[2024-12-12 00:33:18 root] (utils.py 283): INFO Epoch: [2] [2450/2502] eta: 0:02:29 lr: 0.000020 loss_cls: 2.5058 (2.6030) grad_norm: 1.1775 (1.4053) time: 2.8688 data: 0.0003 max mem: 28454 +[2024-12-12 00:33:47 root] (utils.py 283): INFO Epoch: [2] [2460/2502] eta: 0:02:00 lr: 0.000020 loss_cls: 2.7074 (2.6034) grad_norm: 1.2314 (1.4047) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 00:34:16 root] (utils.py 283): INFO Epoch: [2] [2470/2502] eta: 0:01:31 lr: 0.000020 loss_cls: 2.8145 (2.6038) grad_norm: 1.2314 (1.4039) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 00:34:44 root] (utils.py 283): INFO Epoch: [2] [2480/2502] eta: 0:01:03 lr: 0.000020 loss_cls: 2.7033 (2.6041) grad_norm: 1.1450 (1.4026) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 00:35:13 root] (utils.py 283): INFO Epoch: [2] [2490/2502] eta: 0:00:34 lr: 0.000020 loss_cls: 2.6063 (2.6036) grad_norm: 1.1194 (1.4019) time: 2.8884 data: 0.0195 max mem: 28454 +[2024-12-12 00:35:42 root] (utils.py 283): INFO Epoch: [2] [2500/2502] eta: 0:00:05 lr: 0.000020 loss_cls: 2.7109 (2.6033) grad_norm: 1.1863 (1.4011) time: 2.8865 data: 0.0195 max mem: 28454 +[2024-12-12 00:35:45 root] (utils.py 283): INFO Epoch: [2] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 2.7109 (2.6032) grad_norm: 1.1957 (1.4010) time: 2.8859 data: 0.0195 max mem: 28454 +[2024-12-12 00:35:45 root] (utils.py 297): INFO Epoch: [2] Total time: 1:59:48 (2.8733 s / it) +[2024-12-12 00:35:45 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 2.7109 (2.5997) grad_norm: 1.1957 (1.4010) +[2024-12-12 00:35:48 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:52 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4183 (0.4183) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5391 data: 0.0004 max mem: 28454 +[2024-12-12 00:35:53 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6104 (0.6017) acc1: 86.7188 (87.5000) acc3: 96.8750 (96.5909) acc5: 98.4375 (98.0114) time: 0.5466 data: 0.0005 max mem: 28454 +[2024-12-12 00:35:59 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6177 (0.6253) acc1: 85.9375 (86.6443) acc3: 96.8750 (96.2798) acc5: 97.6562 (97.7679) time: 0.5471 data: 0.0004 max mem: 28454 +[2024-12-12 00:36:04 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6346 (0.6577) acc1: 85.9375 (85.7107) acc3: 95.3125 (95.8417) acc5: 96.8750 (97.5554) time: 0.5478 data: 0.0004 max mem: 28454 +[2024-12-12 00:36:10 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6786 (0.6671) acc1: 86.7188 (85.4992) acc3: 96.0938 (95.8270) acc5: 96.8750 (97.5038) time: 0.5479 data: 0.0004 max mem: 28454 +[2024-12-12 00:36:15 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8518 (0.7458) acc1: 78.9062 (83.5938) acc3: 92.9688 (94.8989) acc5: 96.0938 (96.7678) time: 0.5468 data: 0.0004 max mem: 28454 +[2024-12-12 00:36:21 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9824 (0.7737) acc1: 78.9062 (83.1839) acc3: 91.4062 (94.3391) acc5: 93.7500 (96.3627) time: 0.5468 data: 0.0004 max mem: 28454 +[2024-12-12 00:36:26 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9739 (0.8009) acc1: 80.4688 (82.4824) acc3: 92.1875 (94.1241) acc5: 94.5312 (96.2148) time: 0.5473 data: 0.0004 max mem: 28454 +[2024-12-12 00:36:32 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9650 (0.8260) acc1: 79.6875 (82.0312) acc3: 92.1875 (93.7307) acc5: 94.5312 (95.8623) time: 0.5475 data: 0.0006 max mem: 28454 +[2024-12-12 00:36:37 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9650 (0.8461) acc1: 78.1250 (81.3959) acc3: 90.6250 (93.4238) acc5: 93.7500 (95.6473) time: 0.5472 data: 0.0006 max mem: 28454 +[2024-12-12 00:36:41 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9199 (0.8440) acc1: 77.3438 (81.3840) acc3: 92.1875 (93.5040) acc5: 94.5312 (95.6960) time: 0.5377 data: 0.0006 max mem: 28454 +[2024-12-12 00:36:41 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5455 s / it) +[2024-12-12 00:36:41 root] (engine.py 264): INFO * Acc@1 81.744 Acc@3 93.442 Acc@5 95.646 loss 0.842 flops 13.207 layer_flops 13.109 +[2024-12-12 00:36:41 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.7% +[2024-12-12 00:36:43 root] (main.py 576): INFO Max accuracy: 81.74% +[2024-12-12 00:36:46 root] (utils.py 283): INFO Epoch: [3] [ 0/2502] eta: 1:58:13 lr: 0.000019 loss_cls: 1.9193 (1.9193) grad_norm: 1.0496 (1.0496) time: 2.8352 data: 0.0005 max mem: 28454 +[2024-12-12 00:37:14 root] (utils.py 283): INFO Epoch: [3] [ 10/2502] eta: 1:58:33 lr: 0.000019 loss_cls: 2.4523 (2.4440) grad_norm: 1.0814 (1.1260) time: 2.8545 data: 0.0003 max mem: 28454 +[2024-12-12 00:37:43 root] (utils.py 283): INFO Epoch: [3] [ 20/2502] eta: 1:58:11 lr: 0.000019 loss_cls: 2.5894 (2.4999) grad_norm: 1.0974 (1.1173) time: 2.8583 data: 0.0003 max mem: 28454 +[2024-12-12 00:38:11 root] (utils.py 283): INFO Epoch: [3] [ 30/2502] eta: 1:57:51 lr: 0.000019 loss_cls: 2.5894 (2.4907) grad_norm: 1.1185 (1.1482) time: 2.8639 data: 0.0002 max mem: 28454 +[2024-12-12 00:38:40 root] (utils.py 283): INFO Epoch: [3] [ 40/2502] eta: 1:57:23 lr: 0.000019 loss_cls: 2.6059 (2.5532) grad_norm: 1.1984 (1.2109) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 00:39:09 root] (utils.py 283): INFO Epoch: [3] [ 50/2502] eta: 1:56:58 lr: 0.000019 loss_cls: 2.6137 (2.5093) grad_norm: 1.1262 (1.2127) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-12 00:39:37 root] (utils.py 283): INFO Epoch: [3] [ 60/2502] eta: 1:56:31 lr: 0.000019 loss_cls: 2.4429 (2.4779) grad_norm: 1.1021 (1.1990) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 00:40:06 root] (utils.py 283): INFO Epoch: [3] [ 70/2502] eta: 1:56:04 lr: 0.000019 loss_cls: 2.5283 (2.5013) grad_norm: 1.1122 (1.1923) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 00:40:35 root] (utils.py 283): INFO Epoch: [3] [ 80/2502] eta: 1:55:36 lr: 0.000019 loss_cls: 2.7401 (2.5300) grad_norm: 1.1305 (1.2159) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 00:41:03 root] (utils.py 283): INFO Epoch: [3] [ 90/2502] eta: 1:55:09 lr: 0.000019 loss_cls: 2.6972 (2.5288) grad_norm: 1.1283 (1.2024) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 00:41:32 root] (utils.py 283): INFO Epoch: [3] [ 100/2502] eta: 1:54:42 lr: 0.000019 loss_cls: 2.7496 (2.5418) grad_norm: 1.1147 (1.2009) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 00:42:01 root] (utils.py 283): INFO Epoch: [3] [ 110/2502] eta: 1:54:15 lr: 0.000019 loss_cls: 2.7504 (2.5313) grad_norm: 1.1479 (1.1941) time: 2.8707 data: 0.0003 max mem: 28454 +[2024-12-12 00:42:29 root] (utils.py 283): INFO Epoch: [3] [ 120/2502] eta: 1:53:46 lr: 0.000019 loss_cls: 2.7453 (2.5403) grad_norm: 1.1448 (1.2093) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 00:42:58 root] (utils.py 283): INFO Epoch: [3] [ 130/2502] eta: 1:53:18 lr: 0.000019 loss_cls: 2.7453 (2.5480) grad_norm: 1.1673 (1.2078) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 00:43:27 root] (utils.py 283): INFO Epoch: [3] [ 140/2502] eta: 1:52:50 lr: 0.000019 loss_cls: 2.6749 (2.5460) grad_norm: 1.0824 (1.2410) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 00:43:56 root] (utils.py 283): INFO Epoch: [3] [ 150/2502] eta: 1:52:22 lr: 0.000019 loss_cls: 2.7174 (2.5522) grad_norm: 1.0776 (1.2335) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 00:44:24 root] (utils.py 283): INFO Epoch: [3] [ 160/2502] eta: 1:51:54 lr: 0.000019 loss_cls: 2.7194 (2.5655) grad_norm: 1.1277 (1.2299) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 00:44:53 root] (utils.py 283): INFO Epoch: [3] [ 170/2502] eta: 1:51:25 lr: 0.000019 loss_cls: 2.6697 (2.5617) grad_norm: 1.1598 (1.2288) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 00:45:22 root] (utils.py 283): INFO Epoch: [3] [ 180/2502] eta: 1:50:57 lr: 0.000019 loss_cls: 2.6697 (2.5656) grad_norm: 1.1598 (1.2274) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 00:45:50 root] (utils.py 283): INFO Epoch: [3] [ 190/2502] eta: 1:50:30 lr: 0.000019 loss_cls: 2.6974 (2.5716) grad_norm: 1.1348 (1.2278) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 00:46:19 root] (utils.py 283): INFO Epoch: [3] [ 200/2502] eta: 1:50:01 lr: 0.000019 loss_cls: 2.6923 (2.5796) grad_norm: 1.1854 (1.2283) time: 2.8736 data: 0.0003 max mem: 28454 +[2024-12-12 00:46:48 root] (utils.py 283): INFO Epoch: [3] [ 210/2502] eta: 1:49:33 lr: 0.000019 loss_cls: 2.6496 (2.5744) grad_norm: 1.0685 (1.2223) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 00:47:17 root] (utils.py 283): INFO Epoch: [3] [ 220/2502] eta: 1:49:05 lr: 0.000019 loss_cls: 2.4722 (2.5559) grad_norm: 1.0829 (1.2432) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 00:47:45 root] (utils.py 283): INFO Epoch: [3] [ 230/2502] eta: 1:48:36 lr: 0.000019 loss_cls: 2.5752 (2.5584) grad_norm: 1.1667 (1.2390) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 00:48:14 root] (utils.py 283): INFO Epoch: [3] [ 240/2502] eta: 1:48:08 lr: 0.000019 loss_cls: 2.6380 (2.5599) grad_norm: 1.1799 (1.2376) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 00:48:43 root] (utils.py 283): INFO Epoch: [3] [ 250/2502] eta: 1:47:40 lr: 0.000019 loss_cls: 2.6298 (2.5592) grad_norm: 1.1735 (1.2419) time: 2.8739 data: 0.0003 max mem: 28454 +[2024-12-12 00:49:12 root] (utils.py 283): INFO Epoch: [3] [ 260/2502] eta: 1:47:12 lr: 0.000019 loss_cls: 2.6684 (2.5594) grad_norm: 1.1606 (1.2385) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-12 00:49:40 root] (utils.py 283): INFO Epoch: [3] [ 270/2502] eta: 1:46:43 lr: 0.000019 loss_cls: 2.6684 (2.5593) grad_norm: 1.1691 (1.2417) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 00:50:09 root] (utils.py 283): INFO Epoch: [3] [ 280/2502] eta: 1:46:15 lr: 0.000019 loss_cls: 2.4666 (2.5596) grad_norm: 1.1126 (1.2364) time: 2.8724 data: 0.0003 max mem: 28454 +[2024-12-12 00:50:38 root] (utils.py 283): INFO Epoch: [3] [ 290/2502] eta: 1:45:47 lr: 0.000019 loss_cls: 2.4666 (2.5619) grad_norm: 1.1126 (1.2463) time: 2.8737 data: 0.0003 max mem: 28454 +[2024-12-12 00:51:06 root] (utils.py 283): INFO Epoch: [3] [ 300/2502] eta: 1:45:18 lr: 0.000019 loss_cls: 2.7428 (2.5640) grad_norm: 1.1345 (1.2426) time: 2.8727 data: 0.0003 max mem: 28454 +[2024-12-12 00:51:35 root] (utils.py 283): INFO Epoch: [3] [ 310/2502] eta: 1:44:50 lr: 0.000019 loss_cls: 2.7555 (2.5651) grad_norm: 1.1626 (1.2454) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 00:52:04 root] (utils.py 283): INFO Epoch: [3] [ 320/2502] eta: 1:44:21 lr: 0.000019 loss_cls: 2.7071 (2.5624) grad_norm: 1.1663 (1.2397) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 00:52:33 root] (utils.py 283): INFO Epoch: [3] [ 330/2502] eta: 1:43:52 lr: 0.000019 loss_cls: 2.7230 (2.5667) grad_norm: 1.0386 (1.2383) time: 2.8702 data: 0.0003 max mem: 28454 +[2024-12-12 00:53:01 root] (utils.py 283): INFO Epoch: [3] [ 340/2502] eta: 1:43:24 lr: 0.000019 loss_cls: 2.7366 (2.5658) grad_norm: 1.0652 (1.2325) time: 2.8696 data: 0.0003 max mem: 28454 +[2024-12-12 00:53:30 root] (utils.py 283): INFO Epoch: [3] [ 350/2502] eta: 1:42:55 lr: 0.000019 loss_cls: 2.5961 (2.5619) grad_norm: 1.0652 (1.2372) time: 2.8680 data: 0.0003 max mem: 28454 +[2024-12-12 00:53:59 root] (utils.py 283): INFO Epoch: [3] [ 360/2502] eta: 1:42:26 lr: 0.000019 loss_cls: 2.6139 (2.5678) grad_norm: 1.1246 (1.2370) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 00:54:27 root] (utils.py 283): INFO Epoch: [3] [ 370/2502] eta: 1:41:58 lr: 0.000019 loss_cls: 2.7448 (2.5717) grad_norm: 1.1910 (1.2647) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 00:54:56 root] (utils.py 283): INFO Epoch: [3] [ 380/2502] eta: 1:41:29 lr: 0.000019 loss_cls: 2.7938 (2.5716) grad_norm: 1.2057 (1.2701) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 00:55:25 root] (utils.py 283): INFO Epoch: [3] [ 390/2502] eta: 1:41:01 lr: 0.000019 loss_cls: 2.7447 (2.5750) grad_norm: 1.1370 (1.2660) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 00:55:54 root] (utils.py 283): INFO Epoch: [3] [ 400/2502] eta: 1:40:32 lr: 0.000019 loss_cls: 2.7368 (2.5800) grad_norm: 1.1193 (1.2779) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 00:56:22 root] (utils.py 283): INFO Epoch: [3] [ 410/2502] eta: 1:40:04 lr: 0.000019 loss_cls: 2.5608 (2.5769) grad_norm: 1.1933 (1.2772) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 00:56:51 root] (utils.py 283): INFO Epoch: [3] [ 420/2502] eta: 1:39:35 lr: 0.000019 loss_cls: 2.5340 (2.5770) grad_norm: 1.1163 (1.2740) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 00:57:20 root] (utils.py 283): INFO Epoch: [3] [ 430/2502] eta: 1:39:06 lr: 0.000019 loss_cls: 2.5493 (2.5763) grad_norm: 1.0664 (1.2732) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 00:57:48 root] (utils.py 283): INFO Epoch: [3] [ 440/2502] eta: 1:38:38 lr: 0.000019 loss_cls: 2.7207 (2.5795) grad_norm: 1.1301 (1.2718) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 00:58:17 root] (utils.py 283): INFO Epoch: [3] [ 450/2502] eta: 1:38:09 lr: 0.000019 loss_cls: 2.5584 (2.5761) grad_norm: 1.1935 (1.2784) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 00:58:46 root] (utils.py 283): INFO Epoch: [3] [ 460/2502] eta: 1:37:41 lr: 0.000019 loss_cls: 2.5584 (2.5745) grad_norm: 1.1499 (1.2752) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 00:59:15 root] (utils.py 283): INFO Epoch: [3] [ 470/2502] eta: 1:37:12 lr: 0.000019 loss_cls: 2.6917 (2.5740) grad_norm: 1.0992 (1.2720) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 00:59:43 root] (utils.py 283): INFO Epoch: [3] [ 480/2502] eta: 1:36:43 lr: 0.000019 loss_cls: 2.5944 (2.5749) grad_norm: 1.1180 (1.2692) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 01:00:12 root] (utils.py 283): INFO Epoch: [3] [ 490/2502] eta: 1:36:15 lr: 0.000019 loss_cls: 2.5043 (2.5729) grad_norm: 1.1180 (1.2685) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 01:00:41 root] (utils.py 283): INFO Epoch: [3] [ 500/2502] eta: 1:35:46 lr: 0.000019 loss_cls: 2.6259 (2.5718) grad_norm: 1.0489 (1.2654) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 01:01:10 root] (utils.py 283): INFO Epoch: [3] [ 510/2502] eta: 1:35:18 lr: 0.000019 loss_cls: 2.5469 (2.5713) grad_norm: 1.0487 (1.2616) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 01:01:39 root] (utils.py 283): INFO Epoch: [3] [ 520/2502] eta: 1:34:50 lr: 0.000019 loss_cls: 2.5339 (2.5683) grad_norm: 1.1310 (1.2673) time: 2.8823 data: 0.0003 max mem: 28454 +[2024-12-12 01:02:07 root] (utils.py 283): INFO Epoch: [3] [ 530/2502] eta: 1:34:21 lr: 0.000019 loss_cls: 2.6584 (2.5716) grad_norm: 1.2732 (1.2669) time: 2.8816 data: 0.0002 max mem: 28454 +[2024-12-12 01:02:36 root] (utils.py 283): INFO Epoch: [3] [ 540/2502] eta: 1:33:53 lr: 0.000019 loss_cls: 2.7289 (2.5700) grad_norm: 1.1107 (1.2647) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-12 01:03:05 root] (utils.py 283): INFO Epoch: [3] [ 550/2502] eta: 1:33:24 lr: 0.000019 loss_cls: 2.2463 (2.5636) grad_norm: 1.0998 (1.2670) time: 2.8785 data: 0.0002 max mem: 28454 +[2024-12-12 01:03:34 root] (utils.py 283): INFO Epoch: [3] [ 560/2502] eta: 1:32:56 lr: 0.000019 loss_cls: 2.6277 (2.5648) grad_norm: 1.0998 (1.2692) time: 2.8772 data: 0.0002 max mem: 28454 +[2024-12-12 01:04:02 root] (utils.py 283): INFO Epoch: [3] [ 570/2502] eta: 1:32:27 lr: 0.000019 loss_cls: 2.8449 (2.5670) grad_norm: 1.1623 (1.2694) time: 2.8762 data: 0.0003 max mem: 28454 +[2024-12-12 01:04:31 root] (utils.py 283): INFO Epoch: [3] [ 580/2502] eta: 1:31:59 lr: 0.000019 loss_cls: 2.7300 (2.5666) grad_norm: 1.1629 (1.2678) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 01:05:00 root] (utils.py 283): INFO Epoch: [3] [ 590/2502] eta: 1:31:30 lr: 0.000019 loss_cls: 2.7584 (2.5718) grad_norm: 1.1550 (1.2672) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 01:05:29 root] (utils.py 283): INFO Epoch: [3] [ 600/2502] eta: 1:31:01 lr: 0.000019 loss_cls: 2.7558 (2.5689) grad_norm: 1.1512 (1.2656) time: 2.8743 data: 0.0003 max mem: 28454 +[2024-12-12 01:05:57 root] (utils.py 283): INFO Epoch: [3] [ 610/2502] eta: 1:30:33 lr: 0.000019 loss_cls: 2.4259 (2.5667) grad_norm: 1.0368 (1.2627) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 01:06:26 root] (utils.py 283): INFO Epoch: [3] [ 620/2502] eta: 1:30:04 lr: 0.000019 loss_cls: 2.6651 (2.5690) grad_norm: 1.0368 (1.2599) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 01:06:55 root] (utils.py 283): INFO Epoch: [3] [ 630/2502] eta: 1:29:35 lr: 0.000019 loss_cls: 2.6934 (2.5676) grad_norm: 1.0508 (1.2583) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 01:07:23 root] (utils.py 283): INFO Epoch: [3] [ 640/2502] eta: 1:29:06 lr: 0.000019 loss_cls: 2.6934 (2.5704) grad_norm: 1.1087 (1.2577) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 01:07:52 root] (utils.py 283): INFO Epoch: [3] [ 650/2502] eta: 1:28:38 lr: 0.000019 loss_cls: 2.8472 (2.5720) grad_norm: 1.1957 (1.2574) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 01:08:21 root] (utils.py 283): INFO Epoch: [3] [ 660/2502] eta: 1:28:09 lr: 0.000019 loss_cls: 2.7283 (2.5708) grad_norm: 1.1726 (1.2555) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 01:08:50 root] (utils.py 283): INFO Epoch: [3] [ 670/2502] eta: 1:27:40 lr: 0.000019 loss_cls: 2.6444 (2.5715) grad_norm: 1.1726 (1.2672) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 01:09:18 root] (utils.py 283): INFO Epoch: [3] [ 680/2502] eta: 1:27:12 lr: 0.000019 loss_cls: 2.7737 (2.5738) grad_norm: 1.1815 (1.2665) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 01:09:47 root] (utils.py 283): INFO Epoch: [3] [ 690/2502] eta: 1:26:43 lr: 0.000019 loss_cls: 2.6700 (2.5702) grad_norm: 1.1748 (1.2672) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 01:10:16 root] (utils.py 283): INFO Epoch: [3] [ 700/2502] eta: 1:26:14 lr: 0.000019 loss_cls: 2.5151 (2.5720) grad_norm: 1.1124 (1.2716) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 01:10:45 root] (utils.py 283): INFO Epoch: [3] [ 710/2502] eta: 1:25:46 lr: 0.000019 loss_cls: 2.6321 (2.5713) grad_norm: 1.1330 (1.2706) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 01:11:13 root] (utils.py 283): INFO Epoch: [3] [ 720/2502] eta: 1:25:17 lr: 0.000019 loss_cls: 2.4784 (2.5681) grad_norm: 1.1330 (1.2685) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 01:11:42 root] (utils.py 283): INFO Epoch: [3] [ 730/2502] eta: 1:24:48 lr: 0.000019 loss_cls: 2.2858 (2.5669) grad_norm: 1.1563 (1.2732) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 01:12:11 root] (utils.py 283): INFO Epoch: [3] [ 740/2502] eta: 1:24:19 lr: 0.000019 loss_cls: 2.6520 (2.5672) grad_norm: 1.1542 (1.2717) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 01:12:39 root] (utils.py 283): INFO Epoch: [3] [ 750/2502] eta: 1:23:51 lr: 0.000019 loss_cls: 2.7196 (2.5680) grad_norm: 1.1355 (1.2762) time: 2.8708 data: 0.0003 max mem: 28454 +[2024-12-12 01:13:08 root] (utils.py 283): INFO Epoch: [3] [ 760/2502] eta: 1:23:22 lr: 0.000019 loss_cls: 2.8095 (2.5706) grad_norm: 1.1664 (1.2762) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-12 01:13:37 root] (utils.py 283): INFO Epoch: [3] [ 770/2502] eta: 1:22:53 lr: 0.000019 loss_cls: 2.7129 (2.5719) grad_norm: 1.1488 (1.2748) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 01:14:06 root] (utils.py 283): INFO Epoch: [3] [ 780/2502] eta: 1:22:25 lr: 0.000019 loss_cls: 2.6759 (2.5714) grad_norm: 1.1643 (1.2740) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 01:14:34 root] (utils.py 283): INFO Epoch: [3] [ 790/2502] eta: 1:21:56 lr: 0.000019 loss_cls: 2.5963 (2.5722) grad_norm: 1.1802 (1.2731) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 01:15:03 root] (utils.py 283): INFO Epoch: [3] [ 800/2502] eta: 1:21:27 lr: 0.000019 loss_cls: 2.5501 (2.5707) grad_norm: 1.1663 (1.2715) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 01:15:32 root] (utils.py 283): INFO Epoch: [3] [ 810/2502] eta: 1:20:59 lr: 0.000019 loss_cls: 2.4126 (2.5702) grad_norm: 1.0827 (1.2709) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 01:16:01 root] (utils.py 283): INFO Epoch: [3] [ 820/2502] eta: 1:20:30 lr: 0.000019 loss_cls: 2.6821 (2.5703) grad_norm: 1.1169 (1.2693) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 01:16:29 root] (utils.py 283): INFO Epoch: [3] [ 830/2502] eta: 1:20:01 lr: 0.000019 loss_cls: 2.7239 (2.5716) grad_norm: 1.1169 (1.2675) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 01:16:58 root] (utils.py 283): INFO Epoch: [3] [ 840/2502] eta: 1:19:32 lr: 0.000019 loss_cls: 2.5221 (2.5695) grad_norm: 1.1339 (1.2697) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 01:17:27 root] (utils.py 283): INFO Epoch: [3] [ 850/2502] eta: 1:19:04 lr: 0.000019 loss_cls: 2.6845 (2.5712) grad_norm: 1.2326 (1.2692) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 01:17:55 root] (utils.py 283): INFO Epoch: [3] [ 860/2502] eta: 1:18:35 lr: 0.000019 loss_cls: 2.7993 (2.5711) grad_norm: 1.2007 (1.2721) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 01:18:24 root] (utils.py 283): INFO Epoch: [3] [ 870/2502] eta: 1:18:07 lr: 0.000019 loss_cls: 2.7791 (2.5729) grad_norm: 1.1530 (1.2728) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 01:18:53 root] (utils.py 283): INFO Epoch: [3] [ 880/2502] eta: 1:17:38 lr: 0.000019 loss_cls: 2.7757 (2.5751) grad_norm: 1.1565 (1.2741) time: 2.8794 data: 0.0003 max mem: 28454 +[2024-12-12 01:19:22 root] (utils.py 283): INFO Epoch: [3] [ 890/2502] eta: 1:17:09 lr: 0.000019 loss_cls: 2.6589 (2.5767) grad_norm: 1.1435 (1.2724) time: 2.8731 data: 0.0003 max mem: 28454 +[2024-12-12 01:19:50 root] (utils.py 283): INFO Epoch: [3] [ 900/2502] eta: 1:16:40 lr: 0.000019 loss_cls: 2.6330 (2.5764) grad_norm: 1.1041 (1.2704) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-12 01:20:19 root] (utils.py 283): INFO Epoch: [3] [ 910/2502] eta: 1:16:12 lr: 0.000019 loss_cls: 2.4902 (2.5744) grad_norm: 1.0244 (1.2687) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 01:20:48 root] (utils.py 283): INFO Epoch: [3] [ 920/2502] eta: 1:15:43 lr: 0.000019 loss_cls: 2.6197 (2.5746) grad_norm: 1.0805 (1.2682) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 01:21:17 root] (utils.py 283): INFO Epoch: [3] [ 930/2502] eta: 1:15:14 lr: 0.000019 loss_cls: 2.6372 (2.5753) grad_norm: 1.1511 (1.2720) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 01:21:45 root] (utils.py 283): INFO Epoch: [3] [ 940/2502] eta: 1:14:46 lr: 0.000019 loss_cls: 2.6450 (2.5771) grad_norm: 1.1399 (1.2706) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 01:22:14 root] (utils.py 283): INFO Epoch: [3] [ 950/2502] eta: 1:14:17 lr: 0.000019 loss_cls: 2.7364 (2.5788) grad_norm: 1.1515 (1.2699) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 01:22:43 root] (utils.py 283): INFO Epoch: [3] [ 960/2502] eta: 1:13:48 lr: 0.000019 loss_cls: 2.7607 (2.5798) grad_norm: 1.1515 (1.2687) time: 2.8724 data: 0.0003 max mem: 28454 +[2024-12-12 01:23:12 root] (utils.py 283): INFO Epoch: [3] [ 970/2502] eta: 1:13:19 lr: 0.000019 loss_cls: 2.6424 (2.5783) grad_norm: 1.0994 (1.2667) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-12 01:23:40 root] (utils.py 283): INFO Epoch: [3] [ 980/2502] eta: 1:12:51 lr: 0.000019 loss_cls: 2.4394 (2.5771) grad_norm: 1.0152 (1.2792) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 01:24:09 root] (utils.py 283): INFO Epoch: [3] [ 990/2502] eta: 1:12:22 lr: 0.000019 loss_cls: 2.7349 (2.5806) grad_norm: 1.1090 (1.2791) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 01:24:38 root] (utils.py 283): INFO Epoch: [3] [1000/2502] eta: 1:11:53 lr: 0.000019 loss_cls: 2.9008 (2.5816) grad_norm: 1.1727 (1.2779) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 01:25:06 root] (utils.py 283): INFO Epoch: [3] [1010/2502] eta: 1:11:25 lr: 0.000019 loss_cls: 2.6836 (2.5814) grad_norm: 1.1358 (1.2780) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 01:25:35 root] (utils.py 283): INFO Epoch: [3] [1020/2502] eta: 1:10:56 lr: 0.000019 loss_cls: 2.6895 (2.5823) grad_norm: 1.2007 (1.2947) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 01:26:04 root] (utils.py 283): INFO Epoch: [3] [1030/2502] eta: 1:10:27 lr: 0.000019 loss_cls: 2.7436 (2.5842) grad_norm: 1.2419 (1.2936) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 01:26:33 root] (utils.py 283): INFO Epoch: [3] [1040/2502] eta: 1:09:59 lr: 0.000019 loss_cls: 2.7664 (2.5850) grad_norm: 1.1259 (1.2921) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 01:27:02 root] (utils.py 283): INFO Epoch: [3] [1050/2502] eta: 1:09:30 lr: 0.000019 loss_cls: 2.7664 (2.5866) grad_norm: 1.1079 (1.2928) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-12 01:27:30 root] (utils.py 283): INFO Epoch: [3] [1060/2502] eta: 1:09:01 lr: 0.000019 loss_cls: 2.7944 (2.5880) grad_norm: 1.2638 (1.3948) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 01:27:59 root] (utils.py 283): INFO Epoch: [3] [1070/2502] eta: 1:08:33 lr: 0.000019 loss_cls: 2.7944 (2.5886) grad_norm: 1.5759 (1.3972) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 01:28:28 root] (utils.py 283): INFO Epoch: [3] [1080/2502] eta: 1:08:04 lr: 0.000019 loss_cls: 2.8313 (2.5890) grad_norm: 1.4810 (1.3975) time: 2.8796 data: 0.0003 max mem: 28454 +[2024-12-12 01:28:57 root] (utils.py 283): INFO Epoch: [3] [1090/2502] eta: 1:07:35 lr: 0.000019 loss_cls: 2.8765 (2.5905) grad_norm: 1.3189 (1.3969) time: 2.8778 data: 0.0003 max mem: 28454 +[2024-12-12 01:29:25 root] (utils.py 283): INFO Epoch: [3] [1100/2502] eta: 1:07:07 lr: 0.000019 loss_cls: 2.7879 (2.5917) grad_norm: 1.2326 (1.3948) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 01:29:54 root] (utils.py 283): INFO Epoch: [3] [1110/2502] eta: 1:06:38 lr: 0.000019 loss_cls: 2.7468 (2.5918) grad_norm: 1.1205 (1.3925) time: 2.8755 data: 0.0003 max mem: 28454 +[2024-12-12 01:30:23 root] (utils.py 283): INFO Epoch: [3] [1120/2502] eta: 1:06:09 lr: 0.000019 loss_cls: 2.7764 (2.5927) grad_norm: 1.2060 (1.4166) time: 2.8771 data: 0.0003 max mem: 28454 +[2024-12-12 01:30:52 root] (utils.py 283): INFO Epoch: [3] [1130/2502] eta: 1:05:41 lr: 0.000019 loss_cls: 2.7404 (2.5915) grad_norm: 1.2462 (1.4146) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 01:31:20 root] (utils.py 283): INFO Epoch: [3] [1140/2502] eta: 1:05:12 lr: 0.000019 loss_cls: 2.7094 (2.5928) grad_norm: 1.2090 (1.4129) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 01:31:49 root] (utils.py 283): INFO Epoch: [3] [1150/2502] eta: 1:04:43 lr: 0.000019 loss_cls: 2.7094 (2.5921) grad_norm: 1.1819 (1.4117) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 01:32:18 root] (utils.py 283): INFO Epoch: [3] [1160/2502] eta: 1:04:14 lr: 0.000019 loss_cls: 2.6171 (2.5924) grad_norm: 1.1620 (1.4094) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-12 01:32:47 root] (utils.py 283): INFO Epoch: [3] [1170/2502] eta: 1:03:46 lr: 0.000019 loss_cls: 2.6515 (2.5915) grad_norm: 1.0939 (1.4062) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 01:33:15 root] (utils.py 283): INFO Epoch: [3] [1180/2502] eta: 1:03:17 lr: 0.000019 loss_cls: 2.6339 (2.5895) grad_norm: 1.0791 (1.4049) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 01:33:44 root] (utils.py 283): INFO Epoch: [3] [1190/2502] eta: 1:02:48 lr: 0.000019 loss_cls: 2.4799 (2.5891) grad_norm: 1.1760 (1.4045) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 01:34:13 root] (utils.py 283): INFO Epoch: [3] [1200/2502] eta: 1:02:20 lr: 0.000019 loss_cls: 2.7074 (2.5898) grad_norm: 1.1096 (1.4039) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 01:34:41 root] (utils.py 283): INFO Epoch: [3] [1210/2502] eta: 1:01:51 lr: 0.000019 loss_cls: 2.6519 (2.5900) grad_norm: 1.1881 (1.4048) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 01:35:10 root] (utils.py 283): INFO Epoch: [3] [1220/2502] eta: 1:01:22 lr: 0.000019 loss_cls: 2.6519 (2.5902) grad_norm: 1.1767 (1.4084) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 01:35:39 root] (utils.py 283): INFO Epoch: [3] [1230/2502] eta: 1:00:53 lr: 0.000019 loss_cls: 2.7581 (2.5906) grad_norm: 1.1644 (1.4066) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 01:36:08 root] (utils.py 283): INFO Epoch: [3] [1240/2502] eta: 1:00:25 lr: 0.000019 loss_cls: 2.7581 (2.5917) grad_norm: 1.0738 (1.4044) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 01:36:36 root] (utils.py 283): INFO Epoch: [3] [1250/2502] eta: 0:59:56 lr: 0.000019 loss_cls: 2.8249 (2.5933) grad_norm: 1.0470 (1.4029) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 01:37:05 root] (utils.py 283): INFO Epoch: [3] [1260/2502] eta: 0:59:27 lr: 0.000019 loss_cls: 2.7770 (2.5937) grad_norm: 1.0339 (1.4348) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 01:37:34 root] (utils.py 283): INFO Epoch: [3] [1270/2502] eta: 0:58:59 lr: 0.000019 loss_cls: 2.6835 (2.5940) grad_norm: 1.1509 (1.4332) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 01:38:03 root] (utils.py 283): INFO Epoch: [3] [1280/2502] eta: 0:58:30 lr: 0.000019 loss_cls: 2.7107 (2.5946) grad_norm: 1.2596 (1.4335) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 01:38:31 root] (utils.py 283): INFO Epoch: [3] [1290/2502] eta: 0:58:01 lr: 0.000019 loss_cls: 2.8185 (2.5952) grad_norm: 1.1559 (1.4307) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 01:39:00 root] (utils.py 283): INFO Epoch: [3] [1300/2502] eta: 0:57:32 lr: 0.000019 loss_cls: 2.7105 (2.5944) grad_norm: 1.0826 (1.4279) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 01:39:29 root] (utils.py 283): INFO Epoch: [3] [1310/2502] eta: 0:57:04 lr: 0.000019 loss_cls: 2.5686 (2.5948) grad_norm: 1.0917 (1.4532) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 01:39:57 root] (utils.py 283): INFO Epoch: [3] [1320/2502] eta: 0:56:35 lr: 0.000019 loss_cls: 2.7135 (2.5960) grad_norm: 1.2548 (1.4526) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 01:40:26 root] (utils.py 283): INFO Epoch: [3] [1330/2502] eta: 0:56:06 lr: 0.000019 loss_cls: 2.7594 (2.5968) grad_norm: 1.4012 (1.4549) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-12 01:40:55 root] (utils.py 283): INFO Epoch: [3] [1340/2502] eta: 0:55:37 lr: 0.000019 loss_cls: 2.7912 (2.5985) grad_norm: 1.3955 (1.4537) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 01:41:23 root] (utils.py 283): INFO Epoch: [3] [1350/2502] eta: 0:55:08 lr: 0.000019 loss_cls: 2.7039 (2.5985) grad_norm: 1.1872 (1.4606) time: 2.8624 data: 0.0002 max mem: 28454 +[2024-12-12 01:41:52 root] (utils.py 283): INFO Epoch: [3] [1360/2502] eta: 0:54:40 lr: 0.000019 loss_cls: 2.5935 (2.5982) grad_norm: 1.2286 (1.4595) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 01:42:21 root] (utils.py 283): INFO Epoch: [3] [1370/2502] eta: 0:54:11 lr: 0.000019 loss_cls: 2.8006 (2.5985) grad_norm: 1.2286 (1.4579) time: 2.8665 data: 0.0003 max mem: 28454 +[2024-12-12 01:42:49 root] (utils.py 283): INFO Epoch: [3] [1380/2502] eta: 0:53:42 lr: 0.000019 loss_cls: 2.7005 (2.5987) grad_norm: 1.1179 (1.4575) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 01:43:18 root] (utils.py 283): INFO Epoch: [3] [1390/2502] eta: 0:53:13 lr: 0.000019 loss_cls: 2.6354 (2.5983) grad_norm: 1.1355 (1.4588) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 01:43:47 root] (utils.py 283): INFO Epoch: [3] [1400/2502] eta: 0:52:45 lr: 0.000019 loss_cls: 2.5909 (2.5973) grad_norm: 1.1156 (1.4562) time: 2.8683 data: 0.0003 max mem: 28454 +[2024-12-12 01:44:16 root] (utils.py 283): INFO Epoch: [3] [1410/2502] eta: 0:52:16 lr: 0.000019 loss_cls: 2.7069 (2.5984) grad_norm: 1.0875 (1.4547) time: 2.8712 data: 0.0003 max mem: 28454 +[2024-12-12 01:44:44 root] (utils.py 283): INFO Epoch: [3] [1420/2502] eta: 0:51:47 lr: 0.000019 loss_cls: 2.7640 (2.5993) grad_norm: 1.0951 (1.4526) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 01:45:13 root] (utils.py 283): INFO Epoch: [3] [1430/2502] eta: 0:51:19 lr: 0.000019 loss_cls: 2.7175 (2.6009) grad_norm: 1.0951 (1.4507) time: 2.8716 data: 0.0003 max mem: 28454 +[2024-12-12 01:45:42 root] (utils.py 283): INFO Epoch: [3] [1440/2502] eta: 0:50:50 lr: 0.000019 loss_cls: 2.6104 (2.6005) grad_norm: 1.1241 (1.4497) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 01:46:10 root] (utils.py 283): INFO Epoch: [3] [1450/2502] eta: 0:50:21 lr: 0.000019 loss_cls: 2.5619 (2.6000) grad_norm: 1.0491 (1.4519) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 01:46:39 root] (utils.py 283): INFO Epoch: [3] [1460/2502] eta: 0:49:52 lr: 0.000019 loss_cls: 2.9017 (2.6024) grad_norm: 1.1335 (1.4499) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 01:47:08 root] (utils.py 283): INFO Epoch: [3] [1470/2502] eta: 0:49:24 lr: 0.000019 loss_cls: 2.8329 (2.6018) grad_norm: 1.1415 (1.4478) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 01:47:37 root] (utils.py 283): INFO Epoch: [3] [1480/2502] eta: 0:48:55 lr: 0.000019 loss_cls: 2.6538 (2.6019) grad_norm: 1.1382 (1.4454) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 01:48:05 root] (utils.py 283): INFO Epoch: [3] [1490/2502] eta: 0:48:26 lr: 0.000019 loss_cls: 2.6004 (2.6017) grad_norm: 1.1139 (1.4435) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 01:48:34 root] (utils.py 283): INFO Epoch: [3] [1500/2502] eta: 0:47:57 lr: 0.000019 loss_cls: 2.4676 (2.5998) grad_norm: 1.1251 (1.4412) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 01:49:03 root] (utils.py 283): INFO Epoch: [3] [1510/2502] eta: 0:47:29 lr: 0.000019 loss_cls: 2.4473 (2.5994) grad_norm: 1.0682 (1.4388) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 01:49:31 root] (utils.py 283): INFO Epoch: [3] [1520/2502] eta: 0:47:00 lr: 0.000019 loss_cls: 2.7388 (2.6002) grad_norm: 1.1177 (1.4464) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 01:50:00 root] (utils.py 283): INFO Epoch: [3] [1530/2502] eta: 0:46:31 lr: 0.000019 loss_cls: 2.8116 (2.6004) grad_norm: 1.1894 (1.4447) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 01:50:29 root] (utils.py 283): INFO Epoch: [3] [1540/2502] eta: 0:46:03 lr: 0.000019 loss_cls: 2.5469 (2.5989) grad_norm: 1.1244 (1.4425) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 01:50:58 root] (utils.py 283): INFO Epoch: [3] [1550/2502] eta: 0:45:34 lr: 0.000019 loss_cls: 2.4083 (2.5983) grad_norm: 1.1115 (1.4404) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 01:51:26 root] (utils.py 283): INFO Epoch: [3] [1560/2502] eta: 0:45:05 lr: 0.000019 loss_cls: 2.6602 (2.5978) grad_norm: 1.1330 (1.4385) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 01:51:55 root] (utils.py 283): INFO Epoch: [3] [1570/2502] eta: 0:44:36 lr: 0.000019 loss_cls: 2.6053 (2.5980) grad_norm: 1.1143 (1.4368) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 01:52:24 root] (utils.py 283): INFO Epoch: [3] [1580/2502] eta: 0:44:08 lr: 0.000019 loss_cls: 2.6396 (2.5980) grad_norm: 1.0863 (1.4352) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 01:52:53 root] (utils.py 283): INFO Epoch: [3] [1590/2502] eta: 0:43:39 lr: 0.000019 loss_cls: 2.5481 (2.5971) grad_norm: 1.1322 (1.4417) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 01:53:21 root] (utils.py 283): INFO Epoch: [3] [1600/2502] eta: 0:43:10 lr: 0.000019 loss_cls: 2.5481 (2.5974) grad_norm: 1.1756 (1.4399) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 01:53:50 root] (utils.py 283): INFO Epoch: [3] [1610/2502] eta: 0:42:41 lr: 0.000019 loss_cls: 2.7113 (2.5976) grad_norm: 1.1047 (1.4381) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 01:54:19 root] (utils.py 283): INFO Epoch: [3] [1620/2502] eta: 0:42:13 lr: 0.000019 loss_cls: 2.6717 (2.5966) grad_norm: 1.1206 (1.4366) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 01:54:47 root] (utils.py 283): INFO Epoch: [3] [1630/2502] eta: 0:41:44 lr: 0.000019 loss_cls: 2.4847 (2.5966) grad_norm: 1.1701 (1.4349) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 01:55:16 root] (utils.py 283): INFO Epoch: [3] [1640/2502] eta: 0:41:15 lr: 0.000019 loss_cls: 2.6837 (2.5964) grad_norm: 1.1420 (1.4329) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 01:55:45 root] (utils.py 283): INFO Epoch: [3] [1650/2502] eta: 0:40:47 lr: 0.000019 loss_cls: 2.5833 (2.5957) grad_norm: 1.0936 (1.4339) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 01:56:14 root] (utils.py 283): INFO Epoch: [3] [1660/2502] eta: 0:40:18 lr: 0.000019 loss_cls: 2.5834 (2.5961) grad_norm: 1.1494 (1.4335) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 01:56:42 root] (utils.py 283): INFO Epoch: [3] [1670/2502] eta: 0:39:49 lr: 0.000019 loss_cls: 2.5909 (2.5947) grad_norm: 1.1179 (1.4318) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 01:57:11 root] (utils.py 283): INFO Epoch: [3] [1680/2502] eta: 0:39:20 lr: 0.000019 loss_cls: 2.4791 (2.5940) grad_norm: 1.1122 (1.4315) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 01:57:40 root] (utils.py 283): INFO Epoch: [3] [1690/2502] eta: 0:38:52 lr: 0.000019 loss_cls: 2.6375 (2.5942) grad_norm: 1.1327 (1.4301) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 01:58:09 root] (utils.py 283): INFO Epoch: [3] [1700/2502] eta: 0:38:23 lr: 0.000019 loss_cls: 2.6706 (2.5936) grad_norm: 1.1327 (1.4287) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 01:58:37 root] (utils.py 283): INFO Epoch: [3] [1710/2502] eta: 0:37:54 lr: 0.000019 loss_cls: 2.6749 (2.5945) grad_norm: 1.1444 (1.4275) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 01:59:06 root] (utils.py 283): INFO Epoch: [3] [1720/2502] eta: 0:37:26 lr: 0.000019 loss_cls: 2.6885 (2.5947) grad_norm: 1.0699 (1.4254) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 01:59:35 root] (utils.py 283): INFO Epoch: [3] [1730/2502] eta: 0:36:57 lr: 0.000019 loss_cls: 2.4601 (2.5938) grad_norm: 1.0429 (1.4237) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 02:00:03 root] (utils.py 283): INFO Epoch: [3] [1740/2502] eta: 0:36:28 lr: 0.000019 loss_cls: 2.5113 (2.5932) grad_norm: 1.0429 (1.4217) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 02:00:32 root] (utils.py 283): INFO Epoch: [3] [1750/2502] eta: 0:35:59 lr: 0.000019 loss_cls: 2.6981 (2.5933) grad_norm: 1.0181 (1.4200) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 02:01:01 root] (utils.py 283): INFO Epoch: [3] [1760/2502] eta: 0:35:31 lr: 0.000019 loss_cls: 2.7702 (2.5940) grad_norm: 1.1113 (1.4184) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 02:01:29 root] (utils.py 283): INFO Epoch: [3] [1770/2502] eta: 0:35:02 lr: 0.000019 loss_cls: 2.7670 (2.5941) grad_norm: 1.1284 (1.4169) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 02:01:58 root] (utils.py 283): INFO Epoch: [3] [1780/2502] eta: 0:34:33 lr: 0.000019 loss_cls: 2.8352 (2.5951) grad_norm: 1.1705 (1.4190) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 02:02:27 root] (utils.py 283): INFO Epoch: [3] [1790/2502] eta: 0:34:04 lr: 0.000019 loss_cls: 2.7465 (2.5942) grad_norm: 1.1178 (1.4181) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 02:02:56 root] (utils.py 283): INFO Epoch: [3] [1800/2502] eta: 0:33:36 lr: 0.000019 loss_cls: 2.5605 (2.5944) grad_norm: 1.1486 (1.4211) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 02:03:24 root] (utils.py 283): INFO Epoch: [3] [1810/2502] eta: 0:33:07 lr: 0.000019 loss_cls: 2.5605 (2.5931) grad_norm: 1.1815 (1.4200) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 02:03:53 root] (utils.py 283): INFO Epoch: [3] [1820/2502] eta: 0:32:38 lr: 0.000019 loss_cls: 2.5598 (2.5929) grad_norm: 1.0780 (1.4182) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 02:04:22 root] (utils.py 283): INFO Epoch: [3] [1830/2502] eta: 0:32:10 lr: 0.000019 loss_cls: 2.5494 (2.5921) grad_norm: 1.0417 (1.4165) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 02:04:50 root] (utils.py 283): INFO Epoch: [3] [1840/2502] eta: 0:31:41 lr: 0.000019 loss_cls: 2.4860 (2.5916) grad_norm: 1.1228 (1.4155) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 02:05:19 root] (utils.py 283): INFO Epoch: [3] [1850/2502] eta: 0:31:12 lr: 0.000019 loss_cls: 2.5815 (2.5918) grad_norm: 1.1552 (1.4149) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 02:05:48 root] (utils.py 283): INFO Epoch: [3] [1860/2502] eta: 0:30:43 lr: 0.000019 loss_cls: 2.7799 (2.5931) grad_norm: 1.1386 (1.4135) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 02:06:17 root] (utils.py 283): INFO Epoch: [3] [1870/2502] eta: 0:30:15 lr: 0.000019 loss_cls: 2.7147 (2.5924) grad_norm: 1.1386 (1.4136) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 02:06:45 root] (utils.py 283): INFO Epoch: [3] [1880/2502] eta: 0:29:46 lr: 0.000019 loss_cls: 2.7147 (2.5929) grad_norm: 1.1423 (1.4123) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 02:07:14 root] (utils.py 283): INFO Epoch: [3] [1890/2502] eta: 0:29:17 lr: 0.000019 loss_cls: 2.7929 (2.5935) grad_norm: 1.1605 (1.4111) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 02:07:43 root] (utils.py 283): INFO Epoch: [3] [1900/2502] eta: 0:28:48 lr: 0.000019 loss_cls: 2.7351 (2.5931) grad_norm: 1.1739 (1.4101) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 02:08:11 root] (utils.py 283): INFO Epoch: [3] [1910/2502] eta: 0:28:20 lr: 0.000019 loss_cls: 2.7495 (2.5938) grad_norm: 1.1797 (1.4090) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 02:08:40 root] (utils.py 283): INFO Epoch: [3] [1920/2502] eta: 0:27:51 lr: 0.000019 loss_cls: 2.8277 (2.5949) grad_norm: 1.1593 (1.4121) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 02:09:09 root] (utils.py 283): INFO Epoch: [3] [1930/2502] eta: 0:27:22 lr: 0.000019 loss_cls: 2.7623 (2.5952) grad_norm: 1.2078 (1.4121) time: 2.8739 data: 0.0003 max mem: 28454 +[2024-12-12 02:09:38 root] (utils.py 283): INFO Epoch: [3] [1940/2502] eta: 0:26:54 lr: 0.000019 loss_cls: 2.7406 (2.5959) grad_norm: 1.2078 (1.4107) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 02:10:06 root] (utils.py 283): INFO Epoch: [3] [1950/2502] eta: 0:26:25 lr: 0.000019 loss_cls: 2.6534 (2.5955) grad_norm: 1.0915 (1.4094) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 02:10:35 root] (utils.py 283): INFO Epoch: [3] [1960/2502] eta: 0:25:56 lr: 0.000019 loss_cls: 2.6794 (2.5965) grad_norm: 1.1054 (1.4083) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 02:11:04 root] (utils.py 283): INFO Epoch: [3] [1970/2502] eta: 0:25:27 lr: 0.000019 loss_cls: 2.7284 (2.5965) grad_norm: 1.1284 (1.4071) time: 2.8749 data: 0.0003 max mem: 28454 +[2024-12-12 02:11:33 root] (utils.py 283): INFO Epoch: [3] [1980/2502] eta: 0:24:59 lr: 0.000019 loss_cls: 2.6199 (2.5971) grad_norm: 1.1864 (1.4282) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 02:12:01 root] (utils.py 283): INFO Epoch: [3] [1990/2502] eta: 0:24:30 lr: 0.000019 loss_cls: 2.7383 (2.5980) grad_norm: 1.4608 (1.4317) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 02:12:30 root] (utils.py 283): INFO Epoch: [3] [2000/2502] eta: 0:24:01 lr: 0.000019 loss_cls: 2.3493 (2.5960) grad_norm: 1.5129 (1.4333) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 02:12:59 root] (utils.py 283): INFO Epoch: [3] [2010/2502] eta: 0:23:33 lr: 0.000019 loss_cls: 2.2264 (2.5951) grad_norm: 1.3722 (1.4344) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 02:13:28 root] (utils.py 283): INFO Epoch: [3] [2020/2502] eta: 0:23:04 lr: 0.000019 loss_cls: 2.5332 (2.5956) grad_norm: 1.1828 (1.4332) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 02:13:56 root] (utils.py 283): INFO Epoch: [3] [2030/2502] eta: 0:22:35 lr: 0.000019 loss_cls: 2.7387 (2.5964) grad_norm: 1.1920 (1.4321) time: 2.8818 data: 0.0002 max mem: 28454 +[2024-12-12 02:14:25 root] (utils.py 283): INFO Epoch: [3] [2040/2502] eta: 0:22:06 lr: 0.000019 loss_cls: 2.5633 (2.5960) grad_norm: 1.1920 (1.4314) time: 2.8829 data: 0.0002 max mem: 28454 +[2024-12-12 02:14:54 root] (utils.py 283): INFO Epoch: [3] [2050/2502] eta: 0:21:38 lr: 0.000019 loss_cls: 2.5920 (2.5960) grad_norm: 1.1588 (1.4305) time: 2.8807 data: 0.0002 max mem: 28454 +[2024-12-12 02:15:23 root] (utils.py 283): INFO Epoch: [3] [2060/2502] eta: 0:21:09 lr: 0.000019 loss_cls: 2.6362 (2.5962) grad_norm: 1.1707 (1.4294) time: 2.8782 data: 0.0002 max mem: 28454 +[2024-12-12 02:15:52 root] (utils.py 283): INFO Epoch: [3] [2070/2502] eta: 0:20:40 lr: 0.000019 loss_cls: 2.6362 (2.5961) grad_norm: 1.1196 (1.4290) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 02:16:20 root] (utils.py 283): INFO Epoch: [3] [2080/2502] eta: 0:20:12 lr: 0.000019 loss_cls: 2.4777 (2.5956) grad_norm: 1.1196 (1.4283) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 02:16:49 root] (utils.py 283): INFO Epoch: [3] [2090/2502] eta: 0:19:43 lr: 0.000019 loss_cls: 2.4777 (2.5953) grad_norm: 1.1380 (1.4271) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 02:17:18 root] (utils.py 283): INFO Epoch: [3] [2100/2502] eta: 0:19:14 lr: 0.000019 loss_cls: 2.5460 (2.5955) grad_norm: 1.1380 (1.4263) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 02:17:47 root] (utils.py 283): INFO Epoch: [3] [2110/2502] eta: 0:18:46 lr: 0.000019 loss_cls: 2.5460 (2.5945) grad_norm: 1.1091 (1.4252) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-12 02:18:15 root] (utils.py 283): INFO Epoch: [3] [2120/2502] eta: 0:18:17 lr: 0.000019 loss_cls: 2.7581 (2.5958) grad_norm: 1.1452 (1.4260) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 02:18:44 root] (utils.py 283): INFO Epoch: [3] [2130/2502] eta: 0:17:48 lr: 0.000019 loss_cls: 2.7581 (2.5951) grad_norm: 1.0747 (1.4250) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 02:19:13 root] (utils.py 283): INFO Epoch: [3] [2140/2502] eta: 0:17:19 lr: 0.000019 loss_cls: 2.4405 (2.5943) grad_norm: 1.0577 (1.4234) time: 2.8771 data: 0.0002 max mem: 28454 +[2024-12-12 02:19:42 root] (utils.py 283): INFO Epoch: [3] [2150/2502] eta: 0:16:51 lr: 0.000019 loss_cls: 2.5920 (2.5948) grad_norm: 1.0636 (1.4221) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 02:20:10 root] (utils.py 283): INFO Epoch: [3] [2160/2502] eta: 0:16:22 lr: 0.000019 loss_cls: 2.7010 (2.5954) grad_norm: 1.0850 (1.4206) time: 2.8739 data: 0.0003 max mem: 28454 +[2024-12-12 02:20:39 root] (utils.py 283): INFO Epoch: [3] [2170/2502] eta: 0:15:53 lr: 0.000019 loss_cls: 2.7719 (2.5961) grad_norm: 1.0850 (1.4191) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 02:21:08 root] (utils.py 283): INFO Epoch: [3] [2180/2502] eta: 0:15:24 lr: 0.000019 loss_cls: 2.7772 (2.5968) grad_norm: 1.1184 (1.4452) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 02:21:37 root] (utils.py 283): INFO Epoch: [3] [2190/2502] eta: 0:14:56 lr: 0.000019 loss_cls: 2.8113 (2.5975) grad_norm: 1.1551 (1.4448) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 02:22:05 root] (utils.py 283): INFO Epoch: [3] [2200/2502] eta: 0:14:27 lr: 0.000019 loss_cls: 2.8302 (2.5982) grad_norm: 1.1740 (1.4440) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 02:22:34 root] (utils.py 283): INFO Epoch: [3] [2210/2502] eta: 0:13:58 lr: 0.000019 loss_cls: 2.8105 (2.5988) grad_norm: 1.1660 (1.4427) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 02:23:03 root] (utils.py 283): INFO Epoch: [3] [2220/2502] eta: 0:13:30 lr: 0.000019 loss_cls: 2.7391 (2.5988) grad_norm: 1.1118 (1.4416) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 02:23:32 root] (utils.py 283): INFO Epoch: [3] [2230/2502] eta: 0:13:01 lr: 0.000019 loss_cls: 2.7092 (2.5984) grad_norm: 1.1027 (1.4407) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 02:24:01 root] (utils.py 283): INFO Epoch: [3] [2240/2502] eta: 0:12:32 lr: 0.000019 loss_cls: 2.5068 (2.5987) grad_norm: 1.1448 (1.4401) time: 2.8785 data: 0.0002 max mem: 28454 +[2024-12-12 02:24:29 root] (utils.py 283): INFO Epoch: [3] [2250/2502] eta: 0:12:03 lr: 0.000019 loss_cls: 2.5068 (2.5980) grad_norm: 1.1448 (1.4387) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 02:24:58 root] (utils.py 283): INFO Epoch: [3] [2260/2502] eta: 0:11:35 lr: 0.000019 loss_cls: 2.5679 (2.5976) grad_norm: 1.0554 (1.4378) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 02:25:27 root] (utils.py 283): INFO Epoch: [3] [2270/2502] eta: 0:11:06 lr: 0.000019 loss_cls: 2.6182 (2.5973) grad_norm: 1.1257 (1.4369) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 02:25:56 root] (utils.py 283): INFO Epoch: [3] [2280/2502] eta: 0:10:37 lr: 0.000019 loss_cls: 2.6185 (2.5975) grad_norm: 1.1557 (1.4357) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 02:26:24 root] (utils.py 283): INFO Epoch: [3] [2290/2502] eta: 0:10:09 lr: 0.000019 loss_cls: 2.6185 (2.5978) grad_norm: 1.1102 (1.4345) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 02:26:53 root] (utils.py 283): INFO Epoch: [3] [2300/2502] eta: 0:09:40 lr: 0.000019 loss_cls: 2.8247 (2.5984) grad_norm: 1.1040 (1.4349) time: 2.8802 data: 0.0002 max mem: 28454 +[2024-12-12 02:27:22 root] (utils.py 283): INFO Epoch: [3] [2310/2502] eta: 0:09:11 lr: 0.000019 loss_cls: 2.7431 (2.5977) grad_norm: 1.0809 (1.4335) time: 2.8791 data: 0.0003 max mem: 28454 +[2024-12-12 02:27:51 root] (utils.py 283): INFO Epoch: [3] [2320/2502] eta: 0:08:42 lr: 0.000019 loss_cls: 2.7106 (2.5981) grad_norm: 1.0809 (1.4325) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 02:28:19 root] (utils.py 283): INFO Epoch: [3] [2330/2502] eta: 0:08:14 lr: 0.000019 loss_cls: 2.7507 (2.5980) grad_norm: 1.1270 (1.4328) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 02:28:48 root] (utils.py 283): INFO Epoch: [3] [2340/2502] eta: 0:07:45 lr: 0.000019 loss_cls: 2.5288 (2.5976) grad_norm: 1.0849 (1.4329) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 02:29:17 root] (utils.py 283): INFO Epoch: [3] [2350/2502] eta: 0:07:16 lr: 0.000019 loss_cls: 2.4550 (2.5968) grad_norm: 1.1137 (1.4381) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 02:29:46 root] (utils.py 283): INFO Epoch: [3] [2360/2502] eta: 0:06:47 lr: 0.000019 loss_cls: 2.6475 (2.5974) grad_norm: 1.2789 (1.4385) time: 2.8744 data: 0.0003 max mem: 28454 +[2024-12-12 02:30:14 root] (utils.py 283): INFO Epoch: [3] [2370/2502] eta: 0:06:19 lr: 0.000019 loss_cls: 2.7301 (2.5972) grad_norm: 1.1959 (1.4378) time: 2.8748 data: 0.0003 max mem: 28454 +[2024-12-12 02:30:43 root] (utils.py 283): INFO Epoch: [3] [2380/2502] eta: 0:05:50 lr: 0.000019 loss_cls: 2.7342 (2.5975) grad_norm: 1.1068 (1.4375) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-12 02:31:12 root] (utils.py 283): INFO Epoch: [3] [2390/2502] eta: 0:05:21 lr: 0.000019 loss_cls: 2.6849 (2.5976) grad_norm: 1.0952 (1.4392) time: 2.8867 data: 0.0002 max mem: 28454 +[2024-12-12 02:31:41 root] (utils.py 283): INFO Epoch: [3] [2400/2502] eta: 0:04:53 lr: 0.000019 loss_cls: 2.5846 (2.5973) grad_norm: 1.1997 (1.4387) time: 2.8880 data: 0.0002 max mem: 28454 +[2024-12-12 02:32:10 root] (utils.py 283): INFO Epoch: [3] [2410/2502] eta: 0:04:24 lr: 0.000019 loss_cls: 2.7698 (2.5983) grad_norm: 1.2430 (1.4401) time: 2.8820 data: 0.0002 max mem: 28454 +[2024-12-12 02:32:39 root] (utils.py 283): INFO Epoch: [3] [2420/2502] eta: 0:03:55 lr: 0.000019 loss_cls: 2.7744 (2.5980) grad_norm: 1.1723 (1.4388) time: 2.8808 data: 0.0002 max mem: 28454 +[2024-12-12 02:33:07 root] (utils.py 283): INFO Epoch: [3] [2430/2502] eta: 0:03:26 lr: 0.000019 loss_cls: 2.7406 (2.5981) grad_norm: 1.1312 (1.4378) time: 2.8798 data: 0.0002 max mem: 28454 +[2024-12-12 02:33:36 root] (utils.py 283): INFO Epoch: [3] [2440/2502] eta: 0:02:58 lr: 0.000019 loss_cls: 2.7924 (2.5987) grad_norm: 1.1243 (1.4367) time: 2.8808 data: 0.0002 max mem: 28454 +[2024-12-12 02:34:05 root] (utils.py 283): INFO Epoch: [3] [2450/2502] eta: 0:02:29 lr: 0.000019 loss_cls: 2.6141 (2.5981) grad_norm: 1.1020 (1.4387) time: 2.8825 data: 0.0002 max mem: 28454 +[2024-12-12 02:34:34 root] (utils.py 283): INFO Epoch: [3] [2460/2502] eta: 0:02:00 lr: 0.000019 loss_cls: 2.4148 (2.5978) grad_norm: 1.1409 (1.4379) time: 2.8818 data: 0.0002 max mem: 28454 +[2024-12-12 02:35:03 root] (utils.py 283): INFO Epoch: [3] [2470/2502] eta: 0:01:31 lr: 0.000019 loss_cls: 2.5398 (2.5980) grad_norm: 1.1287 (1.4367) time: 2.8807 data: 0.0002 max mem: 28454 +[2024-12-12 02:35:31 root] (utils.py 283): INFO Epoch: [3] [2480/2502] eta: 0:01:03 lr: 0.000019 loss_cls: 2.6251 (2.5974) grad_norm: 1.1216 (1.4355) time: 2.8793 data: 0.0002 max mem: 28454 +[2024-12-12 02:36:01 root] (utils.py 283): INFO Epoch: [3] [2490/2502] eta: 0:00:34 lr: 0.000019 loss_cls: 2.5294 (2.5969) grad_norm: 1.0951 (1.4340) time: 2.8979 data: 0.0206 max mem: 28454 +[2024-12-12 02:36:29 root] (utils.py 283): INFO Epoch: [3] [2500/2502] eta: 0:00:05 lr: 0.000019 loss_cls: 2.5950 (2.5974) grad_norm: 1.1053 (1.4331) time: 2.8988 data: 0.0206 max mem: 28454 +[2024-12-12 02:36:32 root] (utils.py 283): INFO Epoch: [3] [2501/2502] eta: 0:00:02 lr: 0.000019 loss_cls: 2.5294 (2.5973) grad_norm: 1.1053 (1.4330) time: 2.8979 data: 0.0206 max mem: 28454 +[2024-12-12 02:36:32 root] (utils.py 297): INFO Epoch: [3] Total time: 1:59:49 (2.8735 s / it) +[2024-12-12 02:36:32 root] (engine.py 179): INFO Averaged stats:lr: 0.000019 loss_cls: 2.5294 (2.5918) grad_norm: 1.1053 (1.4330) +[2024-12-12 02:36:35 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4334 (0.4334) acc1: 90.6250 (90.6250) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5422 data: 0.0003 max mem: 28454 +[2024-12-12 02:36:41 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6327 (0.5895) acc1: 85.1562 (87.3580) acc3: 96.8750 (96.8040) acc5: 98.4375 (98.2955) time: 0.5473 data: 0.0006 max mem: 28454 +[2024-12-12 02:36:46 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5902 (0.6180) acc1: 87.5000 (86.9792) acc3: 96.8750 (96.5402) acc5: 98.4375 (97.8423) time: 0.5479 data: 0.0005 max mem: 28454 +[2024-12-12 02:36:52 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5902 (0.6493) acc1: 87.5000 (85.9123) acc3: 96.0938 (96.2954) acc5: 97.6562 (97.6562) time: 0.5492 data: 0.0005 max mem: 28454 +[2024-12-12 02:36:57 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6758 (0.6557) acc1: 85.1562 (85.7470) acc3: 96.0938 (96.2271) acc5: 96.8750 (97.5800) time: 0.5496 data: 0.0005 max mem: 28454 +[2024-12-12 02:37:03 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8081 (0.7354) acc1: 79.6875 (83.8235) acc3: 92.9688 (95.1134) acc5: 96.0938 (96.8444) time: 0.5487 data: 0.0004 max mem: 28454 +[2024-12-12 02:37:08 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9978 (0.7645) acc1: 78.1250 (83.3760) acc3: 90.6250 (94.5312) acc5: 93.7500 (96.4395) time: 0.5490 data: 0.0004 max mem: 28454 +[2024-12-12 02:37:14 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9767 (0.7946) acc1: 78.1250 (82.5704) acc3: 92.1875 (94.2342) acc5: 94.5312 (96.1818) time: 0.5491 data: 0.0005 max mem: 28454 +[2024-12-12 02:37:19 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9767 (0.8152) acc1: 78.1250 (82.1373) acc3: 91.4062 (93.9429) acc5: 94.5312 (95.9201) time: 0.5494 data: 0.0007 max mem: 28454 +[2024-12-12 02:37:25 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9937 (0.8357) acc1: 78.9062 (81.4818) acc3: 90.6250 (93.5697) acc5: 93.7500 (95.7246) time: 0.5500 data: 0.0007 max mem: 28454 +[2024-12-12 02:37:28 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8511 (0.8320) acc1: 78.9062 (81.5360) acc3: 91.4062 (93.6240) acc5: 94.5312 (95.8000) time: 0.5403 data: 0.0006 max mem: 28454 +[2024-12-12 02:37:28 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5473 s / it) +[2024-12-12 02:37:28 root] (engine.py 264): INFO * Acc@1 81.736 Acc@3 93.484 Acc@5 95.808 loss 0.830 flops 13.207 layer_flops 13.109 +[2024-12-12 02:37:28 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.7% +[2024-12-12 02:37:28 root] (main.py 576): INFO Max accuracy: 81.74% +[2024-12-12 02:37:31 root] (utils.py 283): INFO Epoch: [4] [ 0/2502] eta: 1:58:44 lr: 0.000018 loss_cls: 2.5688 (2.5688) grad_norm: 1.3295 (1.3295) time: 2.8474 data: 0.0002 max mem: 28454 +[2024-12-12 02:38:00 root] (utils.py 283): INFO Epoch: [4] [ 10/2502] eta: 1:59:24 lr: 0.000018 loss_cls: 2.7329 (2.6547) grad_norm: 1.1665 (1.2221) time: 2.8751 data: 0.0003 max mem: 28454 +[2024-12-12 02:38:29 root] (utils.py 283): INFO Epoch: [4] [ 20/2502] eta: 1:59:03 lr: 0.000018 loss_cls: 2.7442 (2.7032) grad_norm: 1.1665 (1.2362) time: 2.8795 data: 0.0003 max mem: 28454 +[2024-12-12 02:38:58 root] (utils.py 283): INFO Epoch: [4] [ 30/2502] eta: 1:58:36 lr: 0.000018 loss_cls: 2.8699 (2.7170) grad_norm: 1.2379 (1.2261) time: 2.8812 data: 0.0003 max mem: 28454 +[2024-12-12 02:39:26 root] (utils.py 283): INFO Epoch: [4] [ 40/2502] eta: 1:58:08 lr: 0.000018 loss_cls: 2.6348 (2.6208) grad_norm: 1.0741 (1.1947) time: 2.8802 data: 0.0002 max mem: 28454 +[2024-12-12 02:39:55 root] (utils.py 283): INFO Epoch: [4] [ 50/2502] eta: 1:57:41 lr: 0.000018 loss_cls: 2.6468 (2.6588) grad_norm: 1.0741 (1.1913) time: 2.8810 data: 0.0002 max mem: 28454 +[2024-12-12 02:40:24 root] (utils.py 283): INFO Epoch: [4] [ 60/2502] eta: 1:57:11 lr: 0.000018 loss_cls: 2.7079 (2.6520) grad_norm: 1.0736 (1.1780) time: 2.8802 data: 0.0003 max mem: 28454 +[2024-12-12 02:40:53 root] (utils.py 283): INFO Epoch: [4] [ 70/2502] eta: 1:56:41 lr: 0.000018 loss_cls: 2.6675 (2.6575) grad_norm: 1.1314 (1.2248) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 02:41:22 root] (utils.py 283): INFO Epoch: [4] [ 80/2502] eta: 1:56:12 lr: 0.000018 loss_cls: 2.7262 (2.6490) grad_norm: 1.0950 (1.2090) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 02:41:50 root] (utils.py 283): INFO Epoch: [4] [ 90/2502] eta: 1:55:43 lr: 0.000018 loss_cls: 2.7192 (2.6603) grad_norm: 1.1864 (1.2601) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 02:42:19 root] (utils.py 283): INFO Epoch: [4] [ 100/2502] eta: 1:55:14 lr: 0.000018 loss_cls: 2.6900 (2.6456) grad_norm: 1.2011 (1.2486) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 02:42:48 root] (utils.py 283): INFO Epoch: [4] [ 110/2502] eta: 1:54:45 lr: 0.000018 loss_cls: 2.6054 (2.6563) grad_norm: 1.1724 (1.3479) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 02:43:17 root] (utils.py 283): INFO Epoch: [4] [ 120/2502] eta: 1:54:15 lr: 0.000018 loss_cls: 2.8241 (2.6507) grad_norm: 1.1931 (1.3842) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 02:43:45 root] (utils.py 283): INFO Epoch: [4] [ 130/2502] eta: 1:53:44 lr: 0.000018 loss_cls: 2.7189 (2.6520) grad_norm: 1.1904 (1.3957) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 02:44:14 root] (utils.py 283): INFO Epoch: [4] [ 140/2502] eta: 1:53:15 lr: 0.000018 loss_cls: 2.5732 (2.6347) grad_norm: 1.1894 (1.3774) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 02:44:43 root] (utils.py 283): INFO Epoch: [4] [ 150/2502] eta: 1:52:46 lr: 0.000018 loss_cls: 2.4422 (2.6257) grad_norm: 1.1807 (1.4144) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 02:45:12 root] (utils.py 283): INFO Epoch: [4] [ 160/2502] eta: 1:52:16 lr: 0.000018 loss_cls: 2.6753 (2.6280) grad_norm: 1.1807 (1.4031) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 02:45:40 root] (utils.py 283): INFO Epoch: [4] [ 170/2502] eta: 1:51:47 lr: 0.000018 loss_cls: 2.7629 (2.6355) grad_norm: 1.1706 (1.3989) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 02:46:09 root] (utils.py 283): INFO Epoch: [4] [ 180/2502] eta: 1:51:17 lr: 0.000018 loss_cls: 2.7736 (2.6291) grad_norm: 1.1301 (1.3893) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 02:46:38 root] (utils.py 283): INFO Epoch: [4] [ 190/2502] eta: 1:50:48 lr: 0.000018 loss_cls: 2.8292 (2.6383) grad_norm: 1.0835 (1.3763) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 02:47:06 root] (utils.py 283): INFO Epoch: [4] [ 200/2502] eta: 1:50:19 lr: 0.000018 loss_cls: 2.8059 (2.6421) grad_norm: 1.1065 (1.3679) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 02:47:35 root] (utils.py 283): INFO Epoch: [4] [ 210/2502] eta: 1:49:49 lr: 0.000018 loss_cls: 2.7406 (2.6359) grad_norm: 1.0698 (1.3549) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 02:48:04 root] (utils.py 283): INFO Epoch: [4] [ 220/2502] eta: 1:49:21 lr: 0.000018 loss_cls: 2.4932 (2.6271) grad_norm: 1.0953 (1.3486) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 02:48:32 root] (utils.py 283): INFO Epoch: [4] [ 230/2502] eta: 1:48:51 lr: 0.000018 loss_cls: 2.5745 (2.6286) grad_norm: 1.0953 (1.3402) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 02:49:01 root] (utils.py 283): INFO Epoch: [4] [ 240/2502] eta: 1:48:22 lr: 0.000018 loss_cls: 2.8391 (2.6335) grad_norm: 1.0957 (1.3960) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 02:49:30 root] (utils.py 283): INFO Epoch: [4] [ 250/2502] eta: 1:47:53 lr: 0.000018 loss_cls: 2.7898 (2.6391) grad_norm: 1.1752 (1.3879) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 02:49:59 root] (utils.py 283): INFO Epoch: [4] [ 260/2502] eta: 1:47:23 lr: 0.000018 loss_cls: 2.7133 (2.6411) grad_norm: 1.1850 (1.3952) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 02:50:27 root] (utils.py 283): INFO Epoch: [4] [ 270/2502] eta: 1:46:54 lr: 0.000018 loss_cls: 2.5773 (2.6324) grad_norm: 1.1850 (1.3876) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 02:50:56 root] (utils.py 283): INFO Epoch: [4] [ 280/2502] eta: 1:46:25 lr: 0.000018 loss_cls: 2.5773 (2.6344) grad_norm: 1.1574 (1.3793) time: 2.8671 data: 0.0003 max mem: 28454 +[2024-12-12 02:51:25 root] (utils.py 283): INFO Epoch: [4] [ 290/2502] eta: 1:45:55 lr: 0.000018 loss_cls: 2.6624 (2.6340) grad_norm: 1.1173 (1.3707) time: 2.8657 data: 0.0003 max mem: 28454 +[2024-12-12 02:51:53 root] (utils.py 283): INFO Epoch: [4] [ 300/2502] eta: 1:45:26 lr: 0.000018 loss_cls: 2.7197 (2.6376) grad_norm: 1.1593 (1.4063) time: 2.8655 data: 0.0003 max mem: 28454 +[2024-12-12 02:52:22 root] (utils.py 283): INFO Epoch: [4] [ 310/2502] eta: 1:44:57 lr: 0.000018 loss_cls: 2.5083 (2.6295) grad_norm: 1.2888 (1.4038) time: 2.8654 data: 0.0003 max mem: 28454 +[2024-12-12 02:52:51 root] (utils.py 283): INFO Epoch: [4] [ 320/2502] eta: 1:44:27 lr: 0.000018 loss_cls: 2.4612 (2.6291) grad_norm: 1.2907 (1.4010) time: 2.8638 data: 0.0003 max mem: 28454 +[2024-12-12 02:53:19 root] (utils.py 283): INFO Epoch: [4] [ 330/2502] eta: 1:43:58 lr: 0.000018 loss_cls: 2.7051 (2.6302) grad_norm: 1.1467 (1.3922) time: 2.8644 data: 0.0002 max mem: 28454 +[2024-12-12 02:53:48 root] (utils.py 283): INFO Epoch: [4] [ 340/2502] eta: 1:43:29 lr: 0.000018 loss_cls: 2.6946 (2.6318) grad_norm: 1.1397 (1.3842) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 02:54:16 root] (utils.py 283): INFO Epoch: [4] [ 350/2502] eta: 1:43:00 lr: 0.000018 loss_cls: 2.7199 (2.6331) grad_norm: 1.1613 (1.3834) time: 2.8643 data: 0.0002 max mem: 28454 +[2024-12-12 02:54:45 root] (utils.py 283): INFO Epoch: [4] [ 360/2502] eta: 1:42:31 lr: 0.000018 loss_cls: 2.7301 (2.6366) grad_norm: 1.1613 (1.3805) time: 2.8644 data: 0.0002 max mem: 28454 +[2024-12-12 02:55:14 root] (utils.py 283): INFO Epoch: [4] [ 370/2502] eta: 1:42:02 lr: 0.000018 loss_cls: 2.6653 (2.6274) grad_norm: 1.2224 (1.4304) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 02:55:42 root] (utils.py 283): INFO Epoch: [4] [ 380/2502] eta: 1:41:33 lr: 0.000018 loss_cls: 2.3876 (2.6223) grad_norm: 1.3104 (1.4341) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 02:56:11 root] (utils.py 283): INFO Epoch: [4] [ 390/2502] eta: 1:41:04 lr: 0.000018 loss_cls: 2.6069 (2.6215) grad_norm: 1.3256 (1.4369) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 02:56:40 root] (utils.py 283): INFO Epoch: [4] [ 400/2502] eta: 1:40:35 lr: 0.000018 loss_cls: 2.5223 (2.6167) grad_norm: 1.2093 (1.4309) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 02:57:09 root] (utils.py 283): INFO Epoch: [4] [ 410/2502] eta: 1:40:07 lr: 0.000018 loss_cls: 2.5223 (2.6165) grad_norm: 1.1340 (1.4287) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 02:57:37 root] (utils.py 283): INFO Epoch: [4] [ 420/2502] eta: 1:39:38 lr: 0.000018 loss_cls: 2.4440 (2.6131) grad_norm: 1.1237 (1.4248) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 02:58:06 root] (utils.py 283): INFO Epoch: [4] [ 430/2502] eta: 1:39:09 lr: 0.000018 loss_cls: 2.4440 (2.6102) grad_norm: 1.1465 (1.4191) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 02:58:35 root] (utils.py 283): INFO Epoch: [4] [ 440/2502] eta: 1:38:40 lr: 0.000018 loss_cls: 2.6397 (2.6133) grad_norm: 1.1713 (1.4200) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 02:59:03 root] (utils.py 283): INFO Epoch: [4] [ 450/2502] eta: 1:38:11 lr: 0.000018 loss_cls: 2.6268 (2.6118) grad_norm: 1.1664 (1.4127) time: 2.8679 data: 0.0003 max mem: 28454 +[2024-12-12 02:59:32 root] (utils.py 283): INFO Epoch: [4] [ 460/2502] eta: 1:37:42 lr: 0.000018 loss_cls: 2.6268 (2.6114) grad_norm: 1.0981 (1.4136) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 03:00:01 root] (utils.py 283): INFO Epoch: [4] [ 470/2502] eta: 1:37:13 lr: 0.000018 loss_cls: 2.7079 (2.6144) grad_norm: 1.1071 (1.4084) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 03:00:29 root] (utils.py 283): INFO Epoch: [4] [ 480/2502] eta: 1:36:45 lr: 0.000018 loss_cls: 2.6711 (2.6111) grad_norm: 1.0378 (1.4027) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 03:00:58 root] (utils.py 283): INFO Epoch: [4] [ 490/2502] eta: 1:36:16 lr: 0.000018 loss_cls: 2.6244 (2.6098) grad_norm: 1.0991 (1.4026) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 03:01:27 root] (utils.py 283): INFO Epoch: [4] [ 500/2502] eta: 1:35:47 lr: 0.000018 loss_cls: 2.6296 (2.6126) grad_norm: 1.1490 (1.3976) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 03:01:56 root] (utils.py 283): INFO Epoch: [4] [ 510/2502] eta: 1:35:19 lr: 0.000018 loss_cls: 2.6250 (2.6117) grad_norm: 1.1884 (1.3935) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 03:02:24 root] (utils.py 283): INFO Epoch: [4] [ 520/2502] eta: 1:34:50 lr: 0.000018 loss_cls: 2.6057 (2.6118) grad_norm: 1.2080 (1.3904) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 03:02:53 root] (utils.py 283): INFO Epoch: [4] [ 530/2502] eta: 1:34:21 lr: 0.000018 loss_cls: 2.4180 (2.6071) grad_norm: 1.1686 (1.4032) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 03:03:22 root] (utils.py 283): INFO Epoch: [4] [ 540/2502] eta: 1:33:52 lr: 0.000018 loss_cls: 2.4666 (2.6089) grad_norm: 1.0798 (1.3983) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 03:03:50 root] (utils.py 283): INFO Epoch: [4] [ 550/2502] eta: 1:33:23 lr: 0.000018 loss_cls: 2.8105 (2.6085) grad_norm: 1.1593 (1.3947) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 03:04:19 root] (utils.py 283): INFO Epoch: [4] [ 560/2502] eta: 1:32:54 lr: 0.000018 loss_cls: 2.3383 (2.6023) grad_norm: 1.1579 (1.3924) time: 2.8665 data: 0.0002 max mem: 28454 +[2024-12-12 03:04:48 root] (utils.py 283): INFO Epoch: [4] [ 570/2502] eta: 1:32:25 lr: 0.000018 loss_cls: 2.3617 (2.6030) grad_norm: 1.0580 (1.3870) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 03:05:16 root] (utils.py 283): INFO Epoch: [4] [ 580/2502] eta: 1:31:57 lr: 0.000018 loss_cls: 2.6268 (2.6024) grad_norm: 1.0580 (1.3829) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 03:05:45 root] (utils.py 283): INFO Epoch: [4] [ 590/2502] eta: 1:31:28 lr: 0.000018 loss_cls: 2.6164 (2.5992) grad_norm: 1.0645 (1.3777) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 03:06:14 root] (utils.py 283): INFO Epoch: [4] [ 600/2502] eta: 1:30:59 lr: 0.000018 loss_cls: 2.6164 (2.5991) grad_norm: 1.0720 (1.3729) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 03:06:42 root] (utils.py 283): INFO Epoch: [4] [ 610/2502] eta: 1:30:30 lr: 0.000018 loss_cls: 2.6550 (2.5981) grad_norm: 1.1190 (1.3698) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 03:07:11 root] (utils.py 283): INFO Epoch: [4] [ 620/2502] eta: 1:30:01 lr: 0.000018 loss_cls: 2.5854 (2.5976) grad_norm: 1.0815 (1.3646) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 03:07:40 root] (utils.py 283): INFO Epoch: [4] [ 630/2502] eta: 1:29:33 lr: 0.000018 loss_cls: 2.7399 (2.6006) grad_norm: 1.0871 (1.3638) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 03:08:08 root] (utils.py 283): INFO Epoch: [4] [ 640/2502] eta: 1:29:04 lr: 0.000018 loss_cls: 2.7002 (2.5984) grad_norm: 1.1879 (1.3609) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 03:08:37 root] (utils.py 283): INFO Epoch: [4] [ 650/2502] eta: 1:28:35 lr: 0.000018 loss_cls: 2.4592 (2.5954) grad_norm: 1.1675 (1.3592) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 03:09:06 root] (utils.py 283): INFO Epoch: [4] [ 660/2502] eta: 1:28:06 lr: 0.000018 loss_cls: 2.6157 (2.5959) grad_norm: 1.0948 (1.3558) time: 2.8652 data: 0.0003 max mem: 28454 +[2024-12-12 03:09:34 root] (utils.py 283): INFO Epoch: [4] [ 670/2502] eta: 1:27:38 lr: 0.000018 loss_cls: 2.6902 (2.5972) grad_norm: 1.0792 (1.3515) time: 2.8667 data: 0.0002 max mem: 28454 +[2024-12-12 03:10:03 root] (utils.py 283): INFO Epoch: [4] [ 680/2502] eta: 1:27:09 lr: 0.000018 loss_cls: 2.7068 (2.5994) grad_norm: 1.1226 (1.3573) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 03:10:32 root] (utils.py 283): INFO Epoch: [4] [ 690/2502] eta: 1:26:40 lr: 0.000018 loss_cls: 2.7241 (2.5999) grad_norm: 1.1339 (1.3541) time: 2.8647 data: 0.0002 max mem: 28454 +[2024-12-12 03:11:00 root] (utils.py 283): INFO Epoch: [4] [ 700/2502] eta: 1:26:11 lr: 0.000018 loss_cls: 2.7241 (2.6007) grad_norm: 1.1666 (1.3551) time: 2.8642 data: 0.0002 max mem: 28454 +[2024-12-12 03:11:29 root] (utils.py 283): INFO Epoch: [4] [ 710/2502] eta: 1:25:42 lr: 0.000018 loss_cls: 2.8244 (2.6007) grad_norm: 1.1676 (1.3585) time: 2.8632 data: 0.0002 max mem: 28454 +[2024-12-12 03:11:57 root] (utils.py 283): INFO Epoch: [4] [ 720/2502] eta: 1:25:13 lr: 0.000018 loss_cls: 2.6803 (2.5992) grad_norm: 1.1070 (1.3652) time: 2.8601 data: 0.0002 max mem: 28454 +[2024-12-12 03:12:26 root] (utils.py 283): INFO Epoch: [4] [ 730/2502] eta: 1:24:44 lr: 0.000018 loss_cls: 2.7133 (2.6008) grad_norm: 1.1159 (1.3643) time: 2.8601 data: 0.0002 max mem: 28454 +[2024-12-12 03:12:55 root] (utils.py 283): INFO Epoch: [4] [ 740/2502] eta: 1:24:15 lr: 0.000018 loss_cls: 2.8597 (2.6012) grad_norm: 1.1824 (1.3618) time: 2.8645 data: 0.0002 max mem: 28454 +[2024-12-12 03:13:23 root] (utils.py 283): INFO Epoch: [4] [ 750/2502] eta: 1:23:47 lr: 0.000018 loss_cls: 2.8208 (2.6026) grad_norm: 1.1454 (1.3589) time: 2.8625 data: 0.0002 max mem: 28454 +[2024-12-12 03:13:52 root] (utils.py 283): INFO Epoch: [4] [ 760/2502] eta: 1:23:18 lr: 0.000018 loss_cls: 2.6219 (2.6021) grad_norm: 1.1311 (1.3552) time: 2.8578 data: 0.0002 max mem: 28454 +[2024-12-12 03:14:20 root] (utils.py 283): INFO Epoch: [4] [ 770/2502] eta: 1:22:49 lr: 0.000018 loss_cls: 2.5741 (2.6003) grad_norm: 1.1285 (1.3528) time: 2.8572 data: 0.0002 max mem: 28454 +[2024-12-12 03:14:49 root] (utils.py 283): INFO Epoch: [4] [ 780/2502] eta: 1:22:20 lr: 0.000018 loss_cls: 2.6714 (2.6013) grad_norm: 1.1389 (1.3500) time: 2.8578 data: 0.0002 max mem: 28454 +[2024-12-12 03:15:18 root] (utils.py 283): INFO Epoch: [4] [ 790/2502] eta: 1:21:51 lr: 0.000018 loss_cls: 2.6730 (2.5984) grad_norm: 1.0800 (1.3466) time: 2.8632 data: 0.0002 max mem: 28454 +[2024-12-12 03:15:46 root] (utils.py 283): INFO Epoch: [4] [ 800/2502] eta: 1:21:22 lr: 0.000018 loss_cls: 2.6043 (2.5989) grad_norm: 1.0965 (1.3451) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 03:16:15 root] (utils.py 283): INFO Epoch: [4] [ 810/2502] eta: 1:20:54 lr: 0.000018 loss_cls: 2.7277 (2.5986) grad_norm: 1.1807 (1.3428) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 03:16:44 root] (utils.py 283): INFO Epoch: [4] [ 820/2502] eta: 1:20:25 lr: 0.000018 loss_cls: 2.5036 (2.5959) grad_norm: 1.0616 (1.3393) time: 2.8674 data: 0.0003 max mem: 28454 +[2024-12-12 03:17:12 root] (utils.py 283): INFO Epoch: [4] [ 830/2502] eta: 1:19:56 lr: 0.000018 loss_cls: 2.7020 (2.5979) grad_norm: 1.0595 (1.3395) time: 2.8651 data: 0.0003 max mem: 28454 +[2024-12-12 03:17:41 root] (utils.py 283): INFO Epoch: [4] [ 840/2502] eta: 1:19:27 lr: 0.000018 loss_cls: 2.8131 (2.5979) grad_norm: 1.1398 (1.3376) time: 2.8628 data: 0.0003 max mem: 28454 +[2024-12-12 03:18:10 root] (utils.py 283): INFO Epoch: [4] [ 850/2502] eta: 1:18:58 lr: 0.000018 loss_cls: 2.7421 (2.5988) grad_norm: 1.1424 (1.3371) time: 2.8631 data: 0.0003 max mem: 28454 +[2024-12-12 03:18:38 root] (utils.py 283): INFO Epoch: [4] [ 860/2502] eta: 1:18:30 lr: 0.000018 loss_cls: 2.8070 (2.6000) grad_norm: 1.1352 (1.3346) time: 2.8647 data: 0.0002 max mem: 28454 +[2024-12-12 03:19:07 root] (utils.py 283): INFO Epoch: [4] [ 870/2502] eta: 1:18:01 lr: 0.000018 loss_cls: 2.8093 (2.6002) grad_norm: 1.0966 (1.3346) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 03:19:36 root] (utils.py 283): INFO Epoch: [4] [ 880/2502] eta: 1:17:32 lr: 0.000018 loss_cls: 2.7862 (2.6002) grad_norm: 1.1359 (1.3492) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 03:20:04 root] (utils.py 283): INFO Epoch: [4] [ 890/2502] eta: 1:17:04 lr: 0.000018 loss_cls: 2.7036 (2.5996) grad_norm: 1.1278 (1.3468) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 03:20:33 root] (utils.py 283): INFO Epoch: [4] [ 900/2502] eta: 1:16:35 lr: 0.000018 loss_cls: 2.6433 (2.6006) grad_norm: 1.1278 (1.3458) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 03:21:02 root] (utils.py 283): INFO Epoch: [4] [ 910/2502] eta: 1:16:06 lr: 0.000018 loss_cls: 2.6433 (2.5983) grad_norm: 1.1376 (1.3440) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 03:21:30 root] (utils.py 283): INFO Epoch: [4] [ 920/2502] eta: 1:15:38 lr: 0.000018 loss_cls: 2.3761 (2.5971) grad_norm: 1.1376 (1.3455) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 03:21:59 root] (utils.py 283): INFO Epoch: [4] [ 930/2502] eta: 1:15:09 lr: 0.000018 loss_cls: 2.3761 (2.5956) grad_norm: 1.1162 (1.3432) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 03:22:28 root] (utils.py 283): INFO Epoch: [4] [ 940/2502] eta: 1:14:40 lr: 0.000018 loss_cls: 2.5585 (2.5965) grad_norm: 1.1162 (1.3416) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 03:22:56 root] (utils.py 283): INFO Epoch: [4] [ 950/2502] eta: 1:14:11 lr: 0.000018 loss_cls: 2.5585 (2.5961) grad_norm: 1.1381 (1.3397) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 03:23:25 root] (utils.py 283): INFO Epoch: [4] [ 960/2502] eta: 1:13:43 lr: 0.000018 loss_cls: 2.7506 (2.5980) grad_norm: 1.1158 (1.3384) time: 2.8655 data: 0.0002 max mem: 28454 +[2024-12-12 03:23:54 root] (utils.py 283): INFO Epoch: [4] [ 970/2502] eta: 1:13:14 lr: 0.000018 loss_cls: 2.8383 (2.5981) grad_norm: 1.1005 (1.3364) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 03:24:22 root] (utils.py 283): INFO Epoch: [4] [ 980/2502] eta: 1:12:45 lr: 0.000018 loss_cls: 2.5145 (2.5970) grad_norm: 1.1030 (1.3342) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 03:24:51 root] (utils.py 283): INFO Epoch: [4] [ 990/2502] eta: 1:12:17 lr: 0.000018 loss_cls: 2.6056 (2.5968) grad_norm: 1.0913 (1.3323) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 03:25:20 root] (utils.py 283): INFO Epoch: [4] [1000/2502] eta: 1:11:48 lr: 0.000018 loss_cls: 2.8085 (2.5990) grad_norm: 1.1129 (1.3308) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 03:25:49 root] (utils.py 283): INFO Epoch: [4] [1010/2502] eta: 1:11:19 lr: 0.000018 loss_cls: 2.8114 (2.6001) grad_norm: 1.1214 (1.3290) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 03:26:17 root] (utils.py 283): INFO Epoch: [4] [1020/2502] eta: 1:10:51 lr: 0.000018 loss_cls: 2.6194 (2.6005) grad_norm: 1.1171 (1.3283) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 03:26:46 root] (utils.py 283): INFO Epoch: [4] [1030/2502] eta: 1:10:22 lr: 0.000018 loss_cls: 2.6065 (2.6005) grad_norm: 1.0990 (1.3316) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 03:27:15 root] (utils.py 283): INFO Epoch: [4] [1040/2502] eta: 1:09:53 lr: 0.000018 loss_cls: 2.6549 (2.6004) grad_norm: 1.1120 (1.3302) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 03:27:43 root] (utils.py 283): INFO Epoch: [4] [1050/2502] eta: 1:09:25 lr: 0.000018 loss_cls: 2.6320 (2.5987) grad_norm: 1.1030 (1.3293) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 03:28:12 root] (utils.py 283): INFO Epoch: [4] [1060/2502] eta: 1:08:56 lr: 0.000018 loss_cls: 2.6705 (2.6000) grad_norm: 1.1771 (1.3284) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 03:28:41 root] (utils.py 283): INFO Epoch: [4] [1070/2502] eta: 1:08:27 lr: 0.000018 loss_cls: 2.7721 (2.5992) grad_norm: 1.1771 (1.3273) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 03:29:10 root] (utils.py 283): INFO Epoch: [4] [1080/2502] eta: 1:07:59 lr: 0.000018 loss_cls: 2.5678 (2.5987) grad_norm: 1.1671 (1.3257) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 03:29:38 root] (utils.py 283): INFO Epoch: [4] [1090/2502] eta: 1:07:30 lr: 0.000018 loss_cls: 2.6255 (2.6001) grad_norm: 1.1624 (1.3239) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 03:30:07 root] (utils.py 283): INFO Epoch: [4] [1100/2502] eta: 1:07:02 lr: 0.000018 loss_cls: 2.5504 (2.5979) grad_norm: 1.1445 (1.3239) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 03:30:36 root] (utils.py 283): INFO Epoch: [4] [1110/2502] eta: 1:06:33 lr: 0.000018 loss_cls: 2.5504 (2.5985) grad_norm: 1.1621 (1.3243) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 03:31:04 root] (utils.py 283): INFO Epoch: [4] [1120/2502] eta: 1:06:04 lr: 0.000018 loss_cls: 2.7728 (2.5993) grad_norm: 1.1882 (1.3230) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 03:31:33 root] (utils.py 283): INFO Epoch: [4] [1130/2502] eta: 1:05:36 lr: 0.000018 loss_cls: 2.7717 (2.5987) grad_norm: 1.1868 (1.3222) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 03:32:02 root] (utils.py 283): INFO Epoch: [4] [1140/2502] eta: 1:05:07 lr: 0.000018 loss_cls: 2.6917 (2.5998) grad_norm: 1.1515 (1.3222) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 03:32:31 root] (utils.py 283): INFO Epoch: [4] [1150/2502] eta: 1:04:38 lr: 0.000018 loss_cls: 2.7582 (2.5999) grad_norm: 1.1128 (1.3219) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 03:32:59 root] (utils.py 283): INFO Epoch: [4] [1160/2502] eta: 1:04:09 lr: 0.000018 loss_cls: 2.7347 (2.5993) grad_norm: 1.1294 (1.3204) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 03:33:28 root] (utils.py 283): INFO Epoch: [4] [1170/2502] eta: 1:03:41 lr: 0.000018 loss_cls: 2.6179 (2.5983) grad_norm: 1.0966 (1.3189) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 03:33:57 root] (utils.py 283): INFO Epoch: [4] [1180/2502] eta: 1:03:12 lr: 0.000018 loss_cls: 2.5028 (2.5970) grad_norm: 1.1263 (1.3189) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 03:34:25 root] (utils.py 283): INFO Epoch: [4] [1190/2502] eta: 1:02:43 lr: 0.000018 loss_cls: 2.6607 (2.5964) grad_norm: 1.1556 (1.3178) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 03:34:54 root] (utils.py 283): INFO Epoch: [4] [1200/2502] eta: 1:02:15 lr: 0.000018 loss_cls: 2.6607 (2.5960) grad_norm: 1.1314 (1.3170) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 03:35:23 root] (utils.py 283): INFO Epoch: [4] [1210/2502] eta: 1:01:46 lr: 0.000018 loss_cls: 2.7069 (2.5968) grad_norm: 1.1776 (1.3156) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 03:35:51 root] (utils.py 283): INFO Epoch: [4] [1220/2502] eta: 1:01:17 lr: 0.000018 loss_cls: 2.6401 (2.5951) grad_norm: 1.0785 (1.3142) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 03:36:20 root] (utils.py 283): INFO Epoch: [4] [1230/2502] eta: 1:00:49 lr: 0.000018 loss_cls: 2.3486 (2.5950) grad_norm: 1.0883 (1.3133) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 03:36:49 root] (utils.py 283): INFO Epoch: [4] [1240/2502] eta: 1:00:20 lr: 0.000018 loss_cls: 2.7154 (2.5933) grad_norm: 1.0883 (1.3112) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 03:37:18 root] (utils.py 283): INFO Epoch: [4] [1250/2502] eta: 0:59:51 lr: 0.000018 loss_cls: 2.7332 (2.5938) grad_norm: 1.0777 (1.3100) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 03:37:46 root] (utils.py 283): INFO Epoch: [4] [1260/2502] eta: 0:59:23 lr: 0.000018 loss_cls: 2.6968 (2.5936) grad_norm: 1.1274 (1.3087) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 03:38:15 root] (utils.py 283): INFO Epoch: [4] [1270/2502] eta: 0:58:54 lr: 0.000018 loss_cls: 2.5464 (2.5928) grad_norm: 1.1035 (1.3072) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 03:38:44 root] (utils.py 283): INFO Epoch: [4] [1280/2502] eta: 0:58:25 lr: 0.000018 loss_cls: 2.6482 (2.5933) grad_norm: 1.0945 (1.3083) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 03:39:12 root] (utils.py 283): INFO Epoch: [4] [1290/2502] eta: 0:57:57 lr: 0.000018 loss_cls: 2.8097 (2.5944) grad_norm: 1.1683 (1.3107) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 03:39:41 root] (utils.py 283): INFO Epoch: [4] [1300/2502] eta: 0:57:28 lr: 0.000018 loss_cls: 2.7943 (2.5950) grad_norm: 1.2049 (1.3125) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 03:40:10 root] (utils.py 283): INFO Epoch: [4] [1310/2502] eta: 0:56:59 lr: 0.000018 loss_cls: 2.6068 (2.5942) grad_norm: 1.1944 (1.3150) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 03:40:38 root] (utils.py 283): INFO Epoch: [4] [1320/2502] eta: 0:56:31 lr: 0.000018 loss_cls: 2.5808 (2.5951) grad_norm: 1.1214 (1.3138) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 03:41:07 root] (utils.py 283): INFO Epoch: [4] [1330/2502] eta: 0:56:02 lr: 0.000018 loss_cls: 2.6005 (2.5948) grad_norm: 1.1392 (1.3125) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 03:41:36 root] (utils.py 283): INFO Epoch: [4] [1340/2502] eta: 0:55:33 lr: 0.000018 loss_cls: 2.7100 (2.5961) grad_norm: 1.1496 (1.3117) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 03:42:04 root] (utils.py 283): INFO Epoch: [4] [1350/2502] eta: 0:55:05 lr: 0.000018 loss_cls: 2.7962 (2.5969) grad_norm: 1.1181 (1.3103) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 03:42:33 root] (utils.py 283): INFO Epoch: [4] [1360/2502] eta: 0:54:36 lr: 0.000018 loss_cls: 2.7015 (2.5968) grad_norm: 1.1330 (1.3096) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 03:43:02 root] (utils.py 283): INFO Epoch: [4] [1370/2502] eta: 0:54:07 lr: 0.000018 loss_cls: 2.7645 (2.5972) grad_norm: 1.1540 (1.3102) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 03:43:31 root] (utils.py 283): INFO Epoch: [4] [1380/2502] eta: 0:53:39 lr: 0.000018 loss_cls: 2.6555 (2.5962) grad_norm: 1.1540 (1.3155) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 03:43:59 root] (utils.py 283): INFO Epoch: [4] [1390/2502] eta: 0:53:10 lr: 0.000018 loss_cls: 2.5292 (2.5961) grad_norm: 1.1540 (1.3147) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 03:44:28 root] (utils.py 283): INFO Epoch: [4] [1400/2502] eta: 0:52:41 lr: 0.000018 loss_cls: 2.4889 (2.5946) grad_norm: 1.1309 (1.3152) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 03:44:57 root] (utils.py 283): INFO Epoch: [4] [1410/2502] eta: 0:52:12 lr: 0.000018 loss_cls: 2.7229 (2.5964) grad_norm: 1.2381 (1.3723) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 03:45:25 root] (utils.py 283): INFO Epoch: [4] [1420/2502] eta: 0:51:44 lr: 0.000018 loss_cls: 2.8758 (2.5968) grad_norm: 1.5858 (1.3771) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 03:45:54 root] (utils.py 283): INFO Epoch: [4] [1430/2502] eta: 0:51:15 lr: 0.000018 loss_cls: 2.6091 (2.5975) grad_norm: 1.5674 (1.3781) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 03:46:23 root] (utils.py 283): INFO Epoch: [4] [1440/2502] eta: 0:50:46 lr: 0.000018 loss_cls: 2.7192 (2.5978) grad_norm: 1.2572 (1.3768) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 03:46:52 root] (utils.py 283): INFO Epoch: [4] [1450/2502] eta: 0:50:18 lr: 0.000018 loss_cls: 2.7552 (2.5982) grad_norm: 1.2741 (1.3847) time: 2.8786 data: 0.0002 max mem: 28454 +[2024-12-12 03:47:20 root] (utils.py 283): INFO Epoch: [4] [1460/2502] eta: 0:49:49 lr: 0.000018 loss_cls: 2.7844 (2.5995) grad_norm: 1.4251 (1.3864) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-12 03:47:49 root] (utils.py 283): INFO Epoch: [4] [1470/2502] eta: 0:49:21 lr: 0.000018 loss_cls: 2.7498 (2.5992) grad_norm: 1.1805 (1.3857) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 03:48:18 root] (utils.py 283): INFO Epoch: [4] [1480/2502] eta: 0:48:52 lr: 0.000018 loss_cls: 2.8926 (2.6015) grad_norm: 1.1628 (1.3849) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 03:48:47 root] (utils.py 283): INFO Epoch: [4] [1490/2502] eta: 0:48:23 lr: 0.000018 loss_cls: 2.9131 (2.6034) grad_norm: 1.1918 (1.3838) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 03:49:15 root] (utils.py 283): INFO Epoch: [4] [1500/2502] eta: 0:47:54 lr: 0.000018 loss_cls: 2.8834 (2.6048) grad_norm: 1.1788 (1.3826) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 03:49:44 root] (utils.py 283): INFO Epoch: [4] [1510/2502] eta: 0:47:26 lr: 0.000018 loss_cls: 2.7846 (2.6057) grad_norm: 1.1444 (1.3812) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 03:50:13 root] (utils.py 283): INFO Epoch: [4] [1520/2502] eta: 0:46:57 lr: 0.000018 loss_cls: 2.6632 (2.6042) grad_norm: 1.0660 (1.3794) time: 2.8721 data: 0.0003 max mem: 28454 +[2024-12-12 03:50:41 root] (utils.py 283): INFO Epoch: [4] [1530/2502] eta: 0:46:28 lr: 0.000018 loss_cls: 2.6104 (2.6039) grad_norm: 1.1282 (1.3803) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 03:51:10 root] (utils.py 283): INFO Epoch: [4] [1540/2502] eta: 0:46:00 lr: 0.000018 loss_cls: 2.6956 (2.6047) grad_norm: 1.1282 (1.3853) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 03:51:39 root] (utils.py 283): INFO Epoch: [4] [1550/2502] eta: 0:45:31 lr: 0.000018 loss_cls: 2.6879 (2.6050) grad_norm: 1.1296 (1.3846) time: 2.8684 data: 0.0003 max mem: 28454 +[2024-12-12 03:52:07 root] (utils.py 283): INFO Epoch: [4] [1560/2502] eta: 0:45:02 lr: 0.000018 loss_cls: 2.5698 (2.6043) grad_norm: 1.1542 (1.3833) time: 2.8684 data: 0.0003 max mem: 28454 +[2024-12-12 03:52:36 root] (utils.py 283): INFO Epoch: [4] [1570/2502] eta: 0:44:34 lr: 0.000018 loss_cls: 2.5477 (2.6033) grad_norm: 1.1368 (1.3828) time: 2.8687 data: 0.0003 max mem: 28454 +[2024-12-12 03:53:05 root] (utils.py 283): INFO Epoch: [4] [1580/2502] eta: 0:44:05 lr: 0.000018 loss_cls: 2.6212 (2.6029) grad_norm: 1.1004 (1.3809) time: 2.8672 data: 0.0003 max mem: 28454 +[2024-12-12 03:53:34 root] (utils.py 283): INFO Epoch: [4] [1590/2502] eta: 0:43:36 lr: 0.000018 loss_cls: 2.6515 (2.6023) grad_norm: 1.0524 (1.3802) time: 2.8681 data: 0.0003 max mem: 28454 +[2024-12-12 03:54:02 root] (utils.py 283): INFO Epoch: [4] [1600/2502] eta: 0:43:08 lr: 0.000018 loss_cls: 2.5450 (2.6022) grad_norm: 1.0985 (1.3793) time: 2.8691 data: 0.0003 max mem: 28454 +[2024-12-12 03:54:31 root] (utils.py 283): INFO Epoch: [4] [1610/2502] eta: 0:42:39 lr: 0.000018 loss_cls: 2.5450 (2.6022) grad_norm: 1.0985 (1.3780) time: 2.8667 data: 0.0002 max mem: 28454 +[2024-12-12 03:55:00 root] (utils.py 283): INFO Epoch: [4] [1620/2502] eta: 0:42:10 lr: 0.000018 loss_cls: 2.4944 (2.6009) grad_norm: 1.0739 (1.3767) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 03:55:28 root] (utils.py 283): INFO Epoch: [4] [1630/2502] eta: 0:41:41 lr: 0.000018 loss_cls: 2.5593 (2.6008) grad_norm: 1.1067 (1.3792) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 03:55:57 root] (utils.py 283): INFO Epoch: [4] [1640/2502] eta: 0:41:13 lr: 0.000018 loss_cls: 2.7309 (2.6007) grad_norm: 1.1163 (1.3781) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 03:56:26 root] (utils.py 283): INFO Epoch: [4] [1650/2502] eta: 0:40:44 lr: 0.000018 loss_cls: 2.7794 (2.6007) grad_norm: 1.1163 (1.3768) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 03:56:54 root] (utils.py 283): INFO Epoch: [4] [1660/2502] eta: 0:40:15 lr: 0.000018 loss_cls: 2.6468 (2.6002) grad_norm: 1.1755 (1.3771) time: 2.8697 data: 0.0003 max mem: 28454 +[2024-12-12 03:57:23 root] (utils.py 283): INFO Epoch: [4] [1670/2502] eta: 0:39:47 lr: 0.000018 loss_cls: 2.6513 (2.6012) grad_norm: 1.1633 (1.3756) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 03:57:52 root] (utils.py 283): INFO Epoch: [4] [1680/2502] eta: 0:39:18 lr: 0.000018 loss_cls: 2.7400 (2.6003) grad_norm: 1.0856 (1.3739) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 03:58:21 root] (utils.py 283): INFO Epoch: [4] [1690/2502] eta: 0:38:49 lr: 0.000018 loss_cls: 2.6124 (2.6005) grad_norm: 1.1286 (1.3733) time: 2.8692 data: 0.0003 max mem: 28454 +[2024-12-12 03:58:49 root] (utils.py 283): INFO Epoch: [4] [1700/2502] eta: 0:38:21 lr: 0.000018 loss_cls: 2.6511 (2.6007) grad_norm: 1.1727 (1.3721) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 03:59:18 root] (utils.py 283): INFO Epoch: [4] [1710/2502] eta: 0:37:52 lr: 0.000018 loss_cls: 2.5973 (2.5996) grad_norm: 1.1264 (1.3704) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 03:59:47 root] (utils.py 283): INFO Epoch: [4] [1720/2502] eta: 0:37:23 lr: 0.000018 loss_cls: 2.3021 (2.5984) grad_norm: 1.0965 (1.3692) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 04:00:15 root] (utils.py 283): INFO Epoch: [4] [1730/2502] eta: 0:36:55 lr: 0.000018 loss_cls: 2.4093 (2.5977) grad_norm: 1.1460 (1.3678) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 04:00:44 root] (utils.py 283): INFO Epoch: [4] [1740/2502] eta: 0:36:26 lr: 0.000018 loss_cls: 2.5215 (2.5973) grad_norm: 1.1480 (1.3707) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 04:01:13 root] (utils.py 283): INFO Epoch: [4] [1750/2502] eta: 0:35:57 lr: 0.000018 loss_cls: 2.5131 (2.5964) grad_norm: 1.0643 (1.3693) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 04:01:41 root] (utils.py 283): INFO Epoch: [4] [1760/2502] eta: 0:35:29 lr: 0.000018 loss_cls: 2.7190 (2.5968) grad_norm: 1.0969 (1.3683) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 04:02:10 root] (utils.py 283): INFO Epoch: [4] [1770/2502] eta: 0:35:00 lr: 0.000018 loss_cls: 2.7190 (2.5971) grad_norm: 1.1245 (1.3670) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 04:02:39 root] (utils.py 283): INFO Epoch: [4] [1780/2502] eta: 0:34:31 lr: 0.000018 loss_cls: 2.6435 (2.5971) grad_norm: 1.0122 (1.3660) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 04:03:08 root] (utils.py 283): INFO Epoch: [4] [1790/2502] eta: 0:34:02 lr: 0.000018 loss_cls: 2.5502 (2.5963) grad_norm: 1.0564 (1.3646) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 04:03:36 root] (utils.py 283): INFO Epoch: [4] [1800/2502] eta: 0:33:34 lr: 0.000018 loss_cls: 2.6060 (2.5966) grad_norm: 1.1018 (1.3641) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 04:04:05 root] (utils.py 283): INFO Epoch: [4] [1810/2502] eta: 0:33:05 lr: 0.000018 loss_cls: 2.7284 (2.5974) grad_norm: 1.1746 (1.3633) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 04:04:34 root] (utils.py 283): INFO Epoch: [4] [1820/2502] eta: 0:32:36 lr: 0.000018 loss_cls: 2.5808 (2.5969) grad_norm: 1.1655 (1.3643) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 04:05:02 root] (utils.py 283): INFO Epoch: [4] [1830/2502] eta: 0:32:08 lr: 0.000018 loss_cls: 2.5778 (2.5964) grad_norm: 1.1453 (1.3632) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 04:05:31 root] (utils.py 283): INFO Epoch: [4] [1840/2502] eta: 0:31:39 lr: 0.000018 loss_cls: 2.6884 (2.5969) grad_norm: 1.1453 (1.3639) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 04:06:00 root] (utils.py 283): INFO Epoch: [4] [1850/2502] eta: 0:31:10 lr: 0.000018 loss_cls: 2.7025 (2.5963) grad_norm: 1.2171 (1.3675) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 04:06:29 root] (utils.py 283): INFO Epoch: [4] [1860/2502] eta: 0:30:42 lr: 0.000018 loss_cls: 2.5587 (2.5960) grad_norm: 1.3129 (1.3692) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 04:06:57 root] (utils.py 283): INFO Epoch: [4] [1870/2502] eta: 0:30:13 lr: 0.000018 loss_cls: 2.5778 (2.5962) grad_norm: 1.3212 (1.3845) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 04:07:26 root] (utils.py 283): INFO Epoch: [4] [1880/2502] eta: 0:29:44 lr: 0.000018 loss_cls: 2.5778 (2.5951) grad_norm: 1.2927 (1.3835) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 04:07:55 root] (utils.py 283): INFO Epoch: [4] [1890/2502] eta: 0:29:16 lr: 0.000018 loss_cls: 2.7070 (2.5963) grad_norm: 1.1967 (1.3833) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 04:08:23 root] (utils.py 283): INFO Epoch: [4] [1900/2502] eta: 0:28:47 lr: 0.000018 loss_cls: 2.7333 (2.5964) grad_norm: 1.2126 (1.3834) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 04:08:52 root] (utils.py 283): INFO Epoch: [4] [1910/2502] eta: 0:28:18 lr: 0.000018 loss_cls: 2.6606 (2.5965) grad_norm: 1.0887 (1.3819) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 04:09:21 root] (utils.py 283): INFO Epoch: [4] [1920/2502] eta: 0:27:50 lr: 0.000018 loss_cls: 2.6966 (2.5969) grad_norm: 1.0887 (1.3839) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 04:09:50 root] (utils.py 283): INFO Epoch: [4] [1930/2502] eta: 0:27:21 lr: 0.000018 loss_cls: 2.5138 (2.5964) grad_norm: 1.1493 (1.3829) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 04:10:18 root] (utils.py 283): INFO Epoch: [4] [1940/2502] eta: 0:26:52 lr: 0.000018 loss_cls: 2.6305 (2.5969) grad_norm: 1.2514 (1.3833) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 04:10:47 root] (utils.py 283): INFO Epoch: [4] [1950/2502] eta: 0:26:23 lr: 0.000018 loss_cls: 2.6403 (2.5963) grad_norm: 1.1818 (1.3827) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 04:11:16 root] (utils.py 283): INFO Epoch: [4] [1960/2502] eta: 0:25:55 lr: 0.000018 loss_cls: 2.6205 (2.5965) grad_norm: 1.1262 (1.3817) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 04:11:44 root] (utils.py 283): INFO Epoch: [4] [1970/2502] eta: 0:25:26 lr: 0.000018 loss_cls: 2.5448 (2.5961) grad_norm: 1.0629 (1.3800) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 04:12:13 root] (utils.py 283): INFO Epoch: [4] [1980/2502] eta: 0:24:57 lr: 0.000018 loss_cls: 2.4462 (2.5958) grad_norm: 1.0484 (1.3793) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 04:12:42 root] (utils.py 283): INFO Epoch: [4] [1990/2502] eta: 0:24:29 lr: 0.000018 loss_cls: 2.5008 (2.5949) grad_norm: 1.0857 (1.3794) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 04:13:11 root] (utils.py 283): INFO Epoch: [4] [2000/2502] eta: 0:24:00 lr: 0.000018 loss_cls: 2.8272 (2.5962) grad_norm: 1.1466 (1.3849) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 04:13:39 root] (utils.py 283): INFO Epoch: [4] [2010/2502] eta: 0:23:31 lr: 0.000018 loss_cls: 2.6980 (2.5955) grad_norm: 1.1838 (1.3846) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 04:14:08 root] (utils.py 283): INFO Epoch: [4] [2020/2502] eta: 0:23:03 lr: 0.000018 loss_cls: 2.6660 (2.5953) grad_norm: 1.1378 (1.3836) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 04:14:37 root] (utils.py 283): INFO Epoch: [4] [2030/2502] eta: 0:22:34 lr: 0.000018 loss_cls: 2.7032 (2.5958) grad_norm: 1.1241 (1.3827) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 04:15:06 root] (utils.py 283): INFO Epoch: [4] [2040/2502] eta: 0:22:05 lr: 0.000018 loss_cls: 2.6289 (2.5955) grad_norm: 1.1056 (1.3814) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 04:15:34 root] (utils.py 283): INFO Epoch: [4] [2050/2502] eta: 0:21:37 lr: 0.000018 loss_cls: 2.6101 (2.5961) grad_norm: 1.1087 (1.3801) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 04:16:03 root] (utils.py 283): INFO Epoch: [4] [2060/2502] eta: 0:21:08 lr: 0.000018 loss_cls: 2.7272 (2.5956) grad_norm: 1.1087 (1.3803) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 04:16:32 root] (utils.py 283): INFO Epoch: [4] [2070/2502] eta: 0:20:39 lr: 0.000018 loss_cls: 2.7272 (2.5962) grad_norm: 1.2092 (1.3800) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 04:17:00 root] (utils.py 283): INFO Epoch: [4] [2080/2502] eta: 0:20:10 lr: 0.000018 loss_cls: 2.6425 (2.5965) grad_norm: 1.1726 (1.3792) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 04:17:29 root] (utils.py 283): INFO Epoch: [4] [2090/2502] eta: 0:19:42 lr: 0.000018 loss_cls: 2.5776 (2.5948) grad_norm: 1.0538 (1.3776) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 04:17:58 root] (utils.py 283): INFO Epoch: [4] [2100/2502] eta: 0:19:13 lr: 0.000018 loss_cls: 2.4305 (2.5942) grad_norm: 1.0518 (1.3774) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 04:18:26 root] (utils.py 283): INFO Epoch: [4] [2110/2502] eta: 0:18:44 lr: 0.000018 loss_cls: 2.4993 (2.5937) grad_norm: 1.0399 (1.3761) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 04:18:55 root] (utils.py 283): INFO Epoch: [4] [2120/2502] eta: 0:18:16 lr: 0.000018 loss_cls: 2.5328 (2.5932) grad_norm: 1.1303 (1.3803) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 04:19:24 root] (utils.py 283): INFO Epoch: [4] [2130/2502] eta: 0:17:47 lr: 0.000018 loss_cls: 2.4978 (2.5923) grad_norm: 1.1986 (1.3801) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 04:19:53 root] (utils.py 283): INFO Epoch: [4] [2140/2502] eta: 0:17:18 lr: 0.000018 loss_cls: 2.5869 (2.5924) grad_norm: 1.1363 (1.3791) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 04:20:21 root] (utils.py 283): INFO Epoch: [4] [2150/2502] eta: 0:16:50 lr: 0.000018 loss_cls: 2.6451 (2.5921) grad_norm: 1.0449 (1.3775) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 04:20:50 root] (utils.py 283): INFO Epoch: [4] [2160/2502] eta: 0:16:21 lr: 0.000018 loss_cls: 2.6109 (2.5927) grad_norm: 1.0449 (1.3763) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 04:21:19 root] (utils.py 283): INFO Epoch: [4] [2170/2502] eta: 0:15:52 lr: 0.000018 loss_cls: 2.5734 (2.5920) grad_norm: 1.1066 (1.4124) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 04:21:47 root] (utils.py 283): INFO Epoch: [4] [2180/2502] eta: 0:15:24 lr: 0.000018 loss_cls: 2.4240 (2.5915) grad_norm: 1.3520 (1.4130) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 04:22:16 root] (utils.py 283): INFO Epoch: [4] [2190/2502] eta: 0:14:55 lr: 0.000018 loss_cls: 2.5304 (2.5909) grad_norm: 1.4974 (1.4127) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 04:22:45 root] (utils.py 283): INFO Epoch: [4] [2200/2502] eta: 0:14:26 lr: 0.000018 loss_cls: 2.3985 (2.5901) grad_norm: 1.2151 (1.4117) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 04:23:14 root] (utils.py 283): INFO Epoch: [4] [2210/2502] eta: 0:13:57 lr: 0.000018 loss_cls: 2.5529 (2.5902) grad_norm: 1.1774 (1.4106) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 04:23:42 root] (utils.py 283): INFO Epoch: [4] [2220/2502] eta: 0:13:29 lr: 0.000018 loss_cls: 2.6716 (2.5902) grad_norm: 1.1165 (1.4093) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 04:24:11 root] (utils.py 283): INFO Epoch: [4] [2230/2502] eta: 0:13:00 lr: 0.000018 loss_cls: 2.6712 (2.5902) grad_norm: 1.0838 (1.4082) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 04:24:40 root] (utils.py 283): INFO Epoch: [4] [2240/2502] eta: 0:12:31 lr: 0.000018 loss_cls: 2.5829 (2.5902) grad_norm: 1.0909 (1.4075) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 04:25:08 root] (utils.py 283): INFO Epoch: [4] [2250/2502] eta: 0:12:03 lr: 0.000018 loss_cls: 2.6557 (2.5907) grad_norm: 1.2289 (1.4109) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 04:25:37 root] (utils.py 283): INFO Epoch: [4] [2260/2502] eta: 0:11:34 lr: 0.000018 loss_cls: 2.7683 (2.5911) grad_norm: 1.1651 (1.4100) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 04:26:06 root] (utils.py 283): INFO Epoch: [4] [2270/2502] eta: 0:11:05 lr: 0.000018 loss_cls: 2.5341 (2.5903) grad_norm: 1.1634 (1.4089) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 04:26:34 root] (utils.py 283): INFO Epoch: [4] [2280/2502] eta: 0:10:37 lr: 0.000018 loss_cls: 2.6046 (2.5909) grad_norm: 1.2010 (1.4085) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 04:27:03 root] (utils.py 283): INFO Epoch: [4] [2290/2502] eta: 0:10:08 lr: 0.000018 loss_cls: 2.7886 (2.5915) grad_norm: 1.1766 (1.4074) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 04:27:32 root] (utils.py 283): INFO Epoch: [4] [2300/2502] eta: 0:09:39 lr: 0.000018 loss_cls: 2.7320 (2.5913) grad_norm: 1.1005 (1.4070) time: 2.8771 data: 0.0002 max mem: 28454 +[2024-12-12 04:28:01 root] (utils.py 283): INFO Epoch: [4] [2310/2502] eta: 0:09:10 lr: 0.000018 loss_cls: 2.6583 (2.5916) grad_norm: 1.1170 (1.4064) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 04:28:29 root] (utils.py 283): INFO Epoch: [4] [2320/2502] eta: 0:08:42 lr: 0.000018 loss_cls: 2.7863 (2.5922) grad_norm: 1.1177 (1.4054) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 04:28:58 root] (utils.py 283): INFO Epoch: [4] [2330/2502] eta: 0:08:13 lr: 0.000018 loss_cls: 2.7040 (2.5915) grad_norm: 1.2091 (1.4047) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 04:29:27 root] (utils.py 283): INFO Epoch: [4] [2340/2502] eta: 0:07:44 lr: 0.000018 loss_cls: 2.6688 (2.5919) grad_norm: 1.2160 (1.4059) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 04:29:56 root] (utils.py 283): INFO Epoch: [4] [2350/2502] eta: 0:07:16 lr: 0.000018 loss_cls: 2.6656 (2.5912) grad_norm: 1.0875 (1.4046) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 04:30:24 root] (utils.py 283): INFO Epoch: [4] [2360/2502] eta: 0:06:47 lr: 0.000018 loss_cls: 2.4555 (2.5899) grad_norm: 1.1183 (1.4036) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 04:30:53 root] (utils.py 283): INFO Epoch: [4] [2370/2502] eta: 0:06:18 lr: 0.000018 loss_cls: 2.4555 (2.5893) grad_norm: 1.1272 (1.4024) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 04:31:22 root] (utils.py 283): INFO Epoch: [4] [2380/2502] eta: 0:05:50 lr: 0.000018 loss_cls: 2.7033 (2.5894) grad_norm: 1.1161 (1.4014) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 04:31:50 root] (utils.py 283): INFO Epoch: [4] [2390/2502] eta: 0:05:21 lr: 0.000018 loss_cls: 2.6396 (2.5889) grad_norm: 1.0837 (1.3999) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 04:32:19 root] (utils.py 283): INFO Epoch: [4] [2400/2502] eta: 0:04:52 lr: 0.000018 loss_cls: 2.5330 (2.5892) grad_norm: 1.1195 (1.3995) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 04:32:48 root] (utils.py 283): INFO Epoch: [4] [2410/2502] eta: 0:04:24 lr: 0.000018 loss_cls: 2.5393 (2.5883) grad_norm: 1.1488 (1.4007) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 04:33:16 root] (utils.py 283): INFO Epoch: [4] [2420/2502] eta: 0:03:55 lr: 0.000018 loss_cls: 2.5523 (2.5885) grad_norm: 1.0715 (1.3995) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 04:33:45 root] (utils.py 283): INFO Epoch: [4] [2430/2502] eta: 0:03:26 lr: 0.000018 loss_cls: 2.5523 (2.5878) grad_norm: 1.0715 (1.3987) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 04:34:14 root] (utils.py 283): INFO Epoch: [4] [2440/2502] eta: 0:02:57 lr: 0.000018 loss_cls: 2.5820 (2.5882) grad_norm: 1.1504 (1.4008) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 04:34:43 root] (utils.py 283): INFO Epoch: [4] [2450/2502] eta: 0:02:29 lr: 0.000018 loss_cls: 2.7708 (2.5877) grad_norm: 1.0983 (1.3997) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 04:35:11 root] (utils.py 283): INFO Epoch: [4] [2460/2502] eta: 0:02:00 lr: 0.000018 loss_cls: 2.7708 (2.5882) grad_norm: 1.0983 (1.3998) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 04:35:40 root] (utils.py 283): INFO Epoch: [4] [2470/2502] eta: 0:01:31 lr: 0.000018 loss_cls: 2.6086 (2.5882) grad_norm: 1.2005 (1.3989) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 04:36:09 root] (utils.py 283): INFO Epoch: [4] [2480/2502] eta: 0:01:03 lr: 0.000018 loss_cls: 2.5994 (2.5879) grad_norm: 1.2005 (1.3987) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 04:36:38 root] (utils.py 283): INFO Epoch: [4] [2490/2502] eta: 0:00:34 lr: 0.000018 loss_cls: 2.6538 (2.5878) grad_norm: 1.2236 (1.3985) time: 2.8922 data: 0.0196 max mem: 28454 +[2024-12-12 04:37:07 root] (utils.py 283): INFO Epoch: [4] [2500/2502] eta: 0:00:05 lr: 0.000018 loss_cls: 2.6484 (2.5874) grad_norm: 1.1386 (1.3975) time: 2.8926 data: 0.0196 max mem: 28454 +[2024-12-12 04:37:10 root] (utils.py 283): INFO Epoch: [4] [2501/2502] eta: 0:00:02 lr: 0.000018 loss_cls: 2.6484 (2.5875) grad_norm: 1.1386 (1.3977) time: 2.8935 data: 0.0196 max mem: 28454 +[2024-12-12 04:37:10 root] (utils.py 297): INFO Epoch: [4] Total time: 1:59:41 (2.8702 s / it) +[2024-12-12 04:37:10 root] (engine.py 179): INFO Averaged stats:lr: 0.000018 loss_cls: 2.6484 (2.5924) grad_norm: 1.1386 (1.3977) +[2024-12-12 04:37:13 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:52 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4253 (0.4253) acc1: 89.8438 (89.8438) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5400 data: 0.0003 max mem: 28454 +[2024-12-12 04:37:18 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6116 (0.5921) acc1: 85.9375 (87.5710) acc3: 97.6562 (96.8040) acc5: 98.4375 (98.1534) time: 0.5460 data: 0.0004 max mem: 28454 +[2024-12-12 04:37:23 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5909 (0.6209) acc1: 85.9375 (87.0164) acc3: 96.8750 (96.4658) acc5: 98.4375 (97.7307) time: 0.5473 data: 0.0004 max mem: 28454 +[2024-12-12 04:37:29 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5909 (0.6523) acc1: 85.9375 (85.9627) acc3: 96.0938 (96.1190) acc5: 97.6562 (97.6310) time: 0.5481 data: 0.0004 max mem: 28454 +[2024-12-12 04:37:34 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6574 (0.6573) acc1: 85.9375 (85.8041) acc3: 96.0938 (96.0366) acc5: 97.6562 (97.5419) time: 0.5483 data: 0.0004 max mem: 28454 +[2024-12-12 04:37:40 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8481 (0.7384) acc1: 79.6875 (83.9461) acc3: 92.9688 (94.9908) acc5: 95.3125 (96.7984) time: 0.5483 data: 0.0004 max mem: 28454 +[2024-12-12 04:37:45 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9669 (0.7666) acc1: 79.6875 (83.5297) acc3: 91.4062 (94.3904) acc5: 93.7500 (96.3243) time: 0.5482 data: 0.0004 max mem: 28454 +[2024-12-12 04:37:51 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9541 (0.7922) acc1: 81.2500 (82.8015) acc3: 92.1875 (94.1131) acc5: 94.5312 (96.1708) time: 0.5483 data: 0.0004 max mem: 28454 +[2024-12-12 04:37:56 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9541 (0.8147) acc1: 79.6875 (82.4171) acc3: 92.1875 (93.8465) acc5: 94.5312 (95.9008) time: 0.5483 data: 0.0006 max mem: 28454 +[2024-12-12 04:38:02 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9959 (0.8368) acc1: 77.3438 (81.6535) acc3: 91.4062 (93.5783) acc5: 93.7500 (95.6902) time: 0.5482 data: 0.0006 max mem: 28454 +[2024-12-12 04:38:05 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8849 (0.8327) acc1: 76.5625 (81.7120) acc3: 91.4062 (93.6160) acc5: 94.5312 (95.7600) time: 0.5387 data: 0.0005 max mem: 28454 +[2024-12-12 04:38:05 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5462 s / it) +[2024-12-12 04:38:06 root] (engine.py 264): INFO * Acc@1 81.556 Acc@3 93.438 Acc@5 95.708 loss 0.836 flops 13.207 layer_flops 13.109 +[2024-12-12 04:38:06 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.6% +[2024-12-12 04:38:06 root] (main.py 576): INFO Max accuracy: 81.74% +[2024-12-12 04:38:08 root] (utils.py 283): INFO Epoch: [5] [ 0/2502] eta: 1:59:00 lr: 0.000017 loss_cls: 2.9755 (2.9755) grad_norm: 1.1509 (1.1509) time: 2.8539 data: 0.0002 max mem: 28454 +[2024-12-12 04:38:37 root] (utils.py 283): INFO Epoch: [5] [ 10/2502] eta: 1:59:14 lr: 0.000017 loss_cls: 2.6706 (2.7707) grad_norm: 1.1472 (1.1534) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 04:39:06 root] (utils.py 283): INFO Epoch: [5] [ 20/2502] eta: 1:58:52 lr: 0.000017 loss_cls: 2.6706 (2.7037) grad_norm: 1.1472 (1.7511) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 04:39:35 root] (utils.py 283): INFO Epoch: [5] [ 30/2502] eta: 1:58:20 lr: 0.000017 loss_cls: 2.6225 (2.6410) grad_norm: 1.1988 (1.5877) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 04:40:03 root] (utils.py 283): INFO Epoch: [5] [ 40/2502] eta: 1:57:53 lr: 0.000017 loss_cls: 2.5870 (2.6414) grad_norm: 1.2384 (1.5272) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 04:40:32 root] (utils.py 283): INFO Epoch: [5] [ 50/2502] eta: 1:57:23 lr: 0.000017 loss_cls: 2.6490 (2.6068) grad_norm: 1.1686 (1.4500) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 04:41:01 root] (utils.py 283): INFO Epoch: [5] [ 60/2502] eta: 1:56:55 lr: 0.000017 loss_cls: 2.4551 (2.5988) grad_norm: 1.0977 (1.3969) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 04:41:29 root] (utils.py 283): INFO Epoch: [5] [ 70/2502] eta: 1:56:26 lr: 0.000017 loss_cls: 2.8007 (2.6196) grad_norm: 1.0998 (1.3609) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 04:41:58 root] (utils.py 283): INFO Epoch: [5] [ 80/2502] eta: 1:55:56 lr: 0.000017 loss_cls: 2.8699 (2.6448) grad_norm: 1.1594 (1.3682) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 04:42:27 root] (utils.py 283): INFO Epoch: [5] [ 90/2502] eta: 1:55:27 lr: 0.000017 loss_cls: 2.7604 (2.6330) grad_norm: 1.1279 (1.3365) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 04:42:56 root] (utils.py 283): INFO Epoch: [5] [ 100/2502] eta: 1:54:58 lr: 0.000017 loss_cls: 2.4921 (2.6203) grad_norm: 1.0643 (1.3119) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 04:43:24 root] (utils.py 283): INFO Epoch: [5] [ 110/2502] eta: 1:54:30 lr: 0.000017 loss_cls: 2.6452 (2.6188) grad_norm: 1.0651 (1.3222) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 04:43:53 root] (utils.py 283): INFO Epoch: [5] [ 120/2502] eta: 1:54:02 lr: 0.000017 loss_cls: 2.6330 (2.6136) grad_norm: 1.0879 (1.3438) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 04:44:22 root] (utils.py 283): INFO Epoch: [5] [ 130/2502] eta: 1:53:35 lr: 0.000017 loss_cls: 2.6306 (2.6098) grad_norm: 1.1182 (1.3927) time: 2.8801 data: 0.0002 max mem: 28454 +[2024-12-12 04:44:51 root] (utils.py 283): INFO Epoch: [5] [ 140/2502] eta: 1:53:08 lr: 0.000017 loss_cls: 2.6651 (2.6130) grad_norm: 1.1297 (1.3874) time: 2.8813 data: 0.0002 max mem: 28454 +[2024-12-12 04:45:20 root] (utils.py 283): INFO Epoch: [5] [ 150/2502] eta: 1:52:39 lr: 0.000017 loss_cls: 2.7274 (2.6170) grad_norm: 1.0990 (1.3698) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 04:45:48 root] (utils.py 283): INFO Epoch: [5] [ 160/2502] eta: 1:52:11 lr: 0.000017 loss_cls: 2.8043 (2.6289) grad_norm: 1.0834 (1.3594) time: 2.8753 data: 0.0003 max mem: 28454 +[2024-12-12 04:46:17 root] (utils.py 283): INFO Epoch: [5] [ 170/2502] eta: 1:51:42 lr: 0.000017 loss_cls: 2.7678 (2.6296) grad_norm: 1.1148 (1.3497) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 04:46:46 root] (utils.py 283): INFO Epoch: [5] [ 180/2502] eta: 1:51:15 lr: 0.000017 loss_cls: 2.7395 (2.6272) grad_norm: 1.1064 (1.3362) time: 2.8799 data: 0.0002 max mem: 28454 +[2024-12-12 04:47:15 root] (utils.py 283): INFO Epoch: [5] [ 190/2502] eta: 1:50:46 lr: 0.000017 loss_cls: 2.7516 (2.6278) grad_norm: 1.0605 (1.3213) time: 2.8810 data: 0.0002 max mem: 28454 +[2024-12-12 04:47:43 root] (utils.py 283): INFO Epoch: [5] [ 200/2502] eta: 1:50:18 lr: 0.000017 loss_cls: 2.7554 (2.6226) grad_norm: 1.1120 (1.3514) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-12 04:48:12 root] (utils.py 283): INFO Epoch: [5] [ 210/2502] eta: 1:49:49 lr: 0.000017 loss_cls: 2.7133 (2.6297) grad_norm: 1.1980 (1.3554) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 04:48:41 root] (utils.py 283): INFO Epoch: [5] [ 220/2502] eta: 1:49:21 lr: 0.000017 loss_cls: 2.8084 (2.6310) grad_norm: 1.2446 (1.3463) time: 2.8789 data: 0.0002 max mem: 28454 +[2024-12-12 04:49:10 root] (utils.py 283): INFO Epoch: [5] [ 230/2502] eta: 1:48:53 lr: 0.000017 loss_cls: 2.7361 (2.6306) grad_norm: 1.0896 (1.3445) time: 2.8792 data: 0.0002 max mem: 28454 +[2024-12-12 04:49:39 root] (utils.py 283): INFO Epoch: [5] [ 240/2502] eta: 1:48:24 lr: 0.000017 loss_cls: 2.5364 (2.6194) grad_norm: 1.0896 (1.3594) time: 2.8779 data: 0.0002 max mem: 28454 +[2024-12-12 04:50:07 root] (utils.py 283): INFO Epoch: [5] [ 250/2502] eta: 1:47:55 lr: 0.000017 loss_cls: 2.4959 (2.6208) grad_norm: 1.1001 (1.3648) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 04:50:36 root] (utils.py 283): INFO Epoch: [5] [ 260/2502] eta: 1:47:26 lr: 0.000017 loss_cls: 2.7798 (2.6285) grad_norm: 1.1352 (1.3566) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 04:51:05 root] (utils.py 283): INFO Epoch: [5] [ 270/2502] eta: 1:46:58 lr: 0.000017 loss_cls: 2.5309 (2.6189) grad_norm: 1.0853 (1.3456) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 04:51:34 root] (utils.py 283): INFO Epoch: [5] [ 280/2502] eta: 1:46:29 lr: 0.000017 loss_cls: 2.4305 (2.6140) grad_norm: 1.0987 (1.3549) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 04:52:02 root] (utils.py 283): INFO Epoch: [5] [ 290/2502] eta: 1:46:00 lr: 0.000017 loss_cls: 2.5172 (2.6099) grad_norm: 1.1244 (1.3503) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 04:52:31 root] (utils.py 283): INFO Epoch: [5] [ 300/2502] eta: 1:45:32 lr: 0.000017 loss_cls: 2.5850 (2.6079) grad_norm: 1.1377 (1.3482) time: 2.8791 data: 0.0002 max mem: 28454 +[2024-12-12 04:53:00 root] (utils.py 283): INFO Epoch: [5] [ 310/2502] eta: 1:45:03 lr: 0.000017 loss_cls: 2.8209 (2.6122) grad_norm: 1.1390 (1.3463) time: 2.8804 data: 0.0002 max mem: 28454 +[2024-12-12 04:53:29 root] (utils.py 283): INFO Epoch: [5] [ 320/2502] eta: 1:44:35 lr: 0.000017 loss_cls: 2.8130 (2.6112) grad_norm: 1.2047 (1.3431) time: 2.8790 data: 0.0002 max mem: 28454 +[2024-12-12 04:53:57 root] (utils.py 283): INFO Epoch: [5] [ 330/2502] eta: 1:44:06 lr: 0.000017 loss_cls: 2.6247 (2.6077) grad_norm: 1.1183 (1.3346) time: 2.8771 data: 0.0002 max mem: 28454 +[2024-12-12 04:54:26 root] (utils.py 283): INFO Epoch: [5] [ 340/2502] eta: 1:43:37 lr: 0.000017 loss_cls: 2.6748 (2.6110) grad_norm: 1.0746 (1.3276) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 04:54:55 root] (utils.py 283): INFO Epoch: [5] [ 350/2502] eta: 1:43:08 lr: 0.000017 loss_cls: 2.6585 (2.6048) grad_norm: 1.0437 (1.3212) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 04:55:24 root] (utils.py 283): INFO Epoch: [5] [ 360/2502] eta: 1:42:39 lr: 0.000017 loss_cls: 2.5592 (2.6070) grad_norm: 1.0397 (1.3157) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 04:55:52 root] (utils.py 283): INFO Epoch: [5] [ 370/2502] eta: 1:42:11 lr: 0.000017 loss_cls: 2.7457 (2.6080) grad_norm: 1.0854 (1.3106) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 04:56:21 root] (utils.py 283): INFO Epoch: [5] [ 380/2502] eta: 1:41:41 lr: 0.000017 loss_cls: 2.7433 (2.6067) grad_norm: 1.1188 (1.3062) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 04:56:50 root] (utils.py 283): INFO Epoch: [5] [ 390/2502] eta: 1:41:12 lr: 0.000017 loss_cls: 2.7433 (2.6063) grad_norm: 1.1372 (1.3543) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 04:57:19 root] (utils.py 283): INFO Epoch: [5] [ 400/2502] eta: 1:40:43 lr: 0.000017 loss_cls: 2.8342 (2.6117) grad_norm: 1.1659 (1.3534) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 04:57:47 root] (utils.py 283): INFO Epoch: [5] [ 410/2502] eta: 1:40:15 lr: 0.000017 loss_cls: 2.8309 (2.6167) grad_norm: 1.2578 (1.3535) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 04:58:16 root] (utils.py 283): INFO Epoch: [5] [ 420/2502] eta: 1:39:46 lr: 0.000017 loss_cls: 2.6187 (2.6148) grad_norm: 1.1865 (1.3474) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 04:58:45 root] (utils.py 283): INFO Epoch: [5] [ 430/2502] eta: 1:39:17 lr: 0.000017 loss_cls: 2.4754 (2.6139) grad_norm: 1.0579 (1.3426) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 04:59:14 root] (utils.py 283): INFO Epoch: [5] [ 440/2502] eta: 1:38:48 lr: 0.000017 loss_cls: 2.6497 (2.6126) grad_norm: 1.0428 (1.3387) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 04:59:42 root] (utils.py 283): INFO Epoch: [5] [ 450/2502] eta: 1:38:19 lr: 0.000017 loss_cls: 2.7509 (2.6152) grad_norm: 1.0428 (1.3329) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 05:00:11 root] (utils.py 283): INFO Epoch: [5] [ 460/2502] eta: 1:37:51 lr: 0.000017 loss_cls: 2.7813 (2.6163) grad_norm: 1.0392 (1.3279) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 05:00:40 root] (utils.py 283): INFO Epoch: [5] [ 470/2502] eta: 1:37:22 lr: 0.000017 loss_cls: 2.6617 (2.6128) grad_norm: 1.0940 (1.3242) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 05:01:08 root] (utils.py 283): INFO Epoch: [5] [ 480/2502] eta: 1:36:53 lr: 0.000017 loss_cls: 2.6458 (2.6095) grad_norm: 1.1036 (1.3327) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 05:01:37 root] (utils.py 283): INFO Epoch: [5] [ 490/2502] eta: 1:36:24 lr: 0.000017 loss_cls: 2.4871 (2.6062) grad_norm: 1.0949 (1.3286) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 05:02:06 root] (utils.py 283): INFO Epoch: [5] [ 500/2502] eta: 1:35:55 lr: 0.000017 loss_cls: 2.6959 (2.6088) grad_norm: 1.0949 (1.3306) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 05:02:35 root] (utils.py 283): INFO Epoch: [5] [ 510/2502] eta: 1:35:26 lr: 0.000017 loss_cls: 2.8175 (2.6124) grad_norm: 1.2706 (1.3290) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 05:03:03 root] (utils.py 283): INFO Epoch: [5] [ 520/2502] eta: 1:34:58 lr: 0.000017 loss_cls: 2.7353 (2.6123) grad_norm: 1.1515 (1.3251) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 05:03:32 root] (utils.py 283): INFO Epoch: [5] [ 530/2502] eta: 1:34:29 lr: 0.000017 loss_cls: 2.6718 (2.6144) grad_norm: 1.1112 (1.3254) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 05:04:01 root] (utils.py 283): INFO Epoch: [5] [ 540/2502] eta: 1:34:00 lr: 0.000017 loss_cls: 2.7681 (2.6158) grad_norm: 1.1112 (1.3220) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 05:04:30 root] (utils.py 283): INFO Epoch: [5] [ 550/2502] eta: 1:33:31 lr: 0.000017 loss_cls: 2.7669 (2.6153) grad_norm: 1.1086 (1.3274) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 05:04:58 root] (utils.py 283): INFO Epoch: [5] [ 560/2502] eta: 1:33:02 lr: 0.000017 loss_cls: 2.5667 (2.6143) grad_norm: 1.1051 (1.3244) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 05:05:27 root] (utils.py 283): INFO Epoch: [5] [ 570/2502] eta: 1:32:34 lr: 0.000017 loss_cls: 2.5667 (2.6102) grad_norm: 1.1224 (1.3222) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 05:05:56 root] (utils.py 283): INFO Epoch: [5] [ 580/2502] eta: 1:32:05 lr: 0.000017 loss_cls: 2.7478 (2.6118) grad_norm: 1.1224 (1.3202) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 05:06:25 root] (utils.py 283): INFO Epoch: [5] [ 590/2502] eta: 1:31:36 lr: 0.000017 loss_cls: 2.8063 (2.6127) grad_norm: 1.1327 (1.3194) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 05:06:53 root] (utils.py 283): INFO Epoch: [5] [ 600/2502] eta: 1:31:07 lr: 0.000017 loss_cls: 2.6289 (2.6116) grad_norm: 1.1458 (1.3166) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 05:07:22 root] (utils.py 283): INFO Epoch: [5] [ 610/2502] eta: 1:30:38 lr: 0.000017 loss_cls: 2.5558 (2.6095) grad_norm: 1.1834 (1.3201) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 05:07:51 root] (utils.py 283): INFO Epoch: [5] [ 620/2502] eta: 1:30:09 lr: 0.000017 loss_cls: 2.6358 (2.6085) grad_norm: 1.1574 (1.9073) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 05:08:19 root] (utils.py 283): INFO Epoch: [5] [ 630/2502] eta: 1:29:40 lr: 0.000017 loss_cls: 2.7155 (2.6093) grad_norm: 1.4029 (1.9138) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 05:08:48 root] (utils.py 283): INFO Epoch: [5] [ 640/2502] eta: 1:29:11 lr: 0.000017 loss_cls: 2.9393 (2.6152) grad_norm: 1.6722 (1.9100) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 05:09:17 root] (utils.py 283): INFO Epoch: [5] [ 650/2502] eta: 1:28:42 lr: 0.000017 loss_cls: 2.9296 (2.6158) grad_norm: 1.5997 (1.9044) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 05:09:45 root] (utils.py 283): INFO Epoch: [5] [ 660/2502] eta: 1:28:14 lr: 0.000017 loss_cls: 2.7467 (2.6133) grad_norm: 1.4406 (1.8956) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 05:10:14 root] (utils.py 283): INFO Epoch: [5] [ 670/2502] eta: 1:27:45 lr: 0.000017 loss_cls: 2.7467 (2.6164) grad_norm: 1.2915 (1.8873) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 05:10:43 root] (utils.py 283): INFO Epoch: [5] [ 680/2502] eta: 1:27:16 lr: 0.000017 loss_cls: 2.7299 (2.6155) grad_norm: 1.2862 (1.8813) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 05:11:11 root] (utils.py 283): INFO Epoch: [5] [ 690/2502] eta: 1:26:47 lr: 0.000017 loss_cls: 2.6338 (2.6164) grad_norm: 1.1777 (1.8708) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 05:11:40 root] (utils.py 283): INFO Epoch: [5] [ 700/2502] eta: 1:26:18 lr: 0.000017 loss_cls: 2.7475 (2.6183) grad_norm: 1.1777 (1.8635) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 05:12:09 root] (utils.py 283): INFO Epoch: [5] [ 710/2502] eta: 1:25:49 lr: 0.000017 loss_cls: 2.9124 (2.6209) grad_norm: 1.2360 (1.8546) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 05:12:38 root] (utils.py 283): INFO Epoch: [5] [ 720/2502] eta: 1:25:20 lr: 0.000017 loss_cls: 2.8754 (2.6224) grad_norm: 1.2310 (1.8951) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 05:13:06 root] (utils.py 283): INFO Epoch: [5] [ 730/2502] eta: 1:24:52 lr: 0.000017 loss_cls: 2.6202 (2.6201) grad_norm: 1.1913 (1.8870) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 05:13:35 root] (utils.py 283): INFO Epoch: [5] [ 740/2502] eta: 1:24:23 lr: 0.000017 loss_cls: 2.5001 (2.6178) grad_norm: 1.1721 (1.8782) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 05:14:04 root] (utils.py 283): INFO Epoch: [5] [ 750/2502] eta: 1:23:54 lr: 0.000017 loss_cls: 2.6147 (2.6167) grad_norm: 1.1333 (1.8697) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 05:14:32 root] (utils.py 283): INFO Epoch: [5] [ 760/2502] eta: 1:23:25 lr: 0.000017 loss_cls: 2.6147 (2.6151) grad_norm: 1.1333 (1.8615) time: 2.8655 data: 0.0002 max mem: 28454 +[2024-12-12 05:15:01 root] (utils.py 283): INFO Epoch: [5] [ 770/2502] eta: 1:22:56 lr: 0.000017 loss_cls: 2.4909 (2.6144) grad_norm: 1.0881 (1.8515) time: 2.8645 data: 0.0002 max mem: 28454 +[2024-12-12 05:15:29 root] (utils.py 283): INFO Epoch: [5] [ 780/2502] eta: 1:22:27 lr: 0.000017 loss_cls: 2.6489 (2.6153) grad_norm: 1.1385 (1.8487) time: 2.8641 data: 0.0002 max mem: 28454 +[2024-12-12 05:15:58 root] (utils.py 283): INFO Epoch: [5] [ 790/2502] eta: 1:21:58 lr: 0.000017 loss_cls: 2.8873 (2.6148) grad_norm: 1.2253 (1.8403) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 05:16:27 root] (utils.py 283): INFO Epoch: [5] [ 800/2502] eta: 1:21:29 lr: 0.000017 loss_cls: 2.3971 (2.6098) grad_norm: 1.0984 (1.8316) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 05:16:56 root] (utils.py 283): INFO Epoch: [5] [ 810/2502] eta: 1:21:01 lr: 0.000017 loss_cls: 2.3971 (2.6085) grad_norm: 1.0970 (1.8225) time: 2.8663 data: 0.0003 max mem: 28454 +[2024-12-12 05:17:24 root] (utils.py 283): INFO Epoch: [5] [ 820/2502] eta: 1:20:32 lr: 0.000017 loss_cls: 2.6376 (2.6088) grad_norm: 1.1374 (1.8164) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-12 05:17:53 root] (utils.py 283): INFO Epoch: [5] [ 830/2502] eta: 1:20:03 lr: 0.000017 loss_cls: 2.6805 (2.6077) grad_norm: 1.1515 (1.8085) time: 2.8625 data: 0.0002 max mem: 28454 +[2024-12-12 05:18:21 root] (utils.py 283): INFO Epoch: [5] [ 840/2502] eta: 1:19:34 lr: 0.000017 loss_cls: 2.7021 (2.6084) grad_norm: 1.1316 (1.8009) time: 2.8621 data: 0.0002 max mem: 28454 +[2024-12-12 05:18:50 root] (utils.py 283): INFO Epoch: [5] [ 850/2502] eta: 1:19:05 lr: 0.000017 loss_cls: 2.7021 (2.6090) grad_norm: 1.1265 (1.7950) time: 2.8628 data: 0.0002 max mem: 28454 +[2024-12-12 05:19:19 root] (utils.py 283): INFO Epoch: [5] [ 860/2502] eta: 1:18:36 lr: 0.000017 loss_cls: 2.8404 (2.6111) grad_norm: 1.1666 (1.7893) time: 2.8634 data: 0.0002 max mem: 28454 +[2024-12-12 05:19:47 root] (utils.py 283): INFO Epoch: [5] [ 870/2502] eta: 1:18:07 lr: 0.000017 loss_cls: 2.8261 (2.6131) grad_norm: 1.1947 (1.7844) time: 2.8607 data: 0.0002 max mem: 28454 +[2024-12-12 05:20:16 root] (utils.py 283): INFO Epoch: [5] [ 880/2502] eta: 1:17:38 lr: 0.000017 loss_cls: 2.6424 (2.6119) grad_norm: 1.2039 (1.8181) time: 2.8598 data: 0.0002 max mem: 28454 +[2024-12-12 05:20:45 root] (utils.py 283): INFO Epoch: [5] [ 890/2502] eta: 1:17:09 lr: 0.000017 loss_cls: 2.3560 (2.6078) grad_norm: 1.2051 (1.8150) time: 2.8609 data: 0.0002 max mem: 28454 +[2024-12-12 05:21:13 root] (utils.py 283): INFO Epoch: [5] [ 900/2502] eta: 1:16:40 lr: 0.000017 loss_cls: 2.1338 (2.6035) grad_norm: 1.3474 (1.8127) time: 2.8626 data: 0.0002 max mem: 28454 +[2024-12-12 05:21:42 root] (utils.py 283): INFO Epoch: [5] [ 910/2502] eta: 1:16:11 lr: 0.000017 loss_cls: 2.4767 (2.6037) grad_norm: 1.3056 (1.8061) time: 2.8647 data: 0.0002 max mem: 28454 +[2024-12-12 05:22:10 root] (utils.py 283): INFO Epoch: [5] [ 920/2502] eta: 1:15:42 lr: 0.000017 loss_cls: 2.7482 (2.6054) grad_norm: 1.1052 (1.7995) time: 2.8617 data: 0.0002 max mem: 28454 +[2024-12-12 05:22:39 root] (utils.py 283): INFO Epoch: [5] [ 930/2502] eta: 1:15:14 lr: 0.000017 loss_cls: 2.8721 (2.6074) grad_norm: 1.1052 (1.7959) time: 2.8596 data: 0.0002 max mem: 28454 +[2024-12-12 05:23:08 root] (utils.py 283): INFO Epoch: [5] [ 940/2502] eta: 1:14:45 lr: 0.000017 loss_cls: 2.7957 (2.6081) grad_norm: 1.1018 (1.7888) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 05:23:36 root] (utils.py 283): INFO Epoch: [5] [ 950/2502] eta: 1:14:16 lr: 0.000017 loss_cls: 2.6909 (2.6078) grad_norm: 1.0918 (1.7825) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 05:24:05 root] (utils.py 283): INFO Epoch: [5] [ 960/2502] eta: 1:13:47 lr: 0.000017 loss_cls: 2.6725 (2.6087) grad_norm: 1.2161 (1.7882) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 05:24:34 root] (utils.py 283): INFO Epoch: [5] [ 970/2502] eta: 1:13:19 lr: 0.000017 loss_cls: 2.7006 (2.6084) grad_norm: 1.1932 (1.7820) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 05:25:03 root] (utils.py 283): INFO Epoch: [5] [ 980/2502] eta: 1:12:50 lr: 0.000017 loss_cls: 2.5545 (2.6083) grad_norm: 1.1598 (1.7763) time: 2.8696 data: 0.0003 max mem: 28454 +[2024-12-12 05:25:31 root] (utils.py 283): INFO Epoch: [5] [ 990/2502] eta: 1:12:21 lr: 0.000017 loss_cls: 2.5545 (2.6078) grad_norm: 1.0818 (1.7695) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 05:26:00 root] (utils.py 283): INFO Epoch: [5] [1000/2502] eta: 1:11:52 lr: 0.000017 loss_cls: 2.5861 (2.6090) grad_norm: 1.0707 (1.7631) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 05:26:29 root] (utils.py 283): INFO Epoch: [5] [1010/2502] eta: 1:11:24 lr: 0.000017 loss_cls: 2.5412 (2.6052) grad_norm: 1.0707 (1.7570) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 05:26:57 root] (utils.py 283): INFO Epoch: [5] [1020/2502] eta: 1:10:55 lr: 0.000017 loss_cls: 2.4614 (2.6036) grad_norm: 1.0889 (1.7513) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 05:27:26 root] (utils.py 283): INFO Epoch: [5] [1030/2502] eta: 1:10:26 lr: 0.000017 loss_cls: 2.5348 (2.6033) grad_norm: 1.1457 (1.7457) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 05:27:55 root] (utils.py 283): INFO Epoch: [5] [1040/2502] eta: 1:09:57 lr: 0.000017 loss_cls: 2.7276 (2.6029) grad_norm: 1.1901 (1.7420) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 05:28:23 root] (utils.py 283): INFO Epoch: [5] [1050/2502] eta: 1:09:29 lr: 0.000017 loss_cls: 2.7250 (2.6022) grad_norm: 1.1372 (1.7356) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 05:28:52 root] (utils.py 283): INFO Epoch: [5] [1060/2502] eta: 1:09:00 lr: 0.000017 loss_cls: 2.5538 (2.6006) grad_norm: 1.0558 (1.7291) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 05:29:21 root] (utils.py 283): INFO Epoch: [5] [1070/2502] eta: 1:08:31 lr: 0.000017 loss_cls: 2.4560 (2.6000) grad_norm: 1.0558 (1.7243) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 05:29:49 root] (utils.py 283): INFO Epoch: [5] [1080/2502] eta: 1:08:02 lr: 0.000017 loss_cls: 2.6751 (2.6000) grad_norm: 1.0925 (1.7187) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 05:30:18 root] (utils.py 283): INFO Epoch: [5] [1090/2502] eta: 1:07:34 lr: 0.000017 loss_cls: 2.4563 (2.5978) grad_norm: 1.0936 (1.7130) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 05:30:47 root] (utils.py 283): INFO Epoch: [5] [1100/2502] eta: 1:07:05 lr: 0.000017 loss_cls: 2.4870 (2.5975) grad_norm: 1.0996 (1.7077) time: 2.8681 data: 0.0003 max mem: 28454 +[2024-12-12 05:31:15 root] (utils.py 283): INFO Epoch: [5] [1110/2502] eta: 1:06:36 lr: 0.000017 loss_cls: 2.7522 (2.5970) grad_norm: 1.1108 (1.7066) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 05:31:44 root] (utils.py 283): INFO Epoch: [5] [1120/2502] eta: 1:06:07 lr: 0.000017 loss_cls: 2.7739 (2.5989) grad_norm: 1.1893 (1.7097) time: 2.8660 data: 0.0003 max mem: 28454 +[2024-12-12 05:32:13 root] (utils.py 283): INFO Epoch: [5] [1130/2502] eta: 1:05:39 lr: 0.000017 loss_cls: 2.7463 (2.5959) grad_norm: 1.1151 (1.7051) time: 2.8685 data: 0.0003 max mem: 28454 +[2024-12-12 05:32:42 root] (utils.py 283): INFO Epoch: [5] [1140/2502] eta: 1:05:10 lr: 0.000017 loss_cls: 2.1941 (2.5934) grad_norm: 1.1409 (1.7173) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 05:33:10 root] (utils.py 283): INFO Epoch: [5] [1150/2502] eta: 1:04:41 lr: 0.000017 loss_cls: 2.2275 (2.5923) grad_norm: 1.1543 (1.7124) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 05:33:39 root] (utils.py 283): INFO Epoch: [5] [1160/2502] eta: 1:04:13 lr: 0.000017 loss_cls: 2.7416 (2.5934) grad_norm: 1.1994 (1.7084) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 05:34:08 root] (utils.py 283): INFO Epoch: [5] [1170/2502] eta: 1:03:44 lr: 0.000017 loss_cls: 2.7644 (2.5941) grad_norm: 1.1994 (1.7041) time: 2.8730 data: 0.0003 max mem: 28454 +[2024-12-12 05:34:36 root] (utils.py 283): INFO Epoch: [5] [1180/2502] eta: 1:03:15 lr: 0.000017 loss_cls: 2.6463 (2.5927) grad_norm: 1.1619 (1.7002) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 05:35:05 root] (utils.py 283): INFO Epoch: [5] [1190/2502] eta: 1:02:46 lr: 0.000017 loss_cls: 2.6074 (2.5929) grad_norm: 1.1619 (1.6985) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 05:35:34 root] (utils.py 283): INFO Epoch: [5] [1200/2502] eta: 1:02:18 lr: 0.000017 loss_cls: 2.8564 (2.5937) grad_norm: 1.1915 (1.6955) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 05:36:03 root] (utils.py 283): INFO Epoch: [5] [1210/2502] eta: 1:01:49 lr: 0.000017 loss_cls: 2.6153 (2.5936) grad_norm: 1.1492 (1.6942) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 05:36:31 root] (utils.py 283): INFO Epoch: [5] [1220/2502] eta: 1:01:20 lr: 0.000017 loss_cls: 2.6147 (2.5929) grad_norm: 1.1056 (1.6895) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 05:37:00 root] (utils.py 283): INFO Epoch: [5] [1230/2502] eta: 1:00:52 lr: 0.000017 loss_cls: 2.6728 (2.5930) grad_norm: 1.0669 (1.6852) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 05:37:29 root] (utils.py 283): INFO Epoch: [5] [1240/2502] eta: 1:00:23 lr: 0.000017 loss_cls: 2.6905 (2.5946) grad_norm: 1.1502 (1.6818) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 05:37:58 root] (utils.py 283): INFO Epoch: [5] [1250/2502] eta: 0:59:54 lr: 0.000017 loss_cls: 2.7041 (2.5948) grad_norm: 1.1255 (1.6778) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 05:38:26 root] (utils.py 283): INFO Epoch: [5] [1260/2502] eta: 0:59:26 lr: 0.000017 loss_cls: 2.6019 (2.5952) grad_norm: 1.1255 (1.6736) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 05:38:55 root] (utils.py 283): INFO Epoch: [5] [1270/2502] eta: 0:58:57 lr: 0.000017 loss_cls: 2.7669 (2.5966) grad_norm: 1.1858 (1.6709) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 05:39:24 root] (utils.py 283): INFO Epoch: [5] [1280/2502] eta: 0:58:28 lr: 0.000017 loss_cls: 2.7868 (2.5960) grad_norm: 1.0631 (1.6662) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 05:39:53 root] (utils.py 283): INFO Epoch: [5] [1290/2502] eta: 0:58:00 lr: 0.000017 loss_cls: 2.5684 (2.5961) grad_norm: 1.0631 (1.6620) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 05:40:21 root] (utils.py 283): INFO Epoch: [5] [1300/2502] eta: 0:57:31 lr: 0.000017 loss_cls: 2.5290 (2.5945) grad_norm: 1.1605 (1.6585) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 05:40:50 root] (utils.py 283): INFO Epoch: [5] [1310/2502] eta: 0:57:02 lr: 0.000017 loss_cls: 2.4978 (2.5933) grad_norm: 1.1563 (1.6557) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 05:41:19 root] (utils.py 283): INFO Epoch: [5] [1320/2502] eta: 0:56:34 lr: 0.000017 loss_cls: 2.5680 (2.5931) grad_norm: 1.1293 (1.6519) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 05:41:48 root] (utils.py 283): INFO Epoch: [5] [1330/2502] eta: 0:56:05 lr: 0.000017 loss_cls: 2.7906 (2.5938) grad_norm: 1.1393 (1.6668) time: 2.8779 data: 0.0002 max mem: 28454 +[2024-12-12 05:42:17 root] (utils.py 283): INFO Epoch: [5] [1340/2502] eta: 0:55:36 lr: 0.000017 loss_cls: 2.7841 (2.5938) grad_norm: 1.4391 (1.6671) time: 2.8807 data: 0.0002 max mem: 28454 +[2024-12-12 05:42:45 root] (utils.py 283): INFO Epoch: [5] [1350/2502] eta: 0:55:08 lr: 0.000017 loss_cls: 2.6926 (2.5942) grad_norm: 1.4391 (1.6647) time: 2.8825 data: 0.0002 max mem: 28454 +[2024-12-12 05:43:14 root] (utils.py 283): INFO Epoch: [5] [1360/2502] eta: 0:54:39 lr: 0.000017 loss_cls: 2.6926 (2.5946) grad_norm: 1.2005 (1.6617) time: 2.8839 data: 0.0002 max mem: 28454 +[2024-12-12 05:43:43 root] (utils.py 283): INFO Epoch: [5] [1370/2502] eta: 0:54:10 lr: 0.000017 loss_cls: 2.6779 (2.5933) grad_norm: 1.1158 (1.6585) time: 2.8837 data: 0.0002 max mem: 28454 +[2024-12-12 05:44:12 root] (utils.py 283): INFO Epoch: [5] [1380/2502] eta: 0:53:42 lr: 0.000017 loss_cls: 2.6966 (2.5940) grad_norm: 1.1773 (1.6557) time: 2.8812 data: 0.0002 max mem: 28454 +[2024-12-12 05:44:41 root] (utils.py 283): INFO Epoch: [5] [1390/2502] eta: 0:53:13 lr: 0.000017 loss_cls: 2.7855 (2.5941) grad_norm: 1.1873 (1.6526) time: 2.8790 data: 0.0002 max mem: 28454 +[2024-12-12 05:45:09 root] (utils.py 283): INFO Epoch: [5] [1400/2502] eta: 0:52:44 lr: 0.000017 loss_cls: 2.6678 (2.5943) grad_norm: 1.0606 (1.6488) time: 2.8773 data: 0.0002 max mem: 28454 +[2024-12-12 05:45:38 root] (utils.py 283): INFO Epoch: [5] [1410/2502] eta: 0:52:16 lr: 0.000017 loss_cls: 2.5546 (2.5933) grad_norm: 1.0600 (1.6473) time: 2.8841 data: 0.0002 max mem: 28454 +[2024-12-12 05:46:07 root] (utils.py 283): INFO Epoch: [5] [1420/2502] eta: 0:51:47 lr: 0.000017 loss_cls: 2.6521 (2.5926) grad_norm: 1.0457 (1.6439) time: 2.8818 data: 0.0002 max mem: 28454 +[2024-12-12 05:46:36 root] (utils.py 283): INFO Epoch: [5] [1430/2502] eta: 0:51:18 lr: 0.000017 loss_cls: 2.6521 (2.5926) grad_norm: 1.0621 (1.6405) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 05:47:04 root] (utils.py 283): INFO Epoch: [5] [1440/2502] eta: 0:50:50 lr: 0.000017 loss_cls: 2.6383 (2.5916) grad_norm: 1.1308 (1.6372) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 05:47:33 root] (utils.py 283): INFO Epoch: [5] [1450/2502] eta: 0:50:21 lr: 0.000017 loss_cls: 2.6383 (2.5914) grad_norm: 1.1707 (1.6348) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 05:48:02 root] (utils.py 283): INFO Epoch: [5] [1460/2502] eta: 0:49:52 lr: 0.000017 loss_cls: 2.6826 (2.5911) grad_norm: 1.1630 (1.6317) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 05:48:31 root] (utils.py 283): INFO Epoch: [5] [1470/2502] eta: 0:49:24 lr: 0.000017 loss_cls: 2.6786 (2.5905) grad_norm: 1.1013 (1.6282) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 05:48:59 root] (utils.py 283): INFO Epoch: [5] [1480/2502] eta: 0:48:55 lr: 0.000017 loss_cls: 2.7481 (2.5921) grad_norm: 1.1169 (1.6257) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 05:49:28 root] (utils.py 283): INFO Epoch: [5] [1490/2502] eta: 0:48:26 lr: 0.000017 loss_cls: 2.7961 (2.5931) grad_norm: 1.0871 (1.6223) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 05:49:57 root] (utils.py 283): INFO Epoch: [5] [1500/2502] eta: 0:47:57 lr: 0.000017 loss_cls: 2.7961 (2.5941) grad_norm: 1.1153 (1.6215) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 05:50:26 root] (utils.py 283): INFO Epoch: [5] [1510/2502] eta: 0:47:29 lr: 0.000017 loss_cls: 2.6135 (2.5934) grad_norm: 1.1621 (1.6185) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-12 05:50:54 root] (utils.py 283): INFO Epoch: [5] [1520/2502] eta: 0:47:00 lr: 0.000017 loss_cls: 2.5932 (2.5946) grad_norm: 1.1661 (1.6161) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-12 05:51:23 root] (utils.py 283): INFO Epoch: [5] [1530/2502] eta: 0:46:31 lr: 0.000017 loss_cls: 2.6888 (2.5939) grad_norm: 1.1892 (1.6138) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 05:51:52 root] (utils.py 283): INFO Epoch: [5] [1540/2502] eta: 0:46:03 lr: 0.000017 loss_cls: 2.5887 (2.5947) grad_norm: 1.0662 (1.6103) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 05:52:20 root] (utils.py 283): INFO Epoch: [5] [1550/2502] eta: 0:45:34 lr: 0.000017 loss_cls: 2.5374 (2.5941) grad_norm: 1.0650 (1.6081) time: 2.8617 data: 0.0002 max mem: 28454 +[2024-12-12 05:52:49 root] (utils.py 283): INFO Epoch: [5] [1560/2502] eta: 0:45:05 lr: 0.000017 loss_cls: 2.6276 (2.5957) grad_norm: 1.1324 (1.6060) time: 2.8610 data: 0.0002 max mem: 28454 +[2024-12-12 05:53:18 root] (utils.py 283): INFO Epoch: [5] [1570/2502] eta: 0:44:36 lr: 0.000017 loss_cls: 2.8074 (2.5967) grad_norm: 1.1530 (1.6039) time: 2.8638 data: 0.0002 max mem: 28454 +[2024-12-12 05:53:46 root] (utils.py 283): INFO Epoch: [5] [1580/2502] eta: 0:44:07 lr: 0.000017 loss_cls: 2.7949 (2.5978) grad_norm: 1.1537 (1.6078) time: 2.8625 data: 0.0002 max mem: 28454 +[2024-12-12 05:54:15 root] (utils.py 283): INFO Epoch: [5] [1590/2502] eta: 0:43:39 lr: 0.000017 loss_cls: 2.7087 (2.5981) grad_norm: 1.1242 (1.6049) time: 2.8611 data: 0.0003 max mem: 28454 +[2024-12-12 05:54:43 root] (utils.py 283): INFO Epoch: [5] [1600/2502] eta: 0:43:10 lr: 0.000017 loss_cls: 2.5889 (2.5976) grad_norm: 1.0983 (1.6016) time: 2.8644 data: 0.0003 max mem: 28454 +[2024-12-12 05:55:12 root] (utils.py 283): INFO Epoch: [5] [1610/2502] eta: 0:42:41 lr: 0.000017 loss_cls: 2.4677 (2.5967) grad_norm: 1.0856 (1.6009) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 05:55:41 root] (utils.py 283): INFO Epoch: [5] [1620/2502] eta: 0:42:12 lr: 0.000017 loss_cls: 2.6898 (2.5972) grad_norm: 1.1277 (1.5995) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 05:56:09 root] (utils.py 283): INFO Epoch: [5] [1630/2502] eta: 0:41:44 lr: 0.000017 loss_cls: 2.7064 (2.5974) grad_norm: 1.1557 (1.5965) time: 2.8653 data: 0.0002 max mem: 28454 +[2024-12-12 05:56:38 root] (utils.py 283): INFO Epoch: [5] [1640/2502] eta: 0:41:15 lr: 0.000017 loss_cls: 2.7954 (2.5986) grad_norm: 1.1240 (1.5988) time: 2.8607 data: 0.0002 max mem: 28454 +[2024-12-12 05:57:07 root] (utils.py 283): INFO Epoch: [5] [1650/2502] eta: 0:40:46 lr: 0.000017 loss_cls: 2.7954 (2.5986) grad_norm: 1.2250 (1.5965) time: 2.8636 data: 0.0002 max mem: 28454 +[2024-12-12 05:57:35 root] (utils.py 283): INFO Epoch: [5] [1660/2502] eta: 0:40:17 lr: 0.000017 loss_cls: 2.5672 (2.5978) grad_norm: 1.1677 (1.5970) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 05:58:04 root] (utils.py 283): INFO Epoch: [5] [1670/2502] eta: 0:39:49 lr: 0.000017 loss_cls: 2.4280 (2.5971) grad_norm: 1.1729 (1.5950) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 05:58:33 root] (utils.py 283): INFO Epoch: [5] [1680/2502] eta: 0:39:20 lr: 0.000017 loss_cls: 2.7778 (2.5985) grad_norm: 1.1361 (1.5925) time: 2.8647 data: 0.0002 max mem: 28454 +[2024-12-12 05:59:01 root] (utils.py 283): INFO Epoch: [5] [1690/2502] eta: 0:38:51 lr: 0.000017 loss_cls: 2.7428 (2.5994) grad_norm: 1.1361 (1.5900) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 05:59:30 root] (utils.py 283): INFO Epoch: [5] [1700/2502] eta: 0:38:22 lr: 0.000017 loss_cls: 2.6135 (2.5973) grad_norm: 1.1593 (1.5894) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 05:59:59 root] (utils.py 283): INFO Epoch: [5] [1710/2502] eta: 0:37:54 lr: 0.000017 loss_cls: 2.2913 (2.5967) grad_norm: 1.1373 (1.5867) time: 2.8833 data: 0.0002 max mem: 28454 +[2024-12-12 06:00:28 root] (utils.py 283): INFO Epoch: [5] [1720/2502] eta: 0:37:25 lr: 0.000017 loss_cls: 2.5110 (2.5961) grad_norm: 1.0712 (1.5838) time: 2.8817 data: 0.0002 max mem: 28454 +[2024-12-12 06:00:57 root] (utils.py 283): INFO Epoch: [5] [1730/2502] eta: 0:36:56 lr: 0.000017 loss_cls: 2.6281 (2.5962) grad_norm: 1.1215 (1.5850) time: 2.8813 data: 0.0002 max mem: 28454 +[2024-12-12 06:01:25 root] (utils.py 283): INFO Epoch: [5] [1740/2502] eta: 0:36:28 lr: 0.000017 loss_cls: 2.7211 (2.5968) grad_norm: 1.1557 (1.5842) time: 2.8813 data: 0.0002 max mem: 28454 +[2024-12-12 06:01:54 root] (utils.py 283): INFO Epoch: [5] [1750/2502] eta: 0:35:59 lr: 0.000017 loss_cls: 2.5519 (2.5961) grad_norm: 1.1167 (1.5816) time: 2.8805 data: 0.0002 max mem: 28454 +[2024-12-12 06:02:23 root] (utils.py 283): INFO Epoch: [5] [1760/2502] eta: 0:35:30 lr: 0.000017 loss_cls: 2.5195 (2.5955) grad_norm: 1.1145 (1.5791) time: 2.8816 data: 0.0002 max mem: 28454 +[2024-12-12 06:02:52 root] (utils.py 283): INFO Epoch: [5] [1770/2502] eta: 0:35:02 lr: 0.000017 loss_cls: 2.5159 (2.5947) grad_norm: 1.1455 (1.5768) time: 2.8790 data: 0.0002 max mem: 28454 +[2024-12-12 06:03:21 root] (utils.py 283): INFO Epoch: [5] [1780/2502] eta: 0:34:33 lr: 0.000017 loss_cls: 2.5790 (2.5954) grad_norm: 1.0980 (1.5742) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 06:03:49 root] (utils.py 283): INFO Epoch: [5] [1790/2502] eta: 0:34:04 lr: 0.000017 loss_cls: 2.8034 (2.5956) grad_norm: 1.0654 (1.5715) time: 2.8785 data: 0.0002 max mem: 28454 +[2024-12-12 06:04:18 root] (utils.py 283): INFO Epoch: [5] [1800/2502] eta: 0:33:36 lr: 0.000017 loss_cls: 2.4521 (2.5945) grad_norm: 1.0615 (1.5687) time: 2.8779 data: 0.0002 max mem: 28454 +[2024-12-12 06:04:47 root] (utils.py 283): INFO Epoch: [5] [1810/2502] eta: 0:33:07 lr: 0.000017 loss_cls: 2.6429 (2.5951) grad_norm: 1.0574 (1.5663) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 06:05:16 root] (utils.py 283): INFO Epoch: [5] [1820/2502] eta: 0:32:38 lr: 0.000017 loss_cls: 2.7068 (2.5954) grad_norm: 1.1021 (1.5642) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 06:05:44 root] (utils.py 283): INFO Epoch: [5] [1830/2502] eta: 0:32:10 lr: 0.000017 loss_cls: 2.5766 (2.5952) grad_norm: 1.0862 (1.5635) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 06:06:13 root] (utils.py 283): INFO Epoch: [5] [1840/2502] eta: 0:31:41 lr: 0.000017 loss_cls: 2.6283 (2.5957) grad_norm: 1.0965 (1.5616) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 06:06:42 root] (utils.py 283): INFO Epoch: [5] [1850/2502] eta: 0:31:12 lr: 0.000017 loss_cls: 2.6283 (2.5949) grad_norm: 1.1355 (1.5594) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 06:07:11 root] (utils.py 283): INFO Epoch: [5] [1860/2502] eta: 0:30:43 lr: 0.000017 loss_cls: 2.7745 (2.5962) grad_norm: 1.1741 (1.5590) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 06:07:39 root] (utils.py 283): INFO Epoch: [5] [1870/2502] eta: 0:30:15 lr: 0.000017 loss_cls: 2.7696 (2.5966) grad_norm: 1.2032 (1.5575) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 06:08:08 root] (utils.py 283): INFO Epoch: [5] [1880/2502] eta: 0:29:46 lr: 0.000017 loss_cls: 2.5968 (2.5962) grad_norm: 1.1604 (1.5556) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 06:08:37 root] (utils.py 283): INFO Epoch: [5] [1890/2502] eta: 0:29:17 lr: 0.000017 loss_cls: 2.6621 (2.5971) grad_norm: 1.1700 (1.5547) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 06:09:06 root] (utils.py 283): INFO Epoch: [5] [1900/2502] eta: 0:28:48 lr: 0.000017 loss_cls: 2.7159 (2.5977) grad_norm: 1.1818 (1.5527) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 06:09:34 root] (utils.py 283): INFO Epoch: [5] [1910/2502] eta: 0:28:20 lr: 0.000017 loss_cls: 2.7142 (2.5971) grad_norm: 1.1102 (1.5503) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 06:10:03 root] (utils.py 283): INFO Epoch: [5] [1920/2502] eta: 0:27:51 lr: 0.000017 loss_cls: 2.7951 (2.5984) grad_norm: 1.1286 (1.5485) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 06:10:32 root] (utils.py 283): INFO Epoch: [5] [1930/2502] eta: 0:27:22 lr: 0.000017 loss_cls: 2.7951 (2.5980) grad_norm: 1.1687 (1.5465) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 06:11:00 root] (utils.py 283): INFO Epoch: [5] [1940/2502] eta: 0:26:54 lr: 0.000017 loss_cls: 2.6542 (2.5978) grad_norm: 1.0419 (1.5448) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-12 06:11:29 root] (utils.py 283): INFO Epoch: [5] [1950/2502] eta: 0:26:25 lr: 0.000017 loss_cls: 2.7306 (2.5983) grad_norm: 1.1378 (1.5462) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 06:11:58 root] (utils.py 283): INFO Epoch: [5] [1960/2502] eta: 0:25:56 lr: 0.000017 loss_cls: 2.7403 (2.5994) grad_norm: 1.2200 (1.5447) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 06:12:26 root] (utils.py 283): INFO Epoch: [5] [1970/2502] eta: 0:25:27 lr: 0.000017 loss_cls: 2.7403 (2.5996) grad_norm: 1.2113 (1.5429) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 06:12:55 root] (utils.py 283): INFO Epoch: [5] [1980/2502] eta: 0:24:59 lr: 0.000017 loss_cls: 2.8257 (2.6009) grad_norm: 1.1637 (1.5443) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 06:13:24 root] (utils.py 283): INFO Epoch: [5] [1990/2502] eta: 0:24:30 lr: 0.000017 loss_cls: 2.7328 (2.6007) grad_norm: 1.2167 (1.5494) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 06:13:52 root] (utils.py 283): INFO Epoch: [5] [2000/2502] eta: 0:24:01 lr: 0.000017 loss_cls: 2.4752 (2.6003) grad_norm: 1.2167 (1.5482) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 06:14:21 root] (utils.py 283): INFO Epoch: [5] [2010/2502] eta: 0:23:32 lr: 0.000017 loss_cls: 2.7024 (2.6010) grad_norm: 1.1731 (1.5468) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 06:14:50 root] (utils.py 283): INFO Epoch: [5] [2020/2502] eta: 0:23:04 lr: 0.000017 loss_cls: 2.7332 (2.6008) grad_norm: 1.1401 (1.5457) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 06:15:18 root] (utils.py 283): INFO Epoch: [5] [2030/2502] eta: 0:22:35 lr: 0.000017 loss_cls: 2.6302 (2.6007) grad_norm: 1.0769 (1.5432) time: 2.8601 data: 0.0002 max mem: 28454 +[2024-12-12 06:15:47 root] (utils.py 283): INFO Epoch: [5] [2040/2502] eta: 0:22:06 lr: 0.000017 loss_cls: 2.5015 (2.5997) grad_norm: 1.0669 (1.5415) time: 2.8606 data: 0.0002 max mem: 28454 +[2024-12-12 06:16:16 root] (utils.py 283): INFO Epoch: [5] [2050/2502] eta: 0:21:38 lr: 0.000017 loss_cls: 2.5292 (2.6003) grad_norm: 1.1000 (1.5396) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 06:16:44 root] (utils.py 283): INFO Epoch: [5] [2060/2502] eta: 0:21:09 lr: 0.000017 loss_cls: 2.6981 (2.5999) grad_norm: 1.1029 (1.5376) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 06:17:13 root] (utils.py 283): INFO Epoch: [5] [2070/2502] eta: 0:20:40 lr: 0.000017 loss_cls: 2.7529 (2.5999) grad_norm: 1.1521 (1.5368) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 06:17:42 root] (utils.py 283): INFO Epoch: [5] [2080/2502] eta: 0:20:11 lr: 0.000017 loss_cls: 2.7044 (2.5998) grad_norm: 1.1776 (1.5353) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 06:18:10 root] (utils.py 283): INFO Epoch: [5] [2090/2502] eta: 0:19:43 lr: 0.000017 loss_cls: 2.7137 (2.6001) grad_norm: 1.0757 (1.5331) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 06:18:39 root] (utils.py 283): INFO Epoch: [5] [2100/2502] eta: 0:19:14 lr: 0.000017 loss_cls: 2.8206 (2.6013) grad_norm: 1.0719 (1.5327) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 06:19:08 root] (utils.py 283): INFO Epoch: [5] [2110/2502] eta: 0:18:45 lr: 0.000017 loss_cls: 2.8206 (2.6019) grad_norm: 1.1086 (1.5315) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 06:19:36 root] (utils.py 283): INFO Epoch: [5] [2120/2502] eta: 0:18:16 lr: 0.000017 loss_cls: 2.6870 (2.6018) grad_norm: 1.1702 (1.5301) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 06:20:05 root] (utils.py 283): INFO Epoch: [5] [2130/2502] eta: 0:17:48 lr: 0.000017 loss_cls: 2.6317 (2.6006) grad_norm: 1.2001 (1.5286) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 06:20:34 root] (utils.py 283): INFO Epoch: [5] [2140/2502] eta: 0:17:19 lr: 0.000017 loss_cls: 2.6317 (2.6006) grad_norm: 1.1794 (1.5268) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 06:21:02 root] (utils.py 283): INFO Epoch: [5] [2150/2502] eta: 0:16:50 lr: 0.000017 loss_cls: 2.7457 (2.6011) grad_norm: 1.1205 (1.5251) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 06:21:31 root] (utils.py 283): INFO Epoch: [5] [2160/2502] eta: 0:16:22 lr: 0.000017 loss_cls: 2.7128 (2.6002) grad_norm: 1.0670 (1.5231) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 06:22:00 root] (utils.py 283): INFO Epoch: [5] [2170/2502] eta: 0:15:53 lr: 0.000017 loss_cls: 2.5748 (2.5992) grad_norm: 1.0670 (1.5213) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 06:22:28 root] (utils.py 283): INFO Epoch: [5] [2180/2502] eta: 0:15:24 lr: 0.000017 loss_cls: 2.4956 (2.5986) grad_norm: 1.0687 (1.5194) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 06:22:57 root] (utils.py 283): INFO Epoch: [5] [2190/2502] eta: 0:14:55 lr: 0.000017 loss_cls: 2.3212 (2.5961) grad_norm: 1.0801 (1.5179) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 06:23:26 root] (utils.py 283): INFO Epoch: [5] [2200/2502] eta: 0:14:27 lr: 0.000017 loss_cls: 2.2624 (2.5959) grad_norm: 1.1749 (1.5165) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 06:23:54 root] (utils.py 283): INFO Epoch: [5] [2210/2502] eta: 0:13:58 lr: 0.000017 loss_cls: 2.6777 (2.5955) grad_norm: 1.1631 (1.5148) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 06:24:23 root] (utils.py 283): INFO Epoch: [5] [2220/2502] eta: 0:13:29 lr: 0.000017 loss_cls: 2.6167 (2.5954) grad_norm: 1.1307 (1.5159) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 06:24:52 root] (utils.py 283): INFO Epoch: [5] [2230/2502] eta: 0:13:00 lr: 0.000017 loss_cls: 2.7512 (2.5955) grad_norm: 1.1409 (1.5144) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 06:25:20 root] (utils.py 283): INFO Epoch: [5] [2240/2502] eta: 0:12:32 lr: 0.000017 loss_cls: 2.6282 (2.5954) grad_norm: 1.1846 (1.5133) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 06:25:49 root] (utils.py 283): INFO Epoch: [5] [2250/2502] eta: 0:12:03 lr: 0.000017 loss_cls: 2.5040 (2.5945) grad_norm: 1.1526 (1.5120) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 06:26:18 root] (utils.py 283): INFO Epoch: [5] [2260/2502] eta: 0:11:34 lr: 0.000017 loss_cls: 2.2854 (2.5938) grad_norm: 1.1625 (1.5108) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 06:26:46 root] (utils.py 283): INFO Epoch: [5] [2270/2502] eta: 0:11:06 lr: 0.000017 loss_cls: 2.2854 (2.5927) grad_norm: 1.0845 (1.5124) time: 2.8655 data: 0.0002 max mem: 28454 +[2024-12-12 06:27:15 root] (utils.py 283): INFO Epoch: [5] [2280/2502] eta: 0:10:37 lr: 0.000017 loss_cls: 2.6711 (2.5939) grad_norm: 1.0745 (1.5108) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 06:27:44 root] (utils.py 283): INFO Epoch: [5] [2290/2502] eta: 0:10:08 lr: 0.000017 loss_cls: 2.8174 (2.5946) grad_norm: 1.0745 (1.5128) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 06:28:12 root] (utils.py 283): INFO Epoch: [5] [2300/2502] eta: 0:09:39 lr: 0.000017 loss_cls: 2.7156 (2.5946) grad_norm: 1.0884 (1.5134) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 06:28:41 root] (utils.py 283): INFO Epoch: [5] [2310/2502] eta: 0:09:11 lr: 0.000017 loss_cls: 2.4764 (2.5938) grad_norm: 1.0756 (1.5117) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 06:29:10 root] (utils.py 283): INFO Epoch: [5] [2320/2502] eta: 0:08:42 lr: 0.000017 loss_cls: 2.4420 (2.5932) grad_norm: 1.0988 (1.5106) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 06:29:38 root] (utils.py 283): INFO Epoch: [5] [2330/2502] eta: 0:08:13 lr: 0.000017 loss_cls: 2.6867 (2.5934) grad_norm: 1.1589 (1.5091) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 06:30:07 root] (utils.py 283): INFO Epoch: [5] [2340/2502] eta: 0:07:45 lr: 0.000017 loss_cls: 2.7455 (2.5937) grad_norm: 1.1339 (1.5078) time: 2.8654 data: 0.0002 max mem: 28454 +[2024-12-12 06:30:36 root] (utils.py 283): INFO Epoch: [5] [2350/2502] eta: 0:07:16 lr: 0.000017 loss_cls: 2.6655 (2.5935) grad_norm: 1.1192 (1.5063) time: 2.8631 data: 0.0002 max mem: 28454 +[2024-12-12 06:31:04 root] (utils.py 283): INFO Epoch: [5] [2360/2502] eta: 0:06:47 lr: 0.000017 loss_cls: 2.6494 (2.5937) grad_norm: 1.1559 (1.5048) time: 2.8639 data: 0.0002 max mem: 28454 +[2024-12-12 06:31:33 root] (utils.py 283): INFO Epoch: [5] [2370/2502] eta: 0:06:18 lr: 0.000017 loss_cls: 2.6494 (2.5935) grad_norm: 1.1563 (1.5088) time: 2.8644 data: 0.0002 max mem: 28454 +[2024-12-12 06:32:02 root] (utils.py 283): INFO Epoch: [5] [2380/2502] eta: 0:05:50 lr: 0.000017 loss_cls: 2.7004 (2.5932) grad_norm: 1.1563 (1.5119) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 06:32:30 root] (utils.py 283): INFO Epoch: [5] [2390/2502] eta: 0:05:21 lr: 0.000017 loss_cls: 2.7879 (2.5932) grad_norm: 1.1947 (1.5105) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 06:32:59 root] (utils.py 283): INFO Epoch: [5] [2400/2502] eta: 0:04:52 lr: 0.000017 loss_cls: 2.7607 (2.5935) grad_norm: 1.2063 (1.5093) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 06:33:28 root] (utils.py 283): INFO Epoch: [5] [2410/2502] eta: 0:04:24 lr: 0.000017 loss_cls: 2.7329 (2.5940) grad_norm: 1.2062 (1.5081) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 06:33:56 root] (utils.py 283): INFO Epoch: [5] [2420/2502] eta: 0:03:55 lr: 0.000017 loss_cls: 2.6282 (2.5932) grad_norm: 1.0605 (1.5062) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 06:34:25 root] (utils.py 283): INFO Epoch: [5] [2430/2502] eta: 0:03:26 lr: 0.000017 loss_cls: 2.6246 (2.5935) grad_norm: 1.0605 (1.5046) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 06:34:53 root] (utils.py 283): INFO Epoch: [5] [2440/2502] eta: 0:02:57 lr: 0.000017 loss_cls: 2.7144 (2.5935) grad_norm: 1.1613 (1.5033) time: 2.8643 data: 0.0002 max mem: 28454 +[2024-12-12 06:35:22 root] (utils.py 283): INFO Epoch: [5] [2450/2502] eta: 0:02:29 lr: 0.000017 loss_cls: 2.7340 (2.5937) grad_norm: 1.0768 (1.5016) time: 2.8633 data: 0.0002 max mem: 28454 +[2024-12-12 06:35:51 root] (utils.py 283): INFO Epoch: [5] [2460/2502] eta: 0:02:00 lr: 0.000017 loss_cls: 2.6760 (2.5933) grad_norm: 1.1352 (1.5006) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 06:36:19 root] (utils.py 283): INFO Epoch: [5] [2470/2502] eta: 0:01:31 lr: 0.000017 loss_cls: 2.5479 (2.5933) grad_norm: 1.1352 (1.4993) time: 2.8646 data: 0.0002 max mem: 28454 +[2024-12-12 06:36:48 root] (utils.py 283): INFO Epoch: [5] [2480/2502] eta: 0:01:03 lr: 0.000017 loss_cls: 2.6504 (2.5934) grad_norm: 1.1284 (1.4991) time: 2.8611 data: 0.0002 max mem: 28454 +[2024-12-12 06:37:17 root] (utils.py 283): INFO Epoch: [5] [2490/2502] eta: 0:00:34 lr: 0.000017 loss_cls: 2.7713 (2.5939) grad_norm: 1.1682 (1.4978) time: 2.8817 data: 0.0207 max mem: 28454 +[2024-12-12 06:37:46 root] (utils.py 283): INFO Epoch: [5] [2500/2502] eta: 0:00:05 lr: 0.000017 loss_cls: 2.7849 (2.5941) grad_norm: 1.1684 (1.5231) time: 2.8864 data: 0.0207 max mem: 28454 +[2024-12-12 06:37:49 root] (utils.py 283): INFO Epoch: [5] [2501/2502] eta: 0:00:02 lr: 0.000017 loss_cls: 2.7808 (2.5942) grad_norm: 1.1684 (1.5234) time: 2.8867 data: 0.0207 max mem: 28454 +[2024-12-12 06:37:49 root] (utils.py 297): INFO Epoch: [5] Total time: 1:59:43 (2.8710 s / it) +[2024-12-12 06:37:49 root] (engine.py 179): INFO Averaged stats:lr: 0.000017 loss_cls: 2.7808 (2.5893) grad_norm: 1.1684 (1.5234) +[2024-12-12 06:37:52 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4036 (0.4036) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5425 data: 0.0003 max mem: 28454 +[2024-12-12 06:37:57 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6486 (0.5954) acc1: 85.1562 (87.2869) acc3: 97.6562 (96.9460) acc5: 98.4375 (97.9403) time: 0.5462 data: 0.0003 max mem: 28454 +[2024-12-12 06:38:03 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6349 (0.6220) acc1: 87.5000 (87.2396) acc3: 96.8750 (96.3914) acc5: 97.6562 (97.7307) time: 0.5472 data: 0.0004 max mem: 28454 +[2024-12-12 06:38:08 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6349 (0.6569) acc1: 86.7188 (85.9123) acc3: 95.3125 (96.0685) acc5: 97.6562 (97.7319) time: 0.5479 data: 0.0004 max mem: 28454 +[2024-12-12 06:38:14 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6762 (0.6635) acc1: 85.9375 (85.6517) acc3: 95.3125 (95.9794) acc5: 97.6562 (97.6753) time: 0.5481 data: 0.0004 max mem: 28454 +[2024-12-12 06:38:19 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8000 (0.7419) acc1: 78.1250 (83.8235) acc3: 93.7500 (94.8070) acc5: 95.3125 (96.8444) time: 0.5482 data: 0.0005 max mem: 28454 +[2024-12-12 06:38:25 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9527 (0.7680) acc1: 78.1250 (83.4273) acc3: 90.6250 (94.2623) acc5: 93.7500 (96.3627) time: 0.5476 data: 0.0004 max mem: 28454 +[2024-12-12 06:38:30 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9527 (0.7960) acc1: 81.2500 (82.6915) acc3: 92.1875 (93.9371) acc5: 94.5312 (96.1818) time: 0.5475 data: 0.0004 max mem: 28454 +[2024-12-12 06:38:36 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0070 (0.8201) acc1: 78.1250 (82.1856) acc3: 92.1875 (93.6053) acc5: 94.5312 (95.8816) time: 0.5482 data: 0.0006 max mem: 28454 +[2024-12-12 06:38:41 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9611 (0.8400) acc1: 78.9062 (81.5677) acc3: 90.6250 (93.3207) acc5: 93.7500 (95.7160) time: 0.5485 data: 0.0006 max mem: 28454 +[2024-12-12 06:38:45 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8940 (0.8372) acc1: 78.9062 (81.5920) acc3: 91.4062 (93.3600) acc5: 94.5312 (95.7600) time: 0.5392 data: 0.0005 max mem: 28454 +[2024-12-12 06:38:45 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5461 s / it) +[2024-12-12 06:38:45 root] (engine.py 264): INFO * Acc@1 81.616 Acc@3 93.376 Acc@5 95.740 loss 0.837 flops 13.207 layer_flops 13.109 +[2024-12-12 06:38:45 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.6% +[2024-12-12 06:38:45 root] (main.py 576): INFO Max accuracy: 81.74% +[2024-12-12 06:38:47 root] (utils.py 283): INFO Epoch: [6] [ 0/2502] eta: 1:58:34 lr: 0.000015 loss_cls: 3.2800 (3.2800) grad_norm: 1.3258 (1.3258) time: 2.8435 data: 0.0003 max mem: 28454 +[2024-12-12 06:39:16 root] (utils.py 283): INFO Epoch: [6] [ 10/2502] eta: 1:59:04 lr: 0.000015 loss_cls: 2.7059 (2.5185) grad_norm: 1.5172 (1.5219) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 06:39:45 root] (utils.py 283): INFO Epoch: [6] [ 20/2502] eta: 1:58:38 lr: 0.000015 loss_cls: 2.7059 (2.6098) grad_norm: 1.5102 (1.5129) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 06:40:14 root] (utils.py 283): INFO Epoch: [6] [ 30/2502] eta: 1:58:13 lr: 0.000015 loss_cls: 2.8850 (2.6544) grad_norm: 1.4106 (1.5392) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 06:40:42 root] (utils.py 283): INFO Epoch: [6] [ 40/2502] eta: 1:57:46 lr: 0.000015 loss_cls: 2.7012 (2.6089) grad_norm: 1.3805 (1.5524) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 06:41:11 root] (utils.py 283): INFO Epoch: [6] [ 50/2502] eta: 1:57:16 lr: 0.000015 loss_cls: 2.5575 (2.5902) grad_norm: 1.1741 (1.4683) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 06:41:40 root] (utils.py 283): INFO Epoch: [6] [ 60/2502] eta: 1:56:47 lr: 0.000015 loss_cls: 2.5575 (2.5536) grad_norm: 1.1186 (1.4172) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 06:42:08 root] (utils.py 283): INFO Epoch: [6] [ 70/2502] eta: 1:56:19 lr: 0.000015 loss_cls: 2.6256 (2.5760) grad_norm: 1.1610 (1.3879) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 06:42:37 root] (utils.py 283): INFO Epoch: [6] [ 80/2502] eta: 1:55:51 lr: 0.000015 loss_cls: 2.6722 (2.5903) grad_norm: 1.2193 (1.8664) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 06:43:06 root] (utils.py 283): INFO Epoch: [6] [ 90/2502] eta: 1:55:24 lr: 0.000015 loss_cls: 2.7126 (2.5945) grad_norm: 1.4227 (1.8542) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 06:43:35 root] (utils.py 283): INFO Epoch: [6] [ 100/2502] eta: 1:54:55 lr: 0.000015 loss_cls: 2.7126 (2.5959) grad_norm: 1.4093 (1.8037) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 06:44:03 root] (utils.py 283): INFO Epoch: [6] [ 110/2502] eta: 1:54:27 lr: 0.000015 loss_cls: 2.7111 (2.5965) grad_norm: 1.2198 (1.7639) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 06:44:32 root] (utils.py 283): INFO Epoch: [6] [ 120/2502] eta: 1:53:58 lr: 0.000015 loss_cls: 2.5918 (2.5841) grad_norm: 1.1468 (1.7248) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 06:45:01 root] (utils.py 283): INFO Epoch: [6] [ 130/2502] eta: 1:53:30 lr: 0.000015 loss_cls: 2.5164 (2.5876) grad_norm: 1.1802 (1.7273) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 06:45:29 root] (utils.py 283): INFO Epoch: [6] [ 140/2502] eta: 1:53:01 lr: 0.000015 loss_cls: 2.7548 (2.6062) grad_norm: 1.2336 (1.6922) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 06:45:58 root] (utils.py 283): INFO Epoch: [6] [ 150/2502] eta: 1:52:32 lr: 0.000015 loss_cls: 2.6541 (2.5954) grad_norm: 1.1137 (1.6555) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 06:46:27 root] (utils.py 283): INFO Epoch: [6] [ 160/2502] eta: 1:52:04 lr: 0.000015 loss_cls: 2.6098 (2.5976) grad_norm: 1.1137 (1.6255) time: 2.8720 data: 0.0003 max mem: 28454 +[2024-12-12 06:46:56 root] (utils.py 283): INFO Epoch: [6] [ 170/2502] eta: 1:51:35 lr: 0.000015 loss_cls: 2.7365 (2.6025) grad_norm: 1.1657 (1.5983) time: 2.8720 data: 0.0003 max mem: 28454 +[2024-12-12 06:47:24 root] (utils.py 283): INFO Epoch: [6] [ 180/2502] eta: 1:51:07 lr: 0.000015 loss_cls: 2.7254 (2.5933) grad_norm: 1.1722 (1.5834) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 06:47:53 root] (utils.py 283): INFO Epoch: [6] [ 190/2502] eta: 1:50:38 lr: 0.000015 loss_cls: 2.4740 (2.5902) grad_norm: 1.2001 (1.5705) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 06:48:22 root] (utils.py 283): INFO Epoch: [6] [ 200/2502] eta: 1:50:10 lr: 0.000015 loss_cls: 2.7272 (2.5966) grad_norm: 1.1304 (1.5520) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 06:48:51 root] (utils.py 283): INFO Epoch: [6] [ 210/2502] eta: 1:49:41 lr: 0.000015 loss_cls: 2.7780 (2.5995) grad_norm: 1.1465 (1.5377) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 06:49:19 root] (utils.py 283): INFO Epoch: [6] [ 220/2502] eta: 1:49:13 lr: 0.000015 loss_cls: 2.7185 (2.5986) grad_norm: 1.2051 (1.5265) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 06:49:48 root] (utils.py 283): INFO Epoch: [6] [ 230/2502] eta: 1:48:44 lr: 0.000015 loss_cls: 2.6443 (2.5925) grad_norm: 1.1277 (1.5072) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 06:50:17 root] (utils.py 283): INFO Epoch: [6] [ 240/2502] eta: 1:48:15 lr: 0.000015 loss_cls: 2.6867 (2.5942) grad_norm: 1.1926 (1.5003) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 06:50:45 root] (utils.py 283): INFO Epoch: [6] [ 250/2502] eta: 1:47:47 lr: 0.000015 loss_cls: 2.6867 (2.5918) grad_norm: 1.1444 (1.4836) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 06:51:14 root] (utils.py 283): INFO Epoch: [6] [ 260/2502] eta: 1:47:18 lr: 0.000015 loss_cls: 2.5755 (2.5950) grad_norm: 1.1142 (1.4833) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 06:51:43 root] (utils.py 283): INFO Epoch: [6] [ 270/2502] eta: 1:46:50 lr: 0.000015 loss_cls: 2.7651 (2.6034) grad_norm: 1.1442 (1.4763) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 06:52:12 root] (utils.py 283): INFO Epoch: [6] [ 280/2502] eta: 1:46:21 lr: 0.000015 loss_cls: 2.7718 (2.6034) grad_norm: 1.1598 (1.4789) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 06:52:40 root] (utils.py 283): INFO Epoch: [6] [ 290/2502] eta: 1:45:52 lr: 0.000015 loss_cls: 2.6551 (2.6019) grad_norm: 1.1341 (1.4664) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 06:53:09 root] (utils.py 283): INFO Epoch: [6] [ 300/2502] eta: 1:45:24 lr: 0.000015 loss_cls: 2.6818 (2.6009) grad_norm: 1.1142 (1.4593) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 06:53:38 root] (utils.py 283): INFO Epoch: [6] [ 310/2502] eta: 1:44:56 lr: 0.000015 loss_cls: 2.7247 (2.5935) grad_norm: 1.1273 (1.4480) time: 2.8796 data: 0.0002 max mem: 28454 +[2024-12-12 06:54:07 root] (utils.py 283): INFO Epoch: [6] [ 320/2502] eta: 1:44:27 lr: 0.000015 loss_cls: 2.5305 (2.5886) grad_norm: 1.1144 (1.4388) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 06:54:35 root] (utils.py 283): INFO Epoch: [6] [ 330/2502] eta: 1:43:58 lr: 0.000015 loss_cls: 2.5305 (2.5888) grad_norm: 1.1028 (1.4807) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 06:55:04 root] (utils.py 283): INFO Epoch: [6] [ 340/2502] eta: 1:43:29 lr: 0.000015 loss_cls: 2.5747 (2.5900) grad_norm: 1.1677 (1.4772) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 06:55:33 root] (utils.py 283): INFO Epoch: [6] [ 350/2502] eta: 1:43:01 lr: 0.000015 loss_cls: 2.6916 (2.5940) grad_norm: 1.1696 (1.4693) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 06:56:02 root] (utils.py 283): INFO Epoch: [6] [ 360/2502] eta: 1:42:32 lr: 0.000015 loss_cls: 2.6916 (2.5986) grad_norm: 1.1688 (1.4636) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 06:56:30 root] (utils.py 283): INFO Epoch: [6] [ 370/2502] eta: 1:42:04 lr: 0.000015 loss_cls: 2.7434 (2.6011) grad_norm: 1.2144 (1.4576) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 06:56:59 root] (utils.py 283): INFO Epoch: [6] [ 380/2502] eta: 1:41:35 lr: 0.000015 loss_cls: 2.5739 (2.5916) grad_norm: 1.1466 (1.4488) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 06:57:28 root] (utils.py 283): INFO Epoch: [6] [ 390/2502] eta: 1:41:06 lr: 0.000015 loss_cls: 2.3411 (2.5920) grad_norm: 1.1317 (1.4401) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 06:57:57 root] (utils.py 283): INFO Epoch: [6] [ 400/2502] eta: 1:40:37 lr: 0.000015 loss_cls: 2.6890 (2.5930) grad_norm: 1.1571 (1.4340) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 06:58:25 root] (utils.py 283): INFO Epoch: [6] [ 410/2502] eta: 1:40:09 lr: 0.000015 loss_cls: 2.6801 (2.5914) grad_norm: 1.1406 (1.4269) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 06:58:54 root] (utils.py 283): INFO Epoch: [6] [ 420/2502] eta: 1:39:40 lr: 0.000015 loss_cls: 2.5181 (2.5864) grad_norm: 1.0588 (1.4179) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-12 06:59:23 root] (utils.py 283): INFO Epoch: [6] [ 430/2502] eta: 1:39:11 lr: 0.000015 loss_cls: 2.6724 (2.5890) grad_norm: 1.0758 (1.4181) time: 2.8698 data: 0.0003 max mem: 28454 +[2024-12-12 06:59:51 root] (utils.py 283): INFO Epoch: [6] [ 440/2502] eta: 1:38:42 lr: 0.000015 loss_cls: 2.7621 (2.5886) grad_norm: 1.0793 (1.4147) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 07:00:20 root] (utils.py 283): INFO Epoch: [6] [ 450/2502] eta: 1:38:14 lr: 0.000015 loss_cls: 2.7189 (2.5870) grad_norm: 1.0572 (1.4080) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 07:00:49 root] (utils.py 283): INFO Epoch: [6] [ 460/2502] eta: 1:37:45 lr: 0.000015 loss_cls: 2.4789 (2.5817) grad_norm: 1.0915 (1.4193) time: 2.8687 data: 0.0003 max mem: 28454 +[2024-12-12 07:01:17 root] (utils.py 283): INFO Epoch: [6] [ 470/2502] eta: 1:37:16 lr: 0.000015 loss_cls: 2.5049 (2.5823) grad_norm: 1.1286 (1.4137) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 07:01:46 root] (utils.py 283): INFO Epoch: [6] [ 480/2502] eta: 1:36:47 lr: 0.000015 loss_cls: 2.7978 (2.5862) grad_norm: 1.1353 (1.4109) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 07:02:15 root] (utils.py 283): INFO Epoch: [6] [ 490/2502] eta: 1:36:18 lr: 0.000015 loss_cls: 2.7959 (2.5865) grad_norm: 1.1479 (1.4054) time: 2.8709 data: 0.0003 max mem: 28454 +[2024-12-12 07:02:43 root] (utils.py 283): INFO Epoch: [6] [ 500/2502] eta: 1:35:49 lr: 0.000015 loss_cls: 2.7237 (2.5866) grad_norm: 1.0555 (1.3996) time: 2.8635 data: 0.0002 max mem: 28454 +[2024-12-12 07:03:12 root] (utils.py 283): INFO Epoch: [6] [ 510/2502] eta: 1:35:20 lr: 0.000015 loss_cls: 2.7168 (2.5891) grad_norm: 1.0544 (1.3932) time: 2.8596 data: 0.0002 max mem: 28454 +[2024-12-12 07:03:41 root] (utils.py 283): INFO Epoch: [6] [ 520/2502] eta: 1:34:50 lr: 0.000015 loss_cls: 2.5670 (2.5884) grad_norm: 1.0748 (1.4012) time: 2.8583 data: 0.0002 max mem: 28454 +[2024-12-12 07:04:09 root] (utils.py 283): INFO Epoch: [6] [ 530/2502] eta: 1:34:21 lr: 0.000015 loss_cls: 2.7100 (2.5884) grad_norm: 1.1771 (1.4025) time: 2.8561 data: 0.0002 max mem: 28454 +[2024-12-12 07:04:38 root] (utils.py 283): INFO Epoch: [6] [ 540/2502] eta: 1:33:52 lr: 0.000015 loss_cls: 2.7100 (2.5904) grad_norm: 1.1771 (1.3989) time: 2.8591 data: 0.0002 max mem: 28454 +[2024-12-12 07:05:07 root] (utils.py 283): INFO Epoch: [6] [ 550/2502] eta: 1:33:23 lr: 0.000015 loss_cls: 2.7172 (2.5919) grad_norm: 1.1575 (1.3952) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 07:05:35 root] (utils.py 283): INFO Epoch: [6] [ 560/2502] eta: 1:32:55 lr: 0.000015 loss_cls: 2.7470 (2.5948) grad_norm: 1.1132 (1.3905) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 07:06:04 root] (utils.py 283): INFO Epoch: [6] [ 570/2502] eta: 1:32:26 lr: 0.000015 loss_cls: 2.7470 (2.5942) grad_norm: 1.1607 (1.3877) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 07:06:33 root] (utils.py 283): INFO Epoch: [6] [ 580/2502] eta: 1:31:57 lr: 0.000015 loss_cls: 2.5032 (2.5933) grad_norm: 1.2194 (1.3842) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 07:07:01 root] (utils.py 283): INFO Epoch: [6] [ 590/2502] eta: 1:31:29 lr: 0.000015 loss_cls: 2.6319 (2.5910) grad_norm: 1.1745 (1.3831) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 07:07:30 root] (utils.py 283): INFO Epoch: [6] [ 600/2502] eta: 1:31:00 lr: 0.000015 loss_cls: 2.7124 (2.5930) grad_norm: 1.2416 (1.3830) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 07:07:59 root] (utils.py 283): INFO Epoch: [6] [ 610/2502] eta: 1:30:31 lr: 0.000015 loss_cls: 2.7350 (2.5947) grad_norm: 1.2416 (1.3834) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 07:08:27 root] (utils.py 283): INFO Epoch: [6] [ 620/2502] eta: 1:30:02 lr: 0.000015 loss_cls: 2.6482 (2.5959) grad_norm: 1.1041 (1.3790) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 07:08:56 root] (utils.py 283): INFO Epoch: [6] [ 630/2502] eta: 1:29:33 lr: 0.000015 loss_cls: 2.6440 (2.5941) grad_norm: 1.0908 (1.3764) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 07:09:25 root] (utils.py 283): INFO Epoch: [6] [ 640/2502] eta: 1:29:05 lr: 0.000015 loss_cls: 2.6365 (2.5913) grad_norm: 1.0952 (1.3720) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 07:09:53 root] (utils.py 283): INFO Epoch: [6] [ 650/2502] eta: 1:28:36 lr: 0.000015 loss_cls: 2.6365 (2.5931) grad_norm: 1.1204 (1.3725) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 07:10:22 root] (utils.py 283): INFO Epoch: [6] [ 660/2502] eta: 1:28:07 lr: 0.000015 loss_cls: 2.7520 (2.5930) grad_norm: 1.1540 (1.3720) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 07:10:51 root] (utils.py 283): INFO Epoch: [6] [ 670/2502] eta: 1:27:38 lr: 0.000015 loss_cls: 2.6882 (2.5929) grad_norm: 1.1540 (1.3816) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 07:11:20 root] (utils.py 283): INFO Epoch: [6] [ 680/2502] eta: 1:27:10 lr: 0.000015 loss_cls: 2.5340 (2.5915) grad_norm: 1.1081 (1.3772) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 07:11:48 root] (utils.py 283): INFO Epoch: [6] [ 690/2502] eta: 1:26:41 lr: 0.000015 loss_cls: 2.5785 (2.5921) grad_norm: 1.1252 (1.3808) time: 2.8689 data: 0.0003 max mem: 28454 +[2024-12-12 07:12:17 root] (utils.py 283): INFO Epoch: [6] [ 700/2502] eta: 1:26:12 lr: 0.000015 loss_cls: 2.6431 (2.5924) grad_norm: 1.1246 (1.3776) time: 2.8667 data: 0.0003 max mem: 28454 +[2024-12-12 07:12:46 root] (utils.py 283): INFO Epoch: [6] [ 710/2502] eta: 1:25:43 lr: 0.000015 loss_cls: 2.6239 (2.5923) grad_norm: 1.0410 (1.3777) time: 2.8657 data: 0.0003 max mem: 28454 +[2024-12-12 07:13:14 root] (utils.py 283): INFO Epoch: [6] [ 720/2502] eta: 1:25:14 lr: 0.000015 loss_cls: 2.6239 (2.5919) grad_norm: 1.1511 (1.3751) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 07:13:43 root] (utils.py 283): INFO Epoch: [6] [ 730/2502] eta: 1:24:46 lr: 0.000015 loss_cls: 2.6689 (2.5910) grad_norm: 1.2083 (1.3759) time: 2.8672 data: 0.0003 max mem: 28454 +[2024-12-12 07:14:12 root] (utils.py 283): INFO Epoch: [6] [ 740/2502] eta: 1:24:17 lr: 0.000015 loss_cls: 2.7819 (2.5924) grad_norm: 1.2083 (1.3744) time: 2.8684 data: 0.0003 max mem: 28454 +[2024-12-12 07:14:40 root] (utils.py 283): INFO Epoch: [6] [ 750/2502] eta: 1:23:48 lr: 0.000015 loss_cls: 2.7819 (2.5936) grad_norm: 1.8371 (1.4569) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 07:15:09 root] (utils.py 283): INFO Epoch: [6] [ 760/2502] eta: 1:23:20 lr: 0.000015 loss_cls: 2.7385 (2.5938) grad_norm: 1.8757 (1.4612) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 07:15:38 root] (utils.py 283): INFO Epoch: [6] [ 770/2502] eta: 1:22:51 lr: 0.000015 loss_cls: 2.6458 (2.5940) grad_norm: 1.5671 (1.4632) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 07:16:06 root] (utils.py 283): INFO Epoch: [6] [ 780/2502] eta: 1:22:22 lr: 0.000015 loss_cls: 2.6962 (2.5951) grad_norm: 1.4139 (1.4638) time: 2.8646 data: 0.0003 max mem: 28454 +[2024-12-12 07:16:35 root] (utils.py 283): INFO Epoch: [6] [ 790/2502] eta: 1:21:53 lr: 0.000015 loss_cls: 2.8415 (2.5964) grad_norm: 1.3056 (1.4610) time: 2.8658 data: 0.0003 max mem: 28454 +[2024-12-12 07:17:04 root] (utils.py 283): INFO Epoch: [6] [ 800/2502] eta: 1:21:24 lr: 0.000015 loss_cls: 2.6256 (2.5950) grad_norm: 1.1664 (1.4573) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 07:17:32 root] (utils.py 283): INFO Epoch: [6] [ 810/2502] eta: 1:20:56 lr: 0.000015 loss_cls: 2.5744 (2.5961) grad_norm: 1.1574 (1.4602) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 07:18:01 root] (utils.py 283): INFO Epoch: [6] [ 820/2502] eta: 1:20:27 lr: 0.000015 loss_cls: 2.8189 (2.5964) grad_norm: 1.1405 (1.4782) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 07:18:30 root] (utils.py 283): INFO Epoch: [6] [ 830/2502] eta: 1:19:58 lr: 0.000015 loss_cls: 2.7512 (2.5976) grad_norm: 1.1695 (1.4755) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 07:18:58 root] (utils.py 283): INFO Epoch: [6] [ 840/2502] eta: 1:19:29 lr: 0.000015 loss_cls: 2.7274 (2.5955) grad_norm: 1.2093 (1.4723) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 07:19:27 root] (utils.py 283): INFO Epoch: [6] [ 850/2502] eta: 1:19:01 lr: 0.000015 loss_cls: 2.7291 (2.5972) grad_norm: 1.2025 (1.4684) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 07:19:56 root] (utils.py 283): INFO Epoch: [6] [ 860/2502] eta: 1:18:32 lr: 0.000015 loss_cls: 2.7291 (2.5968) grad_norm: 1.1091 (1.4637) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 07:20:25 root] (utils.py 283): INFO Epoch: [6] [ 870/2502] eta: 1:18:03 lr: 0.000015 loss_cls: 2.8460 (2.6006) grad_norm: 1.1091 (1.4605) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 07:20:53 root] (utils.py 283): INFO Epoch: [6] [ 880/2502] eta: 1:17:35 lr: 0.000015 loss_cls: 2.8929 (2.6019) grad_norm: 1.1193 (1.4562) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 07:21:22 root] (utils.py 283): INFO Epoch: [6] [ 890/2502] eta: 1:17:06 lr: 0.000015 loss_cls: 2.6174 (2.5996) grad_norm: 1.1193 (1.4527) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 07:21:51 root] (utils.py 283): INFO Epoch: [6] [ 900/2502] eta: 1:16:37 lr: 0.000015 loss_cls: 2.4509 (2.5990) grad_norm: 1.1687 (1.4504) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 07:22:19 root] (utils.py 283): INFO Epoch: [6] [ 910/2502] eta: 1:16:09 lr: 0.000015 loss_cls: 2.6277 (2.5980) grad_norm: 1.1722 (1.4494) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 07:22:48 root] (utils.py 283): INFO Epoch: [6] [ 920/2502] eta: 1:15:40 lr: 0.000015 loss_cls: 2.2709 (2.5932) grad_norm: 1.1294 (1.4463) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 07:23:17 root] (utils.py 283): INFO Epoch: [6] [ 930/2502] eta: 1:15:11 lr: 0.000015 loss_cls: 2.4538 (2.5937) grad_norm: 1.0717 (1.4472) time: 2.8709 data: 0.0003 max mem: 28454 +[2024-12-12 07:23:45 root] (utils.py 283): INFO Epoch: [6] [ 940/2502] eta: 1:14:43 lr: 0.000015 loss_cls: 2.5981 (2.5925) grad_norm: 1.0907 (1.4435) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 07:24:14 root] (utils.py 283): INFO Epoch: [6] [ 950/2502] eta: 1:14:14 lr: 0.000015 loss_cls: 2.5981 (2.5919) grad_norm: 1.0939 (1.4399) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 07:24:43 root] (utils.py 283): INFO Epoch: [6] [ 960/2502] eta: 1:13:45 lr: 0.000015 loss_cls: 2.6244 (2.5909) grad_norm: 1.1060 (1.4365) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 07:25:12 root] (utils.py 283): INFO Epoch: [6] [ 970/2502] eta: 1:13:17 lr: 0.000015 loss_cls: 2.5411 (2.5899) grad_norm: 1.0929 (1.4332) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 07:25:40 root] (utils.py 283): INFO Epoch: [6] [ 980/2502] eta: 1:12:48 lr: 0.000015 loss_cls: 2.4646 (2.5886) grad_norm: 1.0545 (1.4302) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 07:26:09 root] (utils.py 283): INFO Epoch: [6] [ 990/2502] eta: 1:12:19 lr: 0.000015 loss_cls: 2.6026 (2.5895) grad_norm: 1.1299 (1.4453) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 07:26:38 root] (utils.py 283): INFO Epoch: [6] [1000/2502] eta: 1:11:51 lr: 0.000015 loss_cls: 2.6419 (2.5885) grad_norm: 1.1757 (1.4443) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 07:27:07 root] (utils.py 283): INFO Epoch: [6] [1010/2502] eta: 1:11:22 lr: 0.000015 loss_cls: 2.6419 (2.5888) grad_norm: 1.1804 (1.4457) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 07:27:35 root] (utils.py 283): INFO Epoch: [6] [1020/2502] eta: 1:10:53 lr: 0.000015 loss_cls: 2.5906 (2.5880) grad_norm: 1.2092 (1.4438) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 07:28:04 root] (utils.py 283): INFO Epoch: [6] [1030/2502] eta: 1:10:25 lr: 0.000015 loss_cls: 2.6599 (2.5881) grad_norm: 1.1525 (1.4843) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 07:28:33 root] (utils.py 283): INFO Epoch: [6] [1040/2502] eta: 1:09:56 lr: 0.000015 loss_cls: 2.6599 (2.5872) grad_norm: 1.3740 (1.4847) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 07:29:02 root] (utils.py 283): INFO Epoch: [6] [1050/2502] eta: 1:09:27 lr: 0.000015 loss_cls: 2.5422 (2.5857) grad_norm: 1.2981 (1.4828) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 07:29:30 root] (utils.py 283): INFO Epoch: [6] [1060/2502] eta: 1:08:59 lr: 0.000015 loss_cls: 2.6544 (2.5852) grad_norm: 1.1400 (1.4796) time: 2.8751 data: 0.0003 max mem: 28454 +[2024-12-12 07:29:59 root] (utils.py 283): INFO Epoch: [6] [1070/2502] eta: 1:08:30 lr: 0.000015 loss_cls: 2.7657 (2.5878) grad_norm: 1.1206 (1.4882) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 07:30:28 root] (utils.py 283): INFO Epoch: [6] [1080/2502] eta: 1:08:01 lr: 0.000015 loss_cls: 2.7895 (2.5865) grad_norm: 1.1481 (1.4847) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 07:30:56 root] (utils.py 283): INFO Epoch: [6] [1090/2502] eta: 1:07:33 lr: 0.000015 loss_cls: 2.5704 (2.5849) grad_norm: 1.1287 (1.4816) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 07:31:25 root] (utils.py 283): INFO Epoch: [6] [1100/2502] eta: 1:07:04 lr: 0.000015 loss_cls: 2.5359 (2.5853) grad_norm: 1.1013 (1.4779) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 07:31:54 root] (utils.py 283): INFO Epoch: [6] [1110/2502] eta: 1:06:35 lr: 0.000015 loss_cls: 2.5212 (2.5850) grad_norm: 1.1042 (1.4783) time: 2.8759 data: 0.0003 max mem: 28454 +[2024-12-12 07:32:23 root] (utils.py 283): INFO Epoch: [6] [1120/2502] eta: 1:06:07 lr: 0.000015 loss_cls: 2.5497 (2.5853) grad_norm: 1.1607 (1.4766) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 07:32:51 root] (utils.py 283): INFO Epoch: [6] [1130/2502] eta: 1:05:38 lr: 0.000015 loss_cls: 2.7161 (2.5864) grad_norm: 1.1435 (1.4737) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 07:33:20 root] (utils.py 283): INFO Epoch: [6] [1140/2502] eta: 1:05:09 lr: 0.000015 loss_cls: 2.7161 (2.5876) grad_norm: 1.0841 (1.4736) time: 2.8783 data: 0.0003 max mem: 28454 +[2024-12-12 07:33:49 root] (utils.py 283): INFO Epoch: [6] [1150/2502] eta: 1:04:41 lr: 0.000015 loss_cls: 2.6433 (2.5877) grad_norm: 1.1771 (1.5118) time: 2.8779 data: 0.0003 max mem: 28454 +[2024-12-12 07:34:18 root] (utils.py 283): INFO Epoch: [6] [1160/2502] eta: 1:04:12 lr: 0.000015 loss_cls: 2.6763 (2.5876) grad_norm: 1.1771 (1.5095) time: 2.8759 data: 0.0003 max mem: 28454 +[2024-12-12 07:34:47 root] (utils.py 283): INFO Epoch: [6] [1170/2502] eta: 1:03:44 lr: 0.000015 loss_cls: 2.5779 (2.5859) grad_norm: 1.1198 (1.5071) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 07:35:15 root] (utils.py 283): INFO Epoch: [6] [1180/2502] eta: 1:03:15 lr: 0.000015 loss_cls: 2.5179 (2.5863) grad_norm: 1.1186 (1.5139) time: 2.8818 data: 0.0002 max mem: 28454 +[2024-12-12 07:35:44 root] (utils.py 283): INFO Epoch: [6] [1190/2502] eta: 1:02:46 lr: 0.000015 loss_cls: 2.7554 (2.5875) grad_norm: 1.1721 (1.5139) time: 2.8804 data: 0.0002 max mem: 28454 +[2024-12-12 07:36:13 root] (utils.py 283): INFO Epoch: [6] [1200/2502] eta: 1:02:18 lr: 0.000015 loss_cls: 2.8180 (2.5882) grad_norm: 1.3092 (1.5116) time: 2.8786 data: 0.0002 max mem: 28454 +[2024-12-12 07:36:42 root] (utils.py 283): INFO Epoch: [6] [1210/2502] eta: 1:01:49 lr: 0.000015 loss_cls: 2.6289 (2.5871) grad_norm: 1.2704 (1.5103) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-12 07:37:11 root] (utils.py 283): INFO Epoch: [6] [1220/2502] eta: 1:01:20 lr: 0.000015 loss_cls: 2.5705 (2.5880) grad_norm: 1.1452 (1.5073) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 07:37:39 root] (utils.py 283): INFO Epoch: [6] [1230/2502] eta: 1:00:52 lr: 0.000015 loss_cls: 2.5541 (2.5855) grad_norm: 1.0939 (1.5048) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-12 07:38:08 root] (utils.py 283): INFO Epoch: [6] [1240/2502] eta: 1:00:23 lr: 0.000015 loss_cls: 2.4166 (2.5855) grad_norm: 1.0813 (1.5035) time: 2.8745 data: 0.0003 max mem: 28454 +[2024-12-12 07:38:37 root] (utils.py 283): INFO Epoch: [6] [1250/2502] eta: 0:59:54 lr: 0.000015 loss_cls: 2.5113 (2.5857) grad_norm: 1.1147 (1.5018) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 07:39:05 root] (utils.py 283): INFO Epoch: [6] [1260/2502] eta: 0:59:26 lr: 0.000015 loss_cls: 2.5295 (2.5843) grad_norm: 1.1083 (1.4991) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 07:39:34 root] (utils.py 283): INFO Epoch: [6] [1270/2502] eta: 0:58:57 lr: 0.000015 loss_cls: 2.5854 (2.5841) grad_norm: 1.0821 (1.4959) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 07:40:03 root] (utils.py 283): INFO Epoch: [6] [1280/2502] eta: 0:58:28 lr: 0.000015 loss_cls: 2.6303 (2.5842) grad_norm: 1.0761 (1.5031) time: 2.8783 data: 0.0002 max mem: 28454 +[2024-12-12 07:40:32 root] (utils.py 283): INFO Epoch: [6] [1290/2502] eta: 0:58:00 lr: 0.000015 loss_cls: 2.5148 (2.5836) grad_norm: 1.0846 (1.5003) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-12 07:41:01 root] (utils.py 283): INFO Epoch: [6] [1300/2502] eta: 0:57:31 lr: 0.000015 loss_cls: 2.5734 (2.5842) grad_norm: 1.0835 (1.4972) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 07:41:29 root] (utils.py 283): INFO Epoch: [6] [1310/2502] eta: 0:57:02 lr: 0.000015 loss_cls: 2.5960 (2.5848) grad_norm: 1.1363 (1.4956) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 07:41:58 root] (utils.py 283): INFO Epoch: [6] [1320/2502] eta: 0:56:34 lr: 0.000015 loss_cls: 2.7329 (2.5848) grad_norm: 1.1431 (1.4930) time: 2.8803 data: 0.0002 max mem: 28454 +[2024-12-12 07:42:27 root] (utils.py 283): INFO Epoch: [6] [1330/2502] eta: 0:56:05 lr: 0.000015 loss_cls: 2.7265 (2.5840) grad_norm: 1.1013 (1.4900) time: 2.8827 data: 0.0002 max mem: 28454 +[2024-12-12 07:42:56 root] (utils.py 283): INFO Epoch: [6] [1340/2502] eta: 0:55:36 lr: 0.000015 loss_cls: 2.5129 (2.5824) grad_norm: 1.1133 (1.4874) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 07:43:25 root] (utils.py 283): INFO Epoch: [6] [1350/2502] eta: 0:55:08 lr: 0.000015 loss_cls: 2.5129 (2.5817) grad_norm: 1.1277 (1.4859) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-12 07:43:53 root] (utils.py 283): INFO Epoch: [6] [1360/2502] eta: 0:54:39 lr: 0.000015 loss_cls: 2.6104 (2.5805) grad_norm: 1.1418 (1.4840) time: 2.8790 data: 0.0002 max mem: 28454 +[2024-12-12 07:44:22 root] (utils.py 283): INFO Epoch: [6] [1370/2502] eta: 0:54:11 lr: 0.000015 loss_cls: 2.4707 (2.5799) grad_norm: 1.2992 (1.5245) time: 2.8862 data: 0.0002 max mem: 28454 +[2024-12-12 07:44:51 root] (utils.py 283): INFO Epoch: [6] [1380/2502] eta: 0:53:42 lr: 0.000015 loss_cls: 2.6561 (2.5801) grad_norm: 1.3445 (1.5248) time: 2.8862 data: 0.0002 max mem: 28454 +[2024-12-12 07:45:20 root] (utils.py 283): INFO Epoch: [6] [1390/2502] eta: 0:53:13 lr: 0.000015 loss_cls: 2.6561 (2.5802) grad_norm: 1.3445 (1.5231) time: 2.8789 data: 0.0002 max mem: 28454 +[2024-12-12 07:45:49 root] (utils.py 283): INFO Epoch: [6] [1400/2502] eta: 0:52:45 lr: 0.000015 loss_cls: 2.6514 (2.5808) grad_norm: 1.1524 (1.5206) time: 2.8773 data: 0.0002 max mem: 28454 +[2024-12-12 07:46:17 root] (utils.py 283): INFO Epoch: [6] [1410/2502] eta: 0:52:16 lr: 0.000015 loss_cls: 2.5733 (2.5799) grad_norm: 1.1472 (1.5178) time: 2.8782 data: 0.0002 max mem: 28454 +[2024-12-12 07:46:46 root] (utils.py 283): INFO Epoch: [6] [1420/2502] eta: 0:51:47 lr: 0.000015 loss_cls: 2.5545 (2.5798) grad_norm: 1.0408 (1.5147) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-12 07:47:15 root] (utils.py 283): INFO Epoch: [6] [1430/2502] eta: 0:51:18 lr: 0.000015 loss_cls: 2.6352 (2.5815) grad_norm: 1.1232 (1.5131) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 07:47:44 root] (utils.py 283): INFO Epoch: [6] [1440/2502] eta: 0:50:50 lr: 0.000015 loss_cls: 2.6703 (2.5809) grad_norm: 1.1075 (1.5100) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 07:48:12 root] (utils.py 283): INFO Epoch: [6] [1450/2502] eta: 0:50:21 lr: 0.000015 loss_cls: 2.5563 (2.5794) grad_norm: 1.0810 (1.5073) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 07:48:41 root] (utils.py 283): INFO Epoch: [6] [1460/2502] eta: 0:49:52 lr: 0.000015 loss_cls: 2.5690 (2.5797) grad_norm: 1.1093 (1.5056) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 07:49:10 root] (utils.py 283): INFO Epoch: [6] [1470/2502] eta: 0:49:24 lr: 0.000015 loss_cls: 2.5343 (2.5790) grad_norm: 1.0769 (1.5027) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 07:49:39 root] (utils.py 283): INFO Epoch: [6] [1480/2502] eta: 0:48:55 lr: 0.000015 loss_cls: 2.4621 (2.5790) grad_norm: 1.0853 (1.5007) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 07:50:07 root] (utils.py 283): INFO Epoch: [6] [1490/2502] eta: 0:48:26 lr: 0.000015 loss_cls: 2.7286 (2.5800) grad_norm: 1.1399 (1.4991) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 07:50:36 root] (utils.py 283): INFO Epoch: [6] [1500/2502] eta: 0:47:57 lr: 0.000015 loss_cls: 2.7963 (2.5812) grad_norm: 1.1399 (1.4976) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 07:51:05 root] (utils.py 283): INFO Epoch: [6] [1510/2502] eta: 0:47:29 lr: 0.000015 loss_cls: 2.6678 (2.5806) grad_norm: 1.1496 (1.4954) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 07:51:34 root] (utils.py 283): INFO Epoch: [6] [1520/2502] eta: 0:47:00 lr: 0.000015 loss_cls: 2.5066 (2.5809) grad_norm: 1.1672 (1.4934) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-12 07:52:02 root] (utils.py 283): INFO Epoch: [6] [1530/2502] eta: 0:46:31 lr: 0.000015 loss_cls: 2.5066 (2.5797) grad_norm: 1.0715 (1.4908) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-12 07:52:31 root] (utils.py 283): INFO Epoch: [6] [1540/2502] eta: 0:46:03 lr: 0.000015 loss_cls: 2.5216 (2.5787) grad_norm: 1.0501 (1.4884) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 07:53:00 root] (utils.py 283): INFO Epoch: [6] [1550/2502] eta: 0:45:34 lr: 0.000015 loss_cls: 2.5216 (2.5780) grad_norm: 1.0546 (1.4940) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 07:53:29 root] (utils.py 283): INFO Epoch: [6] [1560/2502] eta: 0:45:05 lr: 0.000015 loss_cls: 2.6618 (2.5782) grad_norm: 1.1616 (1.4935) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 07:53:57 root] (utils.py 283): INFO Epoch: [6] [1570/2502] eta: 0:44:37 lr: 0.000015 loss_cls: 2.6331 (2.5782) grad_norm: 1.1359 (1.5182) time: 2.8779 data: 0.0002 max mem: 28454 +[2024-12-12 07:54:26 root] (utils.py 283): INFO Epoch: [6] [1580/2502] eta: 0:44:08 lr: 0.000015 loss_cls: 2.7674 (2.5793) grad_norm: 1.1598 (1.5184) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 07:54:55 root] (utils.py 283): INFO Epoch: [6] [1590/2502] eta: 0:43:39 lr: 0.000015 loss_cls: 2.6103 (2.5787) grad_norm: 1.2707 (1.5171) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-12 07:55:24 root] (utils.py 283): INFO Epoch: [6] [1600/2502] eta: 0:43:10 lr: 0.000015 loss_cls: 2.5549 (2.5788) grad_norm: 1.2038 (1.5152) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 07:55:52 root] (utils.py 283): INFO Epoch: [6] [1610/2502] eta: 0:42:42 lr: 0.000015 loss_cls: 2.4873 (2.5784) grad_norm: 1.1849 (1.5143) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 07:56:21 root] (utils.py 283): INFO Epoch: [6] [1620/2502] eta: 0:42:13 lr: 0.000015 loss_cls: 2.6779 (2.5796) grad_norm: 1.1816 (1.5125) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 07:56:50 root] (utils.py 283): INFO Epoch: [6] [1630/2502] eta: 0:41:44 lr: 0.000015 loss_cls: 2.7510 (2.5795) grad_norm: 1.1816 (1.5107) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 07:57:19 root] (utils.py 283): INFO Epoch: [6] [1640/2502] eta: 0:41:16 lr: 0.000015 loss_cls: 2.6175 (2.5788) grad_norm: 1.1324 (1.5093) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 07:57:47 root] (utils.py 283): INFO Epoch: [6] [1650/2502] eta: 0:40:47 lr: 0.000015 loss_cls: 2.7223 (2.5791) grad_norm: 1.1990 (1.5072) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 07:58:16 root] (utils.py 283): INFO Epoch: [6] [1660/2502] eta: 0:40:18 lr: 0.000015 loss_cls: 2.6602 (2.5788) grad_norm: 1.1619 (1.5050) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 07:58:45 root] (utils.py 283): INFO Epoch: [6] [1670/2502] eta: 0:39:49 lr: 0.000015 loss_cls: 2.6346 (2.5785) grad_norm: 1.1164 (1.5050) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 07:59:14 root] (utils.py 283): INFO Epoch: [6] [1680/2502] eta: 0:39:21 lr: 0.000015 loss_cls: 2.7148 (2.5785) grad_norm: 1.1114 (1.5032) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 07:59:42 root] (utils.py 283): INFO Epoch: [6] [1690/2502] eta: 0:38:52 lr: 0.000015 loss_cls: 2.7459 (2.5797) grad_norm: 1.2455 (1.5041) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 08:00:11 root] (utils.py 283): INFO Epoch: [6] [1700/2502] eta: 0:38:23 lr: 0.000015 loss_cls: 2.7459 (2.5801) grad_norm: 1.2510 (1.5027) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 08:00:40 root] (utils.py 283): INFO Epoch: [6] [1710/2502] eta: 0:37:55 lr: 0.000015 loss_cls: 2.5119 (2.5787) grad_norm: 1.1712 (1.5030) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 08:01:09 root] (utils.py 283): INFO Epoch: [6] [1720/2502] eta: 0:37:26 lr: 0.000015 loss_cls: 2.5023 (2.5779) grad_norm: 1.2091 (1.5012) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 08:01:37 root] (utils.py 283): INFO Epoch: [6] [1730/2502] eta: 0:36:57 lr: 0.000015 loss_cls: 2.6965 (2.5786) grad_norm: 1.0756 (1.4986) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 08:02:06 root] (utils.py 283): INFO Epoch: [6] [1740/2502] eta: 0:36:28 lr: 0.000015 loss_cls: 2.8056 (2.5794) grad_norm: 1.0938 (1.5005) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 08:02:35 root] (utils.py 283): INFO Epoch: [6] [1750/2502] eta: 0:36:00 lr: 0.000015 loss_cls: 2.8105 (2.5803) grad_norm: 1.1815 (1.5007) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 08:03:04 root] (utils.py 283): INFO Epoch: [6] [1760/2502] eta: 0:35:31 lr: 0.000015 loss_cls: 2.8599 (2.5819) grad_norm: 1.1127 (1.5034) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 08:03:32 root] (utils.py 283): INFO Epoch: [6] [1770/2502] eta: 0:35:02 lr: 0.000015 loss_cls: 2.7405 (2.5817) grad_norm: 1.1127 (1.5021) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 08:04:01 root] (utils.py 283): INFO Epoch: [6] [1780/2502] eta: 0:34:34 lr: 0.000015 loss_cls: 2.5515 (2.5803) grad_norm: 1.1582 (1.5001) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 08:04:30 root] (utils.py 283): INFO Epoch: [6] [1790/2502] eta: 0:34:05 lr: 0.000015 loss_cls: 2.5515 (2.5797) grad_norm: 1.1953 (1.4990) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 08:04:59 root] (utils.py 283): INFO Epoch: [6] [1800/2502] eta: 0:33:36 lr: 0.000015 loss_cls: 2.6185 (2.5792) grad_norm: 1.1626 (1.4971) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 08:05:27 root] (utils.py 283): INFO Epoch: [6] [1810/2502] eta: 0:33:07 lr: 0.000015 loss_cls: 2.5876 (2.5790) grad_norm: 1.1122 (1.4972) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 08:05:56 root] (utils.py 283): INFO Epoch: [6] [1820/2502] eta: 0:32:39 lr: 0.000015 loss_cls: 2.7018 (2.5798) grad_norm: 1.1152 (1.4960) time: 2.8794 data: 0.0002 max mem: 28454 +[2024-12-12 08:06:25 root] (utils.py 283): INFO Epoch: [6] [1830/2502] eta: 0:32:10 lr: 0.000015 loss_cls: 2.7247 (2.5805) grad_norm: 1.2251 (1.4969) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 08:06:53 root] (utils.py 283): INFO Epoch: [6] [1840/2502] eta: 0:31:41 lr: 0.000015 loss_cls: 2.6680 (2.5810) grad_norm: 1.2308 (1.4955) time: 2.8631 data: 0.0002 max mem: 28454 +[2024-12-12 08:07:22 root] (utils.py 283): INFO Epoch: [6] [1850/2502] eta: 0:31:12 lr: 0.000015 loss_cls: 2.6499 (2.5802) grad_norm: 1.2114 (1.4942) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 08:07:51 root] (utils.py 283): INFO Epoch: [6] [1860/2502] eta: 0:30:44 lr: 0.000015 loss_cls: 2.6539 (2.5807) grad_norm: 1.1615 (1.4949) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 08:08:19 root] (utils.py 283): INFO Epoch: [6] [1870/2502] eta: 0:30:15 lr: 0.000015 loss_cls: 2.7032 (2.5811) grad_norm: 1.0924 (1.4930) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 08:08:48 root] (utils.py 283): INFO Epoch: [6] [1880/2502] eta: 0:29:46 lr: 0.000015 loss_cls: 2.7032 (2.5806) grad_norm: 1.1118 (1.4919) time: 2.8643 data: 0.0002 max mem: 28454 +[2024-12-12 08:09:17 root] (utils.py 283): INFO Epoch: [6] [1890/2502] eta: 0:29:17 lr: 0.000015 loss_cls: 2.5699 (2.5795) grad_norm: 1.1835 (1.4914) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 08:09:45 root] (utils.py 283): INFO Epoch: [6] [1900/2502] eta: 0:28:49 lr: 0.000015 loss_cls: 2.3168 (2.5785) grad_norm: 1.0908 (1.4894) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 08:10:14 root] (utils.py 283): INFO Epoch: [6] [1910/2502] eta: 0:28:20 lr: 0.000015 loss_cls: 2.6986 (2.5792) grad_norm: 1.1444 (1.4904) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 08:10:43 root] (utils.py 283): INFO Epoch: [6] [1920/2502] eta: 0:27:51 lr: 0.000015 loss_cls: 2.6486 (2.5786) grad_norm: 1.1012 (1.4883) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 08:11:11 root] (utils.py 283): INFO Epoch: [6] [1930/2502] eta: 0:27:23 lr: 0.000015 loss_cls: 2.5839 (2.5780) grad_norm: 1.1012 (1.4867) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 08:11:40 root] (utils.py 283): INFO Epoch: [6] [1940/2502] eta: 0:26:54 lr: 0.000015 loss_cls: 2.7290 (2.5790) grad_norm: 1.1979 (1.4854) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 08:12:09 root] (utils.py 283): INFO Epoch: [6] [1950/2502] eta: 0:26:25 lr: 0.000015 loss_cls: 2.7424 (2.5785) grad_norm: 1.2146 (1.4838) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 08:12:38 root] (utils.py 283): INFO Epoch: [6] [1960/2502] eta: 0:25:56 lr: 0.000015 loss_cls: 2.5511 (2.5779) grad_norm: 1.0507 (1.4820) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 08:13:06 root] (utils.py 283): INFO Epoch: [6] [1970/2502] eta: 0:25:28 lr: 0.000015 loss_cls: 2.6267 (2.5778) grad_norm: 1.1450 (1.4804) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 08:13:35 root] (utils.py 283): INFO Epoch: [6] [1980/2502] eta: 0:24:59 lr: 0.000015 loss_cls: 2.6828 (2.5776) grad_norm: 1.1506 (1.4787) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 08:14:04 root] (utils.py 283): INFO Epoch: [6] [1990/2502] eta: 0:24:30 lr: 0.000015 loss_cls: 2.6483 (2.5783) grad_norm: 1.1504 (1.4777) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 08:14:32 root] (utils.py 283): INFO Epoch: [6] [2000/2502] eta: 0:24:01 lr: 0.000015 loss_cls: 2.7132 (2.5782) grad_norm: 1.1319 (1.4758) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 08:15:01 root] (utils.py 283): INFO Epoch: [6] [2010/2502] eta: 0:23:33 lr: 0.000015 loss_cls: 2.4664 (2.5768) grad_norm: 1.0846 (1.4740) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 08:15:30 root] (utils.py 283): INFO Epoch: [6] [2020/2502] eta: 0:23:04 lr: 0.000015 loss_cls: 2.3246 (2.5762) grad_norm: 1.1129 (1.4725) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 08:15:59 root] (utils.py 283): INFO Epoch: [6] [2030/2502] eta: 0:22:35 lr: 0.000015 loss_cls: 2.4930 (2.5758) grad_norm: 1.1650 (1.4755) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 08:16:27 root] (utils.py 283): INFO Epoch: [6] [2040/2502] eta: 0:22:07 lr: 0.000015 loss_cls: 2.5766 (2.5752) grad_norm: 1.2010 (1.4741) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 08:16:56 root] (utils.py 283): INFO Epoch: [6] [2050/2502] eta: 0:21:38 lr: 0.000015 loss_cls: 2.6349 (2.5751) grad_norm: 1.1790 (1.4737) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 08:17:25 root] (utils.py 283): INFO Epoch: [6] [2060/2502] eta: 0:21:09 lr: 0.000015 loss_cls: 2.5920 (2.5747) grad_norm: 1.1121 (1.4720) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 08:17:53 root] (utils.py 283): INFO Epoch: [6] [2070/2502] eta: 0:20:40 lr: 0.000015 loss_cls: 2.5920 (2.5754) grad_norm: 1.1121 (1.4708) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-12 08:18:22 root] (utils.py 283): INFO Epoch: [6] [2080/2502] eta: 0:20:12 lr: 0.000015 loss_cls: 2.6525 (2.5752) grad_norm: 1.1775 (1.4693) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 08:18:51 root] (utils.py 283): INFO Epoch: [6] [2090/2502] eta: 0:19:43 lr: 0.000015 loss_cls: 2.6446 (2.5756) grad_norm: 1.0645 (1.4674) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 08:19:19 root] (utils.py 283): INFO Epoch: [6] [2100/2502] eta: 0:19:14 lr: 0.000015 loss_cls: 2.6446 (2.5755) grad_norm: 1.0588 (1.4659) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 08:19:48 root] (utils.py 283): INFO Epoch: [6] [2110/2502] eta: 0:18:45 lr: 0.000015 loss_cls: 2.7329 (2.5764) grad_norm: 1.1248 (1.4650) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 08:20:17 root] (utils.py 283): INFO Epoch: [6] [2120/2502] eta: 0:18:17 lr: 0.000015 loss_cls: 2.7688 (2.5763) grad_norm: 1.1276 (1.4670) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 08:20:45 root] (utils.py 283): INFO Epoch: [6] [2130/2502] eta: 0:17:48 lr: 0.000015 loss_cls: 2.7688 (2.5765) grad_norm: 1.1116 (1.4658) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 08:21:14 root] (utils.py 283): INFO Epoch: [6] [2140/2502] eta: 0:17:19 lr: 0.000015 loss_cls: 2.6695 (2.5766) grad_norm: 1.1472 (1.4650) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 08:21:43 root] (utils.py 283): INFO Epoch: [6] [2150/2502] eta: 0:16:51 lr: 0.000015 loss_cls: 2.7840 (2.5771) grad_norm: 1.1087 (1.4638) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 08:22:12 root] (utils.py 283): INFO Epoch: [6] [2160/2502] eta: 0:16:22 lr: 0.000015 loss_cls: 2.6704 (2.5770) grad_norm: 1.1087 (1.4638) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 08:22:40 root] (utils.py 283): INFO Epoch: [6] [2170/2502] eta: 0:15:53 lr: 0.000015 loss_cls: 2.4548 (2.5759) grad_norm: 1.1503 (1.4636) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 08:23:09 root] (utils.py 283): INFO Epoch: [6] [2180/2502] eta: 0:15:24 lr: 0.000015 loss_cls: 2.2850 (2.5749) grad_norm: 1.1250 (1.4621) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 08:23:38 root] (utils.py 283): INFO Epoch: [6] [2190/2502] eta: 0:14:56 lr: 0.000015 loss_cls: 2.6441 (2.5755) grad_norm: 1.1250 (1.4609) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 08:24:06 root] (utils.py 283): INFO Epoch: [6] [2200/2502] eta: 0:14:27 lr: 0.000015 loss_cls: 2.7158 (2.5757) grad_norm: 1.0781 (1.4595) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 08:24:35 root] (utils.py 283): INFO Epoch: [6] [2210/2502] eta: 0:13:58 lr: 0.000015 loss_cls: 2.6093 (2.5751) grad_norm: 1.0781 (1.4585) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 08:25:04 root] (utils.py 283): INFO Epoch: [6] [2220/2502] eta: 0:13:29 lr: 0.000015 loss_cls: 2.5432 (2.5750) grad_norm: 1.0936 (1.4569) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 08:25:33 root] (utils.py 283): INFO Epoch: [6] [2230/2502] eta: 0:13:01 lr: 0.000015 loss_cls: 2.6714 (2.5751) grad_norm: 1.0936 (1.4557) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 08:26:01 root] (utils.py 283): INFO Epoch: [6] [2240/2502] eta: 0:12:32 lr: 0.000015 loss_cls: 2.6191 (2.5751) grad_norm: 1.1109 (1.4541) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 08:26:30 root] (utils.py 283): INFO Epoch: [6] [2250/2502] eta: 0:12:03 lr: 0.000015 loss_cls: 2.5871 (2.5753) grad_norm: 1.1188 (1.4531) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 08:26:59 root] (utils.py 283): INFO Epoch: [6] [2260/2502] eta: 0:11:35 lr: 0.000015 loss_cls: 2.5466 (2.5747) grad_norm: 1.0945 (1.4522) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 08:27:27 root] (utils.py 283): INFO Epoch: [6] [2270/2502] eta: 0:11:06 lr: 0.000015 loss_cls: 2.5466 (2.5746) grad_norm: 1.0764 (1.4560) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 08:27:56 root] (utils.py 283): INFO Epoch: [6] [2280/2502] eta: 0:10:37 lr: 0.000015 loss_cls: 2.7433 (2.5753) grad_norm: 1.2085 (1.4552) time: 2.8655 data: 0.0002 max mem: 28454 +[2024-12-12 08:28:25 root] (utils.py 283): INFO Epoch: [6] [2290/2502] eta: 0:10:08 lr: 0.000015 loss_cls: 2.6445 (2.5754) grad_norm: 1.2145 (1.4543) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 08:28:53 root] (utils.py 283): INFO Epoch: [6] [2300/2502] eta: 0:09:40 lr: 0.000015 loss_cls: 2.6200 (2.5758) grad_norm: 1.1285 (1.4531) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 08:29:22 root] (utils.py 283): INFO Epoch: [6] [2310/2502] eta: 0:09:11 lr: 0.000015 loss_cls: 2.6922 (2.5758) grad_norm: 1.0960 (1.4517) time: 2.8619 data: 0.0002 max mem: 28454 +[2024-12-12 08:29:50 root] (utils.py 283): INFO Epoch: [6] [2320/2502] eta: 0:08:42 lr: 0.000015 loss_cls: 2.7942 (2.5766) grad_norm: 1.1420 (1.4513) time: 2.8615 data: 0.0002 max mem: 28454 +[2024-12-12 08:30:19 root] (utils.py 283): INFO Epoch: [6] [2330/2502] eta: 0:08:13 lr: 0.000015 loss_cls: 2.7126 (2.5758) grad_norm: 1.0672 (1.4499) time: 2.8625 data: 0.0002 max mem: 28454 +[2024-12-12 08:30:48 root] (utils.py 283): INFO Epoch: [6] [2340/2502] eta: 0:07:45 lr: 0.000015 loss_cls: 2.6346 (2.5762) grad_norm: 1.1586 (1.4501) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-12 08:31:16 root] (utils.py 283): INFO Epoch: [6] [2350/2502] eta: 0:07:16 lr: 0.000015 loss_cls: 2.6966 (2.5754) grad_norm: 1.1613 (1.4491) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 08:31:45 root] (utils.py 283): INFO Epoch: [6] [2360/2502] eta: 0:06:47 lr: 0.000015 loss_cls: 2.5112 (2.5751) grad_norm: 1.0854 (1.4480) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 08:32:14 root] (utils.py 283): INFO Epoch: [6] [2370/2502] eta: 0:06:19 lr: 0.000015 loss_cls: 2.5112 (2.5740) grad_norm: 1.0629 (1.4560) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 08:32:43 root] (utils.py 283): INFO Epoch: [6] [2380/2502] eta: 0:05:50 lr: 0.000015 loss_cls: 2.5433 (2.5742) grad_norm: 1.2958 (1.4556) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 08:33:11 root] (utils.py 283): INFO Epoch: [6] [2390/2502] eta: 0:05:21 lr: 0.000015 loss_cls: 2.7343 (2.5746) grad_norm: 1.1822 (1.4544) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 08:33:40 root] (utils.py 283): INFO Epoch: [6] [2400/2502] eta: 0:04:52 lr: 0.000015 loss_cls: 2.6339 (2.5747) grad_norm: 1.1127 (1.4532) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 08:34:09 root] (utils.py 283): INFO Epoch: [6] [2410/2502] eta: 0:04:24 lr: 0.000015 loss_cls: 2.7334 (2.5758) grad_norm: 1.1476 (1.4521) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 08:34:37 root] (utils.py 283): INFO Epoch: [6] [2420/2502] eta: 0:03:55 lr: 0.000015 loss_cls: 2.7619 (2.5758) grad_norm: 1.1476 (1.4507) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 08:35:06 root] (utils.py 283): INFO Epoch: [6] [2430/2502] eta: 0:03:26 lr: 0.000015 loss_cls: 2.5875 (2.5755) grad_norm: 1.1385 (1.4509) time: 2.8657 data: 0.0002 max mem: 28454 +[2024-12-12 08:35:35 root] (utils.py 283): INFO Epoch: [6] [2440/2502] eta: 0:02:58 lr: 0.000015 loss_cls: 2.5395 (2.5748) grad_norm: 1.1925 (1.4501) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-12 08:36:03 root] (utils.py 283): INFO Epoch: [6] [2450/2502] eta: 0:02:29 lr: 0.000015 loss_cls: 2.5328 (2.5741) grad_norm: 1.1755 (1.4489) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 08:36:32 root] (utils.py 283): INFO Epoch: [6] [2460/2502] eta: 0:02:00 lr: 0.000015 loss_cls: 2.6840 (2.5743) grad_norm: 1.1395 (1.4476) time: 2.8644 data: 0.0003 max mem: 28454 +[2024-12-12 08:37:01 root] (utils.py 283): INFO Epoch: [6] [2470/2502] eta: 0:01:31 lr: 0.000015 loss_cls: 2.7026 (2.5742) grad_norm: 1.0403 (1.4480) time: 2.8646 data: 0.0003 max mem: 28454 +[2024-12-12 08:37:29 root] (utils.py 283): INFO Epoch: [6] [2480/2502] eta: 0:01:03 lr: 0.000015 loss_cls: 2.7496 (2.5744) grad_norm: 1.0947 (1.8949) time: 2.8664 data: 0.0003 max mem: 28454 +[2024-12-12 08:37:58 root] (utils.py 283): INFO Epoch: [6] [2490/2502] eta: 0:00:34 lr: 0.000015 loss_cls: 2.6892 (2.5737) grad_norm: 1.6088 (1.8959) time: 2.8839 data: 0.0197 max mem: 28454 +[2024-12-12 08:38:27 root] (utils.py 283): INFO Epoch: [6] [2500/2502] eta: 0:00:05 lr: 0.000015 loss_cls: 2.5474 (2.5745) grad_norm: 1.7520 (1.8967) time: 2.8864 data: 0.0198 max mem: 28454 +[2024-12-12 08:38:30 root] (utils.py 283): INFO Epoch: [6] [2501/2502] eta: 0:00:02 lr: 0.000015 loss_cls: 2.6369 (2.5746) grad_norm: 1.6564 (1.8965) time: 2.8864 data: 0.0197 max mem: 28454 +[2024-12-12 08:38:30 root] (utils.py 297): INFO Epoch: [6] Total time: 1:59:45 (2.8718 s / it) +[2024-12-12 08:38:30 root] (engine.py 179): INFO Averaged stats:lr: 0.000015 loss_cls: 2.6369 (2.5849) grad_norm: 1.6564 (1.8965) +[2024-12-12 08:38:33 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:52 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4137 (0.4137) acc1: 89.8438 (89.8438) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5380 data: 0.0005 max mem: 28454 +[2024-12-12 08:38:38 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:47 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6373 (0.6059) acc1: 87.5000 (87.3580) acc3: 96.8750 (96.7330) acc5: 98.4375 (97.7983) time: 0.5449 data: 0.0005 max mem: 28454 +[2024-12-12 08:38:44 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6373 (0.6334) acc1: 86.7188 (86.6443) acc3: 96.0938 (96.2426) acc5: 98.4375 (97.5446) time: 0.5459 data: 0.0005 max mem: 28454 +[2024-12-12 08:38:49 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6376 (0.6685) acc1: 85.9375 (85.4587) acc3: 95.3125 (95.9425) acc5: 97.6562 (97.5554) time: 0.5467 data: 0.0005 max mem: 28454 +[2024-12-12 08:38:55 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6837 (0.6695) acc1: 84.3750 (85.4421) acc3: 96.0938 (95.9794) acc5: 97.6562 (97.5610) time: 0.5473 data: 0.0005 max mem: 28454 +[2024-12-12 08:39:00 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8668 (0.7480) acc1: 78.1250 (83.5784) acc3: 93.7500 (94.9449) acc5: 95.3125 (96.7065) time: 0.5474 data: 0.0004 max mem: 28454 +[2024-12-12 08:39:06 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9600 (0.7750) acc1: 79.6875 (83.2992) acc3: 90.6250 (94.3776) acc5: 93.7500 (96.2602) time: 0.5472 data: 0.0004 max mem: 28454 +[2024-12-12 08:39:11 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9501 (0.8029) acc1: 81.2500 (82.5264) acc3: 92.1875 (94.1241) acc5: 94.5312 (96.0938) time: 0.5472 data: 0.0004 max mem: 28454 +[2024-12-12 08:39:17 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9765 (0.8291) acc1: 78.9062 (82.0988) acc3: 92.1875 (93.6921) acc5: 94.5312 (95.7562) time: 0.5472 data: 0.0007 max mem: 28454 +[2024-12-12 08:39:22 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9765 (0.8462) acc1: 77.3438 (81.4990) acc3: 89.8438 (93.3036) acc5: 94.5312 (95.6216) time: 0.5473 data: 0.0006 max mem: 28454 +[2024-12-12 08:39:26 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8891 (0.8430) acc1: 78.1250 (81.5520) acc3: 92.1875 (93.4000) acc5: 95.3125 (95.7040) time: 0.5386 data: 0.0006 max mem: 28454 +[2024-12-12 08:39:26 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5453 s / it) +[2024-12-12 08:39:26 root] (engine.py 264): INFO * Acc@1 81.544 Acc@3 93.308 Acc@5 95.662 loss 0.844 flops 13.207 layer_flops 13.109 +[2024-12-12 08:39:26 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.5% +[2024-12-12 08:39:26 root] (main.py 576): INFO Max accuracy: 81.74% +[2024-12-12 08:39:28 root] (utils.py 283): INFO Epoch: [7] [ 0/2502] eta: 1:58:44 lr: 0.000013 loss_cls: 3.0467 (3.0467) grad_norm: 1.6367 (1.6367) time: 2.8477 data: 0.0003 max mem: 28454 +[2024-12-12 08:39:57 root] (utils.py 283): INFO Epoch: [7] [ 10/2502] eta: 1:58:49 lr: 0.000013 loss_cls: 2.9733 (2.8576) grad_norm: 1.6367 (2.0120) time: 2.8610 data: 0.0002 max mem: 28454 +[2024-12-12 08:40:26 root] (utils.py 283): INFO Epoch: [7] [ 20/2502] eta: 1:58:29 lr: 0.000013 loss_cls: 2.8703 (2.7219) grad_norm: 1.5100 (1.7106) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 08:40:54 root] (utils.py 283): INFO Epoch: [7] [ 30/2502] eta: 1:58:00 lr: 0.000013 loss_cls: 2.8302 (2.7201) grad_norm: 1.3584 (1.6319) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 08:41:23 root] (utils.py 283): INFO Epoch: [7] [ 40/2502] eta: 1:57:36 lr: 0.000013 loss_cls: 2.6382 (2.6595) grad_norm: 1.2965 (1.5445) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 08:41:52 root] (utils.py 283): INFO Epoch: [7] [ 50/2502] eta: 1:57:05 lr: 0.000013 loss_cls: 2.6123 (2.6440) grad_norm: 1.2483 (1.4946) time: 2.8664 data: 0.0003 max mem: 28454 +[2024-12-12 08:42:20 root] (utils.py 283): INFO Epoch: [7] [ 60/2502] eta: 1:56:36 lr: 0.000013 loss_cls: 2.8285 (2.6647) grad_norm: 1.2858 (1.4815) time: 2.8632 data: 0.0003 max mem: 28454 +[2024-12-12 08:42:49 root] (utils.py 283): INFO Epoch: [7] [ 70/2502] eta: 1:56:08 lr: 0.000013 loss_cls: 2.7902 (2.6524) grad_norm: 1.3691 (1.5144) time: 2.8658 data: 0.0003 max mem: 28454 +[2024-12-12 08:43:18 root] (utils.py 283): INFO Epoch: [7] [ 80/2502] eta: 1:55:40 lr: 0.000013 loss_cls: 2.7118 (2.6562) grad_norm: 1.2418 (1.4864) time: 2.8665 data: 0.0003 max mem: 28454 +[2024-12-12 08:43:46 root] (utils.py 283): INFO Epoch: [7] [ 90/2502] eta: 1:55:12 lr: 0.000013 loss_cls: 2.7567 (2.6583) grad_norm: 1.2087 (1.4612) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 08:44:15 root] (utils.py 283): INFO Epoch: [7] [ 100/2502] eta: 1:54:42 lr: 0.000013 loss_cls: 2.6658 (2.6571) grad_norm: 1.2323 (1.4370) time: 2.8647 data: 0.0002 max mem: 28454 +[2024-12-12 08:44:44 root] (utils.py 283): INFO Epoch: [7] [ 110/2502] eta: 1:54:14 lr: 0.000013 loss_cls: 2.7347 (2.6576) grad_norm: 1.1681 (1.4240) time: 2.8640 data: 0.0002 max mem: 28454 +[2024-12-12 08:45:12 root] (utils.py 283): INFO Epoch: [7] [ 120/2502] eta: 1:53:44 lr: 0.000013 loss_cls: 2.6342 (2.6348) grad_norm: 1.1047 (1.4080) time: 2.8646 data: 0.0002 max mem: 28454 +[2024-12-12 08:45:41 root] (utils.py 283): INFO Epoch: [7] [ 130/2502] eta: 1:53:16 lr: 0.000013 loss_cls: 2.6342 (2.6381) grad_norm: 1.0782 (1.3996) time: 2.8646 data: 0.0003 max mem: 28454 +[2024-12-12 08:46:10 root] (utils.py 283): INFO Epoch: [7] [ 140/2502] eta: 1:52:47 lr: 0.000013 loss_cls: 2.7589 (2.6407) grad_norm: 1.2421 (1.4113) time: 2.8655 data: 0.0003 max mem: 28454 +[2024-12-12 08:46:38 root] (utils.py 283): INFO Epoch: [7] [ 150/2502] eta: 1:52:19 lr: 0.000013 loss_cls: 2.6685 (2.6338) grad_norm: 1.1489 (1.4316) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 08:47:07 root] (utils.py 283): INFO Epoch: [7] [ 160/2502] eta: 1:51:51 lr: 0.000013 loss_cls: 2.6205 (2.6385) grad_norm: 1.1489 (1.4472) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 08:47:36 root] (utils.py 283): INFO Epoch: [7] [ 170/2502] eta: 1:51:23 lr: 0.000013 loss_cls: 2.6205 (2.6214) grad_norm: 1.3584 (2.7427) time: 2.8686 data: 0.0003 max mem: 28454 +[2024-12-12 08:48:04 root] (utils.py 283): INFO Epoch: [7] [ 180/2502] eta: 1:50:54 lr: 0.000013 loss_cls: 2.0046 (2.5997) grad_norm: 1.4329 (2.6757) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 08:48:33 root] (utils.py 283): INFO Epoch: [7] [ 190/2502] eta: 1:50:26 lr: 0.000013 loss_cls: 2.6815 (2.6130) grad_norm: 1.4566 (2.6132) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 08:49:02 root] (utils.py 283): INFO Epoch: [7] [ 200/2502] eta: 1:49:57 lr: 0.000013 loss_cls: 2.8312 (2.6125) grad_norm: 1.4296 (2.5594) time: 2.8671 data: 0.0003 max mem: 28454 +[2024-12-12 08:49:30 root] (utils.py 283): INFO Epoch: [7] [ 210/2502] eta: 1:49:29 lr: 0.000013 loss_cls: 2.6007 (2.6096) grad_norm: 1.3856 (2.5038) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 08:49:59 root] (utils.py 283): INFO Epoch: [7] [ 220/2502] eta: 1:49:00 lr: 0.000013 loss_cls: 2.5614 (2.6030) grad_norm: 1.2938 (2.4518) time: 2.8669 data: 0.0003 max mem: 28454 +[2024-12-12 08:50:28 root] (utils.py 283): INFO Epoch: [7] [ 230/2502] eta: 1:48:32 lr: 0.000013 loss_cls: 2.6506 (2.6086) grad_norm: 1.2787 (2.4072) time: 2.8660 data: 0.0003 max mem: 28454 +[2024-12-12 08:50:56 root] (utils.py 283): INFO Epoch: [7] [ 240/2502] eta: 1:48:03 lr: 0.000013 loss_cls: 2.7961 (2.6173) grad_norm: 1.3476 (2.3778) time: 2.8685 data: 0.0003 max mem: 28454 +[2024-12-12 08:51:25 root] (utils.py 283): INFO Epoch: [7] [ 250/2502] eta: 1:47:34 lr: 0.000013 loss_cls: 2.7961 (2.6174) grad_norm: 1.3083 (2.3336) time: 2.8673 data: 0.0003 max mem: 28454 +[2024-12-12 08:51:54 root] (utils.py 283): INFO Epoch: [7] [ 260/2502] eta: 1:47:06 lr: 0.000013 loss_cls: 2.7247 (2.6128) grad_norm: 1.2163 (2.2910) time: 2.8688 data: 0.0003 max mem: 28454 +[2024-12-12 08:52:23 root] (utils.py 283): INFO Epoch: [7] [ 270/2502] eta: 1:46:38 lr: 0.000013 loss_cls: 2.5517 (2.6075) grad_norm: 1.1998 (2.2652) time: 2.8707 data: 0.0003 max mem: 28454 +[2024-12-12 08:52:51 root] (utils.py 283): INFO Epoch: [7] [ 280/2502] eta: 1:46:09 lr: 0.000013 loss_cls: 2.7415 (2.6089) grad_norm: 1.1742 (2.2270) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 08:53:20 root] (utils.py 283): INFO Epoch: [7] [ 290/2502] eta: 1:45:40 lr: 0.000013 loss_cls: 2.7455 (2.6105) grad_norm: 1.1879 (2.1936) time: 2.8642 data: 0.0002 max mem: 28454 +[2024-12-12 08:53:48 root] (utils.py 283): INFO Epoch: [7] [ 300/2502] eta: 1:45:11 lr: 0.000013 loss_cls: 2.8019 (2.6100) grad_norm: 1.2395 (2.3480) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 08:54:17 root] (utils.py 283): INFO Epoch: [7] [ 310/2502] eta: 1:44:43 lr: 0.000013 loss_cls: 2.6862 (2.6032) grad_norm: 1.1824 (2.3097) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 08:54:46 root] (utils.py 283): INFO Epoch: [7] [ 320/2502] eta: 1:44:15 lr: 0.000013 loss_cls: 2.4146 (2.6014) grad_norm: 1.1616 (2.2790) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 08:55:15 root] (utils.py 283): INFO Epoch: [7] [ 330/2502] eta: 1:43:46 lr: 0.000013 loss_cls: 2.6907 (2.6012) grad_norm: 1.1616 (2.2498) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 08:55:43 root] (utils.py 283): INFO Epoch: [7] [ 340/2502] eta: 1:43:17 lr: 0.000013 loss_cls: 2.6267 (2.5993) grad_norm: 1.1257 (2.2179) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-12 08:56:12 root] (utils.py 283): INFO Epoch: [7] [ 350/2502] eta: 1:42:49 lr: 0.000013 loss_cls: 2.6313 (2.5991) grad_norm: 1.1249 (2.1914) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 08:56:41 root] (utils.py 283): INFO Epoch: [7] [ 360/2502] eta: 1:42:20 lr: 0.000013 loss_cls: 2.8123 (2.6032) grad_norm: 1.2048 (2.1802) time: 2.8695 data: 0.0003 max mem: 28454 +[2024-12-12 08:57:09 root] (utils.py 283): INFO Epoch: [7] [ 370/2502] eta: 1:41:52 lr: 0.000013 loss_cls: 2.7630 (2.5971) grad_norm: 1.2103 (2.1517) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 08:57:38 root] (utils.py 283): INFO Epoch: [7] [ 380/2502] eta: 1:41:23 lr: 0.000013 loss_cls: 2.4786 (2.5991) grad_norm: 1.0989 (2.1271) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 08:58:07 root] (utils.py 283): INFO Epoch: [7] [ 390/2502] eta: 1:40:55 lr: 0.000013 loss_cls: 2.8274 (2.6011) grad_norm: 1.1052 (2.1032) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 08:58:35 root] (utils.py 283): INFO Epoch: [7] [ 400/2502] eta: 1:40:26 lr: 0.000013 loss_cls: 2.7873 (2.6058) grad_norm: 1.1052 (2.0784) time: 2.8701 data: 0.0003 max mem: 28454 +[2024-12-12 08:59:04 root] (utils.py 283): INFO Epoch: [7] [ 410/2502] eta: 1:39:58 lr: 0.000013 loss_cls: 2.8099 (2.6066) grad_norm: 1.1169 (2.0585) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 08:59:33 root] (utils.py 283): INFO Epoch: [7] [ 420/2502] eta: 1:39:30 lr: 0.000013 loss_cls: 2.4864 (2.6025) grad_norm: 1.1503 (2.0457) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 09:00:02 root] (utils.py 283): INFO Epoch: [7] [ 430/2502] eta: 1:39:01 lr: 0.000013 loss_cls: 2.5116 (2.6039) grad_norm: 1.1066 (2.0271) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 09:00:30 root] (utils.py 283): INFO Epoch: [7] [ 440/2502] eta: 1:38:32 lr: 0.000013 loss_cls: 2.5434 (2.5991) grad_norm: 1.1219 (2.0143) time: 2.8669 data: 0.0003 max mem: 28454 +[2024-12-12 09:00:59 root] (utils.py 283): INFO Epoch: [7] [ 450/2502] eta: 1:38:04 lr: 0.000013 loss_cls: 2.4533 (2.5974) grad_norm: 1.1219 (1.9952) time: 2.8698 data: 0.0003 max mem: 28454 +[2024-12-12 09:01:28 root] (utils.py 283): INFO Epoch: [7] [ 460/2502] eta: 1:37:35 lr: 0.000013 loss_cls: 2.7094 (2.5994) grad_norm: 1.1340 (1.9772) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 09:01:56 root] (utils.py 283): INFO Epoch: [7] [ 470/2502] eta: 1:37:06 lr: 0.000013 loss_cls: 2.6964 (2.5958) grad_norm: 1.1340 (1.9623) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 09:02:25 root] (utils.py 283): INFO Epoch: [7] [ 480/2502] eta: 1:36:38 lr: 0.000013 loss_cls: 2.5056 (2.5928) grad_norm: 1.1103 (1.9443) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 09:02:54 root] (utils.py 283): INFO Epoch: [7] [ 490/2502] eta: 1:36:10 lr: 0.000013 loss_cls: 2.7391 (2.5939) grad_norm: 1.1328 (1.9289) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 09:03:23 root] (utils.py 283): INFO Epoch: [7] [ 500/2502] eta: 1:35:41 lr: 0.000013 loss_cls: 2.7410 (2.5935) grad_norm: 1.1274 (1.9229) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 09:03:51 root] (utils.py 283): INFO Epoch: [7] [ 510/2502] eta: 1:35:13 lr: 0.000013 loss_cls: 2.6831 (2.5956) grad_norm: 1.0820 (1.9177) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 09:04:20 root] (utils.py 283): INFO Epoch: [7] [ 520/2502] eta: 1:34:44 lr: 0.000013 loss_cls: 2.6613 (2.5950) grad_norm: 1.0863 (1.9027) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 09:04:49 root] (utils.py 283): INFO Epoch: [7] [ 530/2502] eta: 1:34:16 lr: 0.000013 loss_cls: 2.6613 (2.5952) grad_norm: 1.1950 (1.8902) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 09:05:17 root] (utils.py 283): INFO Epoch: [7] [ 540/2502] eta: 1:33:47 lr: 0.000013 loss_cls: 2.7484 (2.5917) grad_norm: 1.1241 (1.8760) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-12 09:05:46 root] (utils.py 283): INFO Epoch: [7] [ 550/2502] eta: 1:33:18 lr: 0.000013 loss_cls: 2.5608 (2.5909) grad_norm: 1.1204 (1.8681) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 09:06:15 root] (utils.py 283): INFO Epoch: [7] [ 560/2502] eta: 1:32:50 lr: 0.000013 loss_cls: 2.7488 (2.5933) grad_norm: 1.1228 (1.9173) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 09:06:43 root] (utils.py 283): INFO Epoch: [7] [ 570/2502] eta: 1:32:21 lr: 0.000013 loss_cls: 2.8383 (2.5936) grad_norm: 1.0930 (1.9084) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 09:07:12 root] (utils.py 283): INFO Epoch: [7] [ 580/2502] eta: 1:31:52 lr: 0.000013 loss_cls: 2.7841 (2.5964) grad_norm: 1.0561 (1.9067) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 09:07:41 root] (utils.py 283): INFO Epoch: [7] [ 590/2502] eta: 1:31:24 lr: 0.000013 loss_cls: 2.8402 (2.5996) grad_norm: 1.1446 (1.8945) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 09:08:10 root] (utils.py 283): INFO Epoch: [7] [ 600/2502] eta: 1:30:55 lr: 0.000013 loss_cls: 2.8136 (2.5998) grad_norm: 1.1446 (1.8895) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 09:08:38 root] (utils.py 283): INFO Epoch: [7] [ 610/2502] eta: 1:30:26 lr: 0.000013 loss_cls: 2.6105 (2.5994) grad_norm: 1.1684 (1.8779) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-12 09:09:07 root] (utils.py 283): INFO Epoch: [7] [ 620/2502] eta: 1:29:57 lr: 0.000013 loss_cls: 2.5724 (2.5983) grad_norm: 1.1589 (1.8655) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 09:09:36 root] (utils.py 283): INFO Epoch: [7] [ 630/2502] eta: 1:29:29 lr: 0.000013 loss_cls: 2.5819 (2.5989) grad_norm: 1.1256 (1.8543) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-12 09:10:04 root] (utils.py 283): INFO Epoch: [7] [ 640/2502] eta: 1:29:00 lr: 0.000013 loss_cls: 2.7220 (2.5982) grad_norm: 1.0887 (1.8430) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 09:10:33 root] (utils.py 283): INFO Epoch: [7] [ 650/2502] eta: 1:28:31 lr: 0.000013 loss_cls: 2.5598 (2.5962) grad_norm: 1.0887 (1.8314) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 09:11:02 root] (utils.py 283): INFO Epoch: [7] [ 660/2502] eta: 1:28:03 lr: 0.000013 loss_cls: 2.7902 (2.6001) grad_norm: 1.1153 (1.8213) time: 2.8653 data: 0.0003 max mem: 28454 +[2024-12-12 09:11:30 root] (utils.py 283): INFO Epoch: [7] [ 670/2502] eta: 1:27:34 lr: 0.000013 loss_cls: 2.8287 (2.6002) grad_norm: 1.1306 (1.8119) time: 2.8656 data: 0.0003 max mem: 28454 +[2024-12-12 09:11:59 root] (utils.py 283): INFO Epoch: [7] [ 680/2502] eta: 1:27:05 lr: 0.000013 loss_cls: 2.6827 (2.6012) grad_norm: 1.1499 (1.8017) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 09:12:28 root] (utils.py 283): INFO Epoch: [7] [ 690/2502] eta: 1:26:37 lr: 0.000013 loss_cls: 2.6827 (2.6012) grad_norm: 1.0898 (1.8045) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 09:12:56 root] (utils.py 283): INFO Epoch: [7] [ 700/2502] eta: 1:26:08 lr: 0.000013 loss_cls: 2.6309 (2.6023) grad_norm: 1.0729 (1.7948) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 09:13:25 root] (utils.py 283): INFO Epoch: [7] [ 710/2502] eta: 1:25:39 lr: 0.000013 loss_cls: 2.6309 (2.6022) grad_norm: 1.0901 (1.7905) time: 2.8673 data: 0.0003 max mem: 28454 +[2024-12-12 09:13:54 root] (utils.py 283): INFO Epoch: [7] [ 720/2502] eta: 1:25:11 lr: 0.000013 loss_cls: 2.7393 (2.6028) grad_norm: 1.1434 (1.7845) time: 2.8653 data: 0.0002 max mem: 28454 +[2024-12-12 09:14:22 root] (utils.py 283): INFO Epoch: [7] [ 730/2502] eta: 1:24:42 lr: 0.000013 loss_cls: 2.7603 (2.6030) grad_norm: 1.1637 (1.7771) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 09:14:51 root] (utils.py 283): INFO Epoch: [7] [ 740/2502] eta: 1:24:13 lr: 0.000013 loss_cls: 2.7260 (2.6041) grad_norm: 1.1129 (1.8163) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 09:15:20 root] (utils.py 283): INFO Epoch: [7] [ 750/2502] eta: 1:23:45 lr: 0.000013 loss_cls: 2.7123 (2.6063) grad_norm: 1.1009 (1.8080) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 09:15:49 root] (utils.py 283): INFO Epoch: [7] [ 760/2502] eta: 1:23:16 lr: 0.000013 loss_cls: 2.7895 (2.6083) grad_norm: 1.1505 (1.8008) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 09:16:17 root] (utils.py 283): INFO Epoch: [7] [ 770/2502] eta: 1:22:47 lr: 0.000013 loss_cls: 2.8010 (2.6101) grad_norm: 1.2177 (1.7936) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-12 09:16:46 root] (utils.py 283): INFO Epoch: [7] [ 780/2502] eta: 1:22:19 lr: 0.000013 loss_cls: 2.6844 (2.6085) grad_norm: 1.1304 (1.7854) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 09:17:15 root] (utils.py 283): INFO Epoch: [7] [ 790/2502] eta: 1:21:50 lr: 0.000013 loss_cls: 2.6636 (2.6106) grad_norm: 1.1304 (1.7784) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 09:17:43 root] (utils.py 283): INFO Epoch: [7] [ 800/2502] eta: 1:21:21 lr: 0.000013 loss_cls: 2.7255 (2.6096) grad_norm: 1.1177 (1.7698) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 09:18:12 root] (utils.py 283): INFO Epoch: [7] [ 810/2502] eta: 1:20:53 lr: 0.000013 loss_cls: 2.7226 (2.6115) grad_norm: 1.0978 (1.7631) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 09:18:41 root] (utils.py 283): INFO Epoch: [7] [ 820/2502] eta: 1:20:24 lr: 0.000013 loss_cls: 2.7118 (2.6104) grad_norm: 1.1600 (1.7566) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 09:19:09 root] (utils.py 283): INFO Epoch: [7] [ 830/2502] eta: 1:19:55 lr: 0.000013 loss_cls: 2.5205 (2.6088) grad_norm: 1.0646 (1.7494) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 09:19:38 root] (utils.py 283): INFO Epoch: [7] [ 840/2502] eta: 1:19:27 lr: 0.000013 loss_cls: 2.4314 (2.6065) grad_norm: 1.0646 (1.7438) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 09:20:07 root] (utils.py 283): INFO Epoch: [7] [ 850/2502] eta: 1:18:58 lr: 0.000013 loss_cls: 2.5046 (2.6063) grad_norm: 1.0210 (1.7360) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 09:20:36 root] (utils.py 283): INFO Epoch: [7] [ 860/2502] eta: 1:18:30 lr: 0.000013 loss_cls: 2.5702 (2.6062) grad_norm: 1.0981 (1.7293) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 09:21:04 root] (utils.py 283): INFO Epoch: [7] [ 870/2502] eta: 1:18:01 lr: 0.000013 loss_cls: 2.7678 (2.6081) grad_norm: 1.1952 (1.7426) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 09:21:33 root] (utils.py 283): INFO Epoch: [7] [ 880/2502] eta: 1:17:33 lr: 0.000013 loss_cls: 2.8190 (2.6073) grad_norm: 1.1491 (1.7354) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 09:22:02 root] (utils.py 283): INFO Epoch: [7] [ 890/2502] eta: 1:17:04 lr: 0.000013 loss_cls: 2.7424 (2.6081) grad_norm: 1.0597 (1.7286) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 09:22:30 root] (utils.py 283): INFO Epoch: [7] [ 900/2502] eta: 1:16:35 lr: 0.000013 loss_cls: 2.6884 (2.6092) grad_norm: 1.0702 (1.7223) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 09:22:59 root] (utils.py 283): INFO Epoch: [7] [ 910/2502] eta: 1:16:07 lr: 0.000013 loss_cls: 2.6599 (2.6090) grad_norm: 1.0702 (1.7189) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 09:23:28 root] (utils.py 283): INFO Epoch: [7] [ 920/2502] eta: 1:15:38 lr: 0.000013 loss_cls: 2.5512 (2.6080) grad_norm: 1.1397 (1.7134) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 09:23:57 root] (utils.py 283): INFO Epoch: [7] [ 930/2502] eta: 1:15:09 lr: 0.000013 loss_cls: 2.3160 (2.6055) grad_norm: 1.1255 (1.7072) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 09:24:25 root] (utils.py 283): INFO Epoch: [7] [ 940/2502] eta: 1:14:41 lr: 0.000013 loss_cls: 2.3160 (2.6032) grad_norm: 1.1569 (1.7022) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 09:24:54 root] (utils.py 283): INFO Epoch: [7] [ 950/2502] eta: 1:14:12 lr: 0.000013 loss_cls: 2.4683 (2.6017) grad_norm: 1.1569 (1.6959) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 09:25:23 root] (utils.py 283): INFO Epoch: [7] [ 960/2502] eta: 1:13:43 lr: 0.000013 loss_cls: 2.5218 (2.6026) grad_norm: 1.1296 (1.6910) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 09:25:51 root] (utils.py 283): INFO Epoch: [7] [ 970/2502] eta: 1:13:15 lr: 0.000013 loss_cls: 2.6280 (2.6026) grad_norm: 1.1520 (1.6883) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 09:26:20 root] (utils.py 283): INFO Epoch: [7] [ 980/2502] eta: 1:12:46 lr: 0.000013 loss_cls: 2.6577 (2.6028) grad_norm: 1.1121 (1.6847) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 09:26:49 root] (utils.py 283): INFO Epoch: [7] [ 990/2502] eta: 1:12:17 lr: 0.000013 loss_cls: 2.6577 (2.6041) grad_norm: 1.1182 (1.6809) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 09:27:18 root] (utils.py 283): INFO Epoch: [7] [1000/2502] eta: 1:11:49 lr: 0.000013 loss_cls: 2.6367 (2.6039) grad_norm: 1.1088 (1.6754) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 09:27:46 root] (utils.py 283): INFO Epoch: [7] [1010/2502] eta: 1:11:20 lr: 0.000013 loss_cls: 2.6079 (2.6043) grad_norm: 1.1312 (1.6879) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 09:28:15 root] (utils.py 283): INFO Epoch: [7] [1020/2502] eta: 1:10:51 lr: 0.000013 loss_cls: 2.6823 (2.6034) grad_norm: 1.1848 (1.6848) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 09:28:44 root] (utils.py 283): INFO Epoch: [7] [1030/2502] eta: 1:10:23 lr: 0.000013 loss_cls: 2.3198 (2.5995) grad_norm: 1.1598 (1.6793) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 09:29:12 root] (utils.py 283): INFO Epoch: [7] [1040/2502] eta: 1:09:54 lr: 0.000013 loss_cls: 2.4955 (2.6007) grad_norm: 1.1211 (1.6744) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 09:29:41 root] (utils.py 283): INFO Epoch: [7] [1050/2502] eta: 1:09:25 lr: 0.000013 loss_cls: 2.5374 (2.5998) grad_norm: 1.1316 (1.6803) time: 2.8721 data: 0.0003 max mem: 28454 +[2024-12-12 09:30:10 root] (utils.py 283): INFO Epoch: [7] [1060/2502] eta: 1:08:57 lr: 0.000013 loss_cls: 2.4471 (2.5994) grad_norm: 1.1684 (1.6765) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 09:30:39 root] (utils.py 283): INFO Epoch: [7] [1070/2502] eta: 1:08:28 lr: 0.000013 loss_cls: 2.6127 (2.5998) grad_norm: 1.0789 (1.6707) time: 2.8695 data: 0.0003 max mem: 28454 +[2024-12-12 09:31:07 root] (utils.py 283): INFO Epoch: [7] [1080/2502] eta: 1:07:59 lr: 0.000013 loss_cls: 2.5948 (2.5990) grad_norm: 1.0702 (1.6663) time: 2.8675 data: 0.0003 max mem: 28454 +[2024-12-12 09:31:36 root] (utils.py 283): INFO Epoch: [7] [1090/2502] eta: 1:07:31 lr: 0.000013 loss_cls: 2.6894 (2.5998) grad_norm: 1.0702 (1.6773) time: 2.8685 data: 0.0003 max mem: 28454 +[2024-12-12 09:32:05 root] (utils.py 283): INFO Epoch: [7] [1100/2502] eta: 1:07:02 lr: 0.000013 loss_cls: 2.6894 (2.5988) grad_norm: 1.0695 (1.6724) time: 2.8697 data: 0.0003 max mem: 28454 +[2024-12-12 09:32:33 root] (utils.py 283): INFO Epoch: [7] [1110/2502] eta: 1:06:33 lr: 0.000013 loss_cls: 2.5921 (2.5971) grad_norm: 1.0781 (1.6673) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 09:33:02 root] (utils.py 283): INFO Epoch: [7] [1120/2502] eta: 1:06:05 lr: 0.000013 loss_cls: 2.4114 (2.5955) grad_norm: 1.1070 (1.6729) time: 2.8651 data: 0.0003 max mem: 28454 +[2024-12-12 09:33:31 root] (utils.py 283): INFO Epoch: [7] [1130/2502] eta: 1:05:36 lr: 0.000013 loss_cls: 2.5193 (2.5955) grad_norm: 1.1017 (1.6676) time: 2.8624 data: 0.0002 max mem: 28454 +[2024-12-12 09:33:59 root] (utils.py 283): INFO Epoch: [7] [1140/2502] eta: 1:05:07 lr: 0.000013 loss_cls: 2.6207 (2.5949) grad_norm: 1.0139 (1.6623) time: 2.8640 data: 0.0002 max mem: 28454 +[2024-12-12 09:34:28 root] (utils.py 283): INFO Epoch: [7] [1150/2502] eta: 1:04:38 lr: 0.000013 loss_cls: 2.5393 (2.5949) grad_norm: 1.0887 (1.6573) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 09:34:57 root] (utils.py 283): INFO Epoch: [7] [1160/2502] eta: 1:04:10 lr: 0.000013 loss_cls: 2.5393 (2.5946) grad_norm: 1.1029 (1.6627) time: 2.8637 data: 0.0002 max mem: 28454 +[2024-12-12 09:35:25 root] (utils.py 283): INFO Epoch: [7] [1170/2502] eta: 1:03:41 lr: 0.000013 loss_cls: 2.7012 (2.5940) grad_norm: 1.2149 (1.6627) time: 2.8631 data: 0.0002 max mem: 28454 +[2024-12-12 09:35:54 root] (utils.py 283): INFO Epoch: [7] [1180/2502] eta: 1:03:12 lr: 0.000013 loss_cls: 2.7012 (2.5954) grad_norm: 1.1765 (1.6589) time: 2.8637 data: 0.0003 max mem: 28454 +[2024-12-12 09:36:22 root] (utils.py 283): INFO Epoch: [7] [1190/2502] eta: 1:02:43 lr: 0.000013 loss_cls: 2.6899 (2.5941) grad_norm: 1.1127 (1.6549) time: 2.8649 data: 0.0003 max mem: 28454 +[2024-12-12 09:36:51 root] (utils.py 283): INFO Epoch: [7] [1200/2502] eta: 1:02:15 lr: 0.000013 loss_cls: 2.2990 (2.5921) grad_norm: 1.0916 (1.6499) time: 2.8668 data: 0.0003 max mem: 28454 +[2024-12-12 09:37:20 root] (utils.py 283): INFO Epoch: [7] [1210/2502] eta: 1:01:46 lr: 0.000013 loss_cls: 2.4379 (2.5922) grad_norm: 1.0916 (1.6565) time: 2.8703 data: 0.0003 max mem: 28454 +[2024-12-12 09:37:49 root] (utils.py 283): INFO Epoch: [7] [1220/2502] eta: 1:01:17 lr: 0.000013 loss_cls: 2.5745 (2.5902) grad_norm: 1.1572 (1.6522) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 09:38:17 root] (utils.py 283): INFO Epoch: [7] [1230/2502] eta: 1:00:49 lr: 0.000013 loss_cls: 2.5265 (2.5901) grad_norm: 1.1081 (1.6516) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 09:38:46 root] (utils.py 283): INFO Epoch: [7] [1240/2502] eta: 1:00:20 lr: 0.000013 loss_cls: 2.6350 (2.5894) grad_norm: 1.0683 (1.6527) time: 2.8727 data: 0.0003 max mem: 28454 +[2024-12-12 09:39:15 root] (utils.py 283): INFO Epoch: [7] [1250/2502] eta: 0:59:51 lr: 0.000013 loss_cls: 2.5645 (2.5887) grad_norm: 1.0448 (1.6491) time: 2.8709 data: 0.0003 max mem: 28454 +[2024-12-12 09:39:43 root] (utils.py 283): INFO Epoch: [7] [1260/2502] eta: 0:59:23 lr: 0.000013 loss_cls: 2.6479 (2.5903) grad_norm: 1.1591 (1.6466) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-12 09:40:12 root] (utils.py 283): INFO Epoch: [7] [1270/2502] eta: 0:58:54 lr: 0.000013 loss_cls: 2.7813 (2.5915) grad_norm: 1.1473 (1.6443) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 09:40:41 root] (utils.py 283): INFO Epoch: [7] [1280/2502] eta: 0:58:25 lr: 0.000013 loss_cls: 2.6400 (2.5913) grad_norm: 1.0761 (1.6400) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 09:41:10 root] (utils.py 283): INFO Epoch: [7] [1290/2502] eta: 0:57:57 lr: 0.000013 loss_cls: 2.6240 (2.5917) grad_norm: 1.1022 (1.6364) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 09:41:38 root] (utils.py 283): INFO Epoch: [7] [1300/2502] eta: 0:57:28 lr: 0.000013 loss_cls: 2.6743 (2.5925) grad_norm: 1.1194 (1.6323) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 09:42:07 root] (utils.py 283): INFO Epoch: [7] [1310/2502] eta: 0:56:59 lr: 0.000013 loss_cls: 2.5933 (2.5920) grad_norm: 1.0832 (1.6283) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 09:42:36 root] (utils.py 283): INFO Epoch: [7] [1320/2502] eta: 0:56:31 lr: 0.000013 loss_cls: 2.5933 (2.5916) grad_norm: 1.0933 (1.6246) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 09:43:04 root] (utils.py 283): INFO Epoch: [7] [1330/2502] eta: 0:56:02 lr: 0.000013 loss_cls: 2.5578 (2.5899) grad_norm: 1.0873 (1.6205) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 09:43:33 root] (utils.py 283): INFO Epoch: [7] [1340/2502] eta: 0:55:33 lr: 0.000013 loss_cls: 2.4533 (2.5889) grad_norm: 1.0832 (1.6172) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 09:44:02 root] (utils.py 283): INFO Epoch: [7] [1350/2502] eta: 0:55:05 lr: 0.000013 loss_cls: 2.7314 (2.5904) grad_norm: 1.1449 (1.6140) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 09:44:31 root] (utils.py 283): INFO Epoch: [7] [1360/2502] eta: 0:54:36 lr: 0.000013 loss_cls: 2.7345 (2.5901) grad_norm: 1.1454 (1.6112) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 09:44:59 root] (utils.py 283): INFO Epoch: [7] [1370/2502] eta: 0:54:07 lr: 0.000013 loss_cls: 2.7076 (2.5906) grad_norm: 1.1312 (1.6077) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 09:45:28 root] (utils.py 283): INFO Epoch: [7] [1380/2502] eta: 0:53:39 lr: 0.000013 loss_cls: 2.7842 (2.5920) grad_norm: 1.1412 (1.6045) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 09:45:57 root] (utils.py 283): INFO Epoch: [7] [1390/2502] eta: 0:53:10 lr: 0.000013 loss_cls: 2.7453 (2.5922) grad_norm: 1.1106 (1.6018) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 09:46:25 root] (utils.py 283): INFO Epoch: [7] [1400/2502] eta: 0:52:41 lr: 0.000013 loss_cls: 2.6238 (2.5914) grad_norm: 1.1106 (1.5985) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 09:46:54 root] (utils.py 283): INFO Epoch: [7] [1410/2502] eta: 0:52:12 lr: 0.000013 loss_cls: 2.5748 (2.5913) grad_norm: 1.0854 (1.5949) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 09:47:23 root] (utils.py 283): INFO Epoch: [7] [1420/2502] eta: 0:51:44 lr: 0.000013 loss_cls: 2.6327 (2.5907) grad_norm: 1.0854 (1.5986) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 09:47:51 root] (utils.py 283): INFO Epoch: [7] [1430/2502] eta: 0:51:15 lr: 0.000013 loss_cls: 2.6327 (2.5899) grad_norm: 1.1520 (1.6002) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 09:48:20 root] (utils.py 283): INFO Epoch: [7] [1440/2502] eta: 0:50:46 lr: 0.000013 loss_cls: 2.5885 (2.5888) grad_norm: 1.1224 (1.6045) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 09:48:49 root] (utils.py 283): INFO Epoch: [7] [1450/2502] eta: 0:50:18 lr: 0.000013 loss_cls: 2.5702 (2.5882) grad_norm: 1.1916 (1.6024) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 09:49:17 root] (utils.py 283): INFO Epoch: [7] [1460/2502] eta: 0:49:49 lr: 0.000013 loss_cls: 2.6684 (2.5897) grad_norm: 1.1858 (1.5998) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 09:49:46 root] (utils.py 283): INFO Epoch: [7] [1470/2502] eta: 0:49:20 lr: 0.000013 loss_cls: 2.8574 (2.5894) grad_norm: 1.1403 (1.5968) time: 2.8653 data: 0.0002 max mem: 28454 +[2024-12-12 09:50:15 root] (utils.py 283): INFO Epoch: [7] [1480/2502] eta: 0:48:52 lr: 0.000013 loss_cls: 2.6302 (2.5883) grad_norm: 1.1403 (1.5936) time: 2.8637 data: 0.0002 max mem: 28454 +[2024-12-12 09:50:43 root] (utils.py 283): INFO Epoch: [7] [1490/2502] eta: 0:48:23 lr: 0.000013 loss_cls: 2.6049 (2.5881) grad_norm: 1.1476 (1.5913) time: 2.8654 data: 0.0002 max mem: 28454 +[2024-12-12 09:51:12 root] (utils.py 283): INFO Epoch: [7] [1500/2502] eta: 0:47:54 lr: 0.000013 loss_cls: 2.7003 (2.5878) grad_norm: 1.1352 (1.5889) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 09:51:41 root] (utils.py 283): INFO Epoch: [7] [1510/2502] eta: 0:47:25 lr: 0.000013 loss_cls: 2.6797 (2.5879) grad_norm: 1.1388 (1.5871) time: 2.8662 data: 0.0003 max mem: 28454 +[2024-12-12 09:52:09 root] (utils.py 283): INFO Epoch: [7] [1520/2502] eta: 0:46:57 lr: 0.000013 loss_cls: 2.5414 (2.5865) grad_norm: 1.1479 (1.5846) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 09:52:38 root] (utils.py 283): INFO Epoch: [7] [1530/2502] eta: 0:46:28 lr: 0.000013 loss_cls: 2.3036 (2.5850) grad_norm: 1.0620 (1.5810) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 09:53:07 root] (utils.py 283): INFO Epoch: [7] [1540/2502] eta: 0:45:59 lr: 0.000013 loss_cls: 2.7154 (2.5861) grad_norm: 1.0620 (1.5781) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 09:53:36 root] (utils.py 283): INFO Epoch: [7] [1550/2502] eta: 0:45:31 lr: 0.000013 loss_cls: 2.7925 (2.5866) grad_norm: 1.0907 (1.5760) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 09:54:04 root] (utils.py 283): INFO Epoch: [7] [1560/2502] eta: 0:45:02 lr: 0.000013 loss_cls: 2.5659 (2.5866) grad_norm: 1.0907 (1.5742) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 09:54:33 root] (utils.py 283): INFO Epoch: [7] [1570/2502] eta: 0:44:33 lr: 0.000013 loss_cls: 2.6962 (2.5876) grad_norm: 1.0578 (1.5713) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 09:55:02 root] (utils.py 283): INFO Epoch: [7] [1580/2502] eta: 0:44:05 lr: 0.000013 loss_cls: 2.6566 (2.5865) grad_norm: 1.0787 (1.5694) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 09:55:31 root] (utils.py 283): INFO Epoch: [7] [1590/2502] eta: 0:43:36 lr: 0.000013 loss_cls: 2.6529 (2.5878) grad_norm: 1.1565 (1.5689) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 09:55:59 root] (utils.py 283): INFO Epoch: [7] [1600/2502] eta: 0:43:07 lr: 0.000013 loss_cls: 2.7887 (2.5887) grad_norm: 1.1987 (1.5689) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 09:56:28 root] (utils.py 283): INFO Epoch: [7] [1610/2502] eta: 0:42:39 lr: 0.000013 loss_cls: 2.6803 (2.5882) grad_norm: 1.1828 (1.5662) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 09:56:57 root] (utils.py 283): INFO Epoch: [7] [1620/2502] eta: 0:42:10 lr: 0.000013 loss_cls: 2.4908 (2.5863) grad_norm: 1.0949 (1.5631) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 09:57:26 root] (utils.py 283): INFO Epoch: [7] [1630/2502] eta: 0:41:41 lr: 0.000013 loss_cls: 2.5378 (2.5860) grad_norm: 1.0629 (1.5601) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 09:57:54 root] (utils.py 283): INFO Epoch: [7] [1640/2502] eta: 0:41:13 lr: 0.000013 loss_cls: 2.7719 (2.5861) grad_norm: 1.0982 (1.5591) time: 2.8772 data: 0.0002 max mem: 28454 +[2024-12-12 09:58:23 root] (utils.py 283): INFO Epoch: [7] [1650/2502] eta: 0:40:44 lr: 0.000013 loss_cls: 2.7213 (2.5856) grad_norm: 1.2212 (1.5577) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 09:58:52 root] (utils.py 283): INFO Epoch: [7] [1660/2502] eta: 0:40:16 lr: 0.000013 loss_cls: 2.8317 (2.5867) grad_norm: 1.1717 (1.5553) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-12 09:59:21 root] (utils.py 283): INFO Epoch: [7] [1670/2502] eta: 0:39:47 lr: 0.000013 loss_cls: 2.8943 (2.5875) grad_norm: 1.1005 (1.5539) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 09:59:49 root] (utils.py 283): INFO Epoch: [7] [1680/2502] eta: 0:39:18 lr: 0.000013 loss_cls: 2.8324 (2.5874) grad_norm: 1.1134 (1.5536) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 10:00:18 root] (utils.py 283): INFO Epoch: [7] [1690/2502] eta: 0:38:50 lr: 0.000013 loss_cls: 2.7032 (2.5874) grad_norm: 1.1234 (1.5534) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 10:00:47 root] (utils.py 283): INFO Epoch: [7] [1700/2502] eta: 0:38:21 lr: 0.000013 loss_cls: 2.6984 (2.5881) grad_norm: 1.0787 (1.5506) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 10:01:16 root] (utils.py 283): INFO Epoch: [7] [1710/2502] eta: 0:37:52 lr: 0.000013 loss_cls: 2.7096 (2.5890) grad_norm: 1.1312 (1.5490) time: 2.8744 data: 0.0003 max mem: 28454 +[2024-12-12 10:01:44 root] (utils.py 283): INFO Epoch: [7] [1720/2502] eta: 0:37:24 lr: 0.000013 loss_cls: 2.7109 (2.5898) grad_norm: 1.0906 (1.5465) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 10:02:13 root] (utils.py 283): INFO Epoch: [7] [1730/2502] eta: 0:36:55 lr: 0.000013 loss_cls: 2.7521 (2.5896) grad_norm: 1.0938 (1.5439) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 10:02:42 root] (utils.py 283): INFO Epoch: [7] [1740/2502] eta: 0:36:26 lr: 0.000013 loss_cls: 2.7232 (2.5887) grad_norm: 1.0938 (1.5497) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 10:03:11 root] (utils.py 283): INFO Epoch: [7] [1750/2502] eta: 0:35:58 lr: 0.000013 loss_cls: 2.6053 (2.5882) grad_norm: 1.1389 (1.5482) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 10:03:39 root] (utils.py 283): INFO Epoch: [7] [1760/2502] eta: 0:35:29 lr: 0.000013 loss_cls: 2.5700 (2.5873) grad_norm: 1.1566 (1.5475) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 10:04:08 root] (utils.py 283): INFO Epoch: [7] [1770/2502] eta: 0:35:00 lr: 0.000013 loss_cls: 2.3568 (2.5861) grad_norm: 1.1175 (1.5500) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 10:04:37 root] (utils.py 283): INFO Epoch: [7] [1780/2502] eta: 0:34:31 lr: 0.000013 loss_cls: 2.7065 (2.5866) grad_norm: 1.1175 (1.5480) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 10:05:06 root] (utils.py 283): INFO Epoch: [7] [1790/2502] eta: 0:34:03 lr: 0.000013 loss_cls: 2.6706 (2.5866) grad_norm: 1.1858 (1.5458) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 10:05:34 root] (utils.py 283): INFO Epoch: [7] [1800/2502] eta: 0:33:34 lr: 0.000013 loss_cls: 2.5735 (2.5867) grad_norm: 1.0892 (1.5439) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 10:06:03 root] (utils.py 283): INFO Epoch: [7] [1810/2502] eta: 0:33:05 lr: 0.000013 loss_cls: 2.5648 (2.5862) grad_norm: 1.0781 (1.5415) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 10:06:32 root] (utils.py 283): INFO Epoch: [7] [1820/2502] eta: 0:32:37 lr: 0.000013 loss_cls: 2.5501 (2.5854) grad_norm: 1.0959 (1.5394) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 10:07:01 root] (utils.py 283): INFO Epoch: [7] [1830/2502] eta: 0:32:08 lr: 0.000013 loss_cls: 2.6433 (2.5864) grad_norm: 1.1131 (1.5429) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 10:07:29 root] (utils.py 283): INFO Epoch: [7] [1840/2502] eta: 0:31:39 lr: 0.000013 loss_cls: 2.6852 (2.5863) grad_norm: 1.1327 (1.5413) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 10:07:58 root] (utils.py 283): INFO Epoch: [7] [1850/2502] eta: 0:31:11 lr: 0.000013 loss_cls: 2.6336 (2.5866) grad_norm: 1.1511 (1.5393) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 10:08:27 root] (utils.py 283): INFO Epoch: [7] [1860/2502] eta: 0:30:42 lr: 0.000013 loss_cls: 2.5703 (2.5855) grad_norm: 1.1259 (1.5371) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 10:08:56 root] (utils.py 283): INFO Epoch: [7] [1870/2502] eta: 0:30:13 lr: 0.000013 loss_cls: 2.5357 (2.5851) grad_norm: 1.1309 (1.5351) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 10:09:24 root] (utils.py 283): INFO Epoch: [7] [1880/2502] eta: 0:29:45 lr: 0.000013 loss_cls: 2.4953 (2.5835) grad_norm: 1.0371 (1.5324) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 10:09:53 root] (utils.py 283): INFO Epoch: [7] [1890/2502] eta: 0:29:16 lr: 0.000013 loss_cls: 2.5167 (2.5836) grad_norm: 1.0371 (1.5340) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 10:10:22 root] (utils.py 283): INFO Epoch: [7] [1900/2502] eta: 0:28:47 lr: 0.000013 loss_cls: 2.6450 (2.5842) grad_norm: 1.0899 (1.5318) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 10:10:51 root] (utils.py 283): INFO Epoch: [7] [1910/2502] eta: 0:28:19 lr: 0.000013 loss_cls: 2.8589 (2.5857) grad_norm: 1.0671 (1.5298) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 10:11:19 root] (utils.py 283): INFO Epoch: [7] [1920/2502] eta: 0:27:50 lr: 0.000013 loss_cls: 2.7480 (2.5855) grad_norm: 1.0807 (1.5277) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 10:11:48 root] (utils.py 283): INFO Epoch: [7] [1930/2502] eta: 0:27:21 lr: 0.000013 loss_cls: 2.7480 (2.5862) grad_norm: 1.1765 (1.5262) time: 2.8817 data: 0.0002 max mem: 28454 +[2024-12-12 10:12:17 root] (utils.py 283): INFO Epoch: [7] [1940/2502] eta: 0:26:53 lr: 0.000013 loss_cls: 2.7942 (2.5868) grad_norm: 1.1936 (1.5248) time: 2.8822 data: 0.0002 max mem: 28454 +[2024-12-12 10:12:46 root] (utils.py 283): INFO Epoch: [7] [1950/2502] eta: 0:26:24 lr: 0.000013 loss_cls: 2.6561 (2.5869) grad_norm: 1.1759 (1.5229) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 10:13:14 root] (utils.py 283): INFO Epoch: [7] [1960/2502] eta: 0:25:55 lr: 0.000013 loss_cls: 2.6414 (2.5870) grad_norm: 1.1084 (1.5209) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 10:13:43 root] (utils.py 283): INFO Epoch: [7] [1970/2502] eta: 0:25:26 lr: 0.000013 loss_cls: 2.7084 (2.5882) grad_norm: 1.0257 (1.5188) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 10:14:12 root] (utils.py 283): INFO Epoch: [7] [1980/2502] eta: 0:24:58 lr: 0.000013 loss_cls: 2.7769 (2.5884) grad_norm: 1.0642 (1.5166) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 10:14:41 root] (utils.py 283): INFO Epoch: [7] [1990/2502] eta: 0:24:29 lr: 0.000013 loss_cls: 2.5500 (2.5881) grad_norm: 1.0750 (1.5150) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 10:15:09 root] (utils.py 283): INFO Epoch: [7] [2000/2502] eta: 0:24:00 lr: 0.000013 loss_cls: 2.5075 (2.5879) grad_norm: 1.1003 (1.5131) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-12 10:15:38 root] (utils.py 283): INFO Epoch: [7] [2010/2502] eta: 0:23:32 lr: 0.000013 loss_cls: 2.5905 (2.5879) grad_norm: 1.1205 (1.5129) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 10:16:07 root] (utils.py 283): INFO Epoch: [7] [2020/2502] eta: 0:23:03 lr: 0.000013 loss_cls: 2.6699 (2.5878) grad_norm: 1.1133 (1.5110) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-12 10:16:36 root] (utils.py 283): INFO Epoch: [7] [2030/2502] eta: 0:22:34 lr: 0.000013 loss_cls: 2.7566 (2.5882) grad_norm: 1.1133 (1.5107) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 10:17:04 root] (utils.py 283): INFO Epoch: [7] [2040/2502] eta: 0:22:06 lr: 0.000013 loss_cls: 2.7200 (2.5880) grad_norm: 1.1233 (1.5085) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 10:17:33 root] (utils.py 283): INFO Epoch: [7] [2050/2502] eta: 0:21:37 lr: 0.000013 loss_cls: 2.7105 (2.5884) grad_norm: 1.1659 (1.5279) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 10:18:02 root] (utils.py 283): INFO Epoch: [7] [2060/2502] eta: 0:21:08 lr: 0.000013 loss_cls: 2.7171 (2.5896) grad_norm: 1.2841 (1.5275) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 10:18:31 root] (utils.py 283): INFO Epoch: [7] [2070/2502] eta: 0:20:40 lr: 0.000013 loss_cls: 2.7328 (2.5892) grad_norm: 1.2239 (1.5282) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 10:18:59 root] (utils.py 283): INFO Epoch: [7] [2080/2502] eta: 0:20:11 lr: 0.000013 loss_cls: 2.6981 (2.5893) grad_norm: 1.1463 (1.5291) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 10:19:28 root] (utils.py 283): INFO Epoch: [7] [2090/2502] eta: 0:19:42 lr: 0.000013 loss_cls: 2.6487 (2.5891) grad_norm: 1.1398 (1.5278) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 10:19:57 root] (utils.py 283): INFO Epoch: [7] [2100/2502] eta: 0:19:13 lr: 0.000013 loss_cls: 2.6413 (2.5893) grad_norm: 1.1398 (1.5316) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 10:20:26 root] (utils.py 283): INFO Epoch: [7] [2110/2502] eta: 0:18:45 lr: 0.000013 loss_cls: 2.5973 (2.5893) grad_norm: 1.1572 (1.5324) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 10:20:54 root] (utils.py 283): INFO Epoch: [7] [2120/2502] eta: 0:18:16 lr: 0.000013 loss_cls: 2.7140 (2.5895) grad_norm: 1.2344 (1.5317) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 10:21:23 root] (utils.py 283): INFO Epoch: [7] [2130/2502] eta: 0:17:47 lr: 0.000013 loss_cls: 2.7140 (2.5891) grad_norm: 1.1422 (1.5303) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 10:21:52 root] (utils.py 283): INFO Epoch: [7] [2140/2502] eta: 0:17:19 lr: 0.000013 loss_cls: 2.4801 (2.5881) grad_norm: 1.0683 (1.5283) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 10:22:21 root] (utils.py 283): INFO Epoch: [7] [2150/2502] eta: 0:16:50 lr: 0.000013 loss_cls: 2.5016 (2.5874) grad_norm: 1.0758 (1.5271) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 10:22:49 root] (utils.py 283): INFO Epoch: [7] [2160/2502] eta: 0:16:21 lr: 0.000013 loss_cls: 2.5037 (2.5872) grad_norm: 1.0939 (1.5269) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 10:23:18 root] (utils.py 283): INFO Epoch: [7] [2170/2502] eta: 0:15:53 lr: 0.000013 loss_cls: 2.5037 (2.5869) grad_norm: 1.1831 (1.5255) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 10:23:47 root] (utils.py 283): INFO Epoch: [7] [2180/2502] eta: 0:15:24 lr: 0.000013 loss_cls: 2.6373 (2.5866) grad_norm: 1.1789 (1.5238) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 10:24:15 root] (utils.py 283): INFO Epoch: [7] [2190/2502] eta: 0:14:55 lr: 0.000013 loss_cls: 2.7381 (2.5874) grad_norm: 1.1560 (1.5225) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 10:24:44 root] (utils.py 283): INFO Epoch: [7] [2200/2502] eta: 0:14:26 lr: 0.000013 loss_cls: 2.6926 (2.5864) grad_norm: 1.0989 (1.5208) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 10:25:13 root] (utils.py 283): INFO Epoch: [7] [2210/2502] eta: 0:13:58 lr: 0.000013 loss_cls: 2.4317 (2.5868) grad_norm: 1.1226 (1.5204) time: 2.8721 data: 0.0003 max mem: 28454 +[2024-12-12 10:25:42 root] (utils.py 283): INFO Epoch: [7] [2220/2502] eta: 0:13:29 lr: 0.000013 loss_cls: 2.7084 (2.5871) grad_norm: 1.1547 (1.5189) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 10:26:10 root] (utils.py 283): INFO Epoch: [7] [2230/2502] eta: 0:13:00 lr: 0.000013 loss_cls: 2.7084 (2.5872) grad_norm: 1.1547 (1.5176) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 10:26:39 root] (utils.py 283): INFO Epoch: [7] [2240/2502] eta: 0:12:32 lr: 0.000013 loss_cls: 2.6840 (2.5867) grad_norm: 1.1371 (1.5162) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 10:27:08 root] (utils.py 283): INFO Epoch: [7] [2250/2502] eta: 0:12:03 lr: 0.000013 loss_cls: 2.7031 (2.5873) grad_norm: 1.0928 (1.5141) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 10:27:36 root] (utils.py 283): INFO Epoch: [7] [2260/2502] eta: 0:11:34 lr: 0.000013 loss_cls: 2.7376 (2.5867) grad_norm: 1.0336 (1.5123) time: 2.8701 data: 0.0003 max mem: 28454 +[2024-12-12 10:28:05 root] (utils.py 283): INFO Epoch: [7] [2270/2502] eta: 0:11:05 lr: 0.000013 loss_cls: 2.5394 (2.5864) grad_norm: 1.0891 (1.5433) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 10:28:34 root] (utils.py 283): INFO Epoch: [7] [2280/2502] eta: 0:10:37 lr: 0.000013 loss_cls: 2.4510 (2.5864) grad_norm: 1.3496 (1.5442) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 10:29:03 root] (utils.py 283): INFO Epoch: [7] [2290/2502] eta: 0:10:08 lr: 0.000013 loss_cls: 2.8231 (2.5872) grad_norm: 1.6733 (1.5449) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 10:29:31 root] (utils.py 283): INFO Epoch: [7] [2300/2502] eta: 0:09:39 lr: 0.000013 loss_cls: 2.7535 (2.5864) grad_norm: 1.5927 (1.5442) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 10:30:00 root] (utils.py 283): INFO Epoch: [7] [2310/2502] eta: 0:09:11 lr: 0.000013 loss_cls: 2.5715 (2.5862) grad_norm: 1.3238 (1.5438) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 10:30:29 root] (utils.py 283): INFO Epoch: [7] [2320/2502] eta: 0:08:42 lr: 0.000013 loss_cls: 2.7409 (2.5861) grad_norm: 1.3238 (1.5428) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 10:30:57 root] (utils.py 283): INFO Epoch: [7] [2330/2502] eta: 0:08:13 lr: 0.000013 loss_cls: 2.7409 (2.5866) grad_norm: 1.3150 (1.5421) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 10:31:26 root] (utils.py 283): INFO Epoch: [7] [2340/2502] eta: 0:07:45 lr: 0.000013 loss_cls: 2.5434 (2.5857) grad_norm: 1.3162 (1.5407) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 10:31:55 root] (utils.py 283): INFO Epoch: [7] [2350/2502] eta: 0:07:16 lr: 0.000013 loss_cls: 2.5601 (2.5859) grad_norm: 1.0757 (1.5388) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 10:32:24 root] (utils.py 283): INFO Epoch: [7] [2360/2502] eta: 0:06:47 lr: 0.000013 loss_cls: 2.7536 (2.5864) grad_norm: 1.1183 (1.5373) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 10:32:52 root] (utils.py 283): INFO Epoch: [7] [2370/2502] eta: 0:06:18 lr: 0.000013 loss_cls: 2.7538 (2.5865) grad_norm: 1.1922 (1.5384) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 10:33:21 root] (utils.py 283): INFO Epoch: [7] [2380/2502] eta: 0:05:50 lr: 0.000013 loss_cls: 2.7344 (2.5869) grad_norm: 1.1946 (1.5376) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 10:33:50 root] (utils.py 283): INFO Epoch: [7] [2390/2502] eta: 0:05:21 lr: 0.000013 loss_cls: 2.7127 (2.5872) grad_norm: 1.1507 (1.5373) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 10:34:19 root] (utils.py 283): INFO Epoch: [7] [2400/2502] eta: 0:04:52 lr: 0.000013 loss_cls: 2.6405 (2.5864) grad_norm: 1.0832 (1.5354) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 10:34:47 root] (utils.py 283): INFO Epoch: [7] [2410/2502] eta: 0:04:24 lr: 0.000013 loss_cls: 2.6228 (2.5866) grad_norm: 1.0832 (1.5338) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 10:35:16 root] (utils.py 283): INFO Epoch: [7] [2420/2502] eta: 0:03:55 lr: 0.000013 loss_cls: 2.5961 (2.5868) grad_norm: 1.1295 (1.5325) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 10:35:45 root] (utils.py 283): INFO Epoch: [7] [2430/2502] eta: 0:03:26 lr: 0.000013 loss_cls: 2.5899 (2.5866) grad_norm: 1.1295 (1.5318) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 10:36:13 root] (utils.py 283): INFO Epoch: [7] [2440/2502] eta: 0:02:57 lr: 0.000013 loss_cls: 2.6558 (2.5870) grad_norm: 1.0751 (1.5300) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 10:36:42 root] (utils.py 283): INFO Epoch: [7] [2450/2502] eta: 0:02:29 lr: 0.000013 loss_cls: 2.7474 (2.5868) grad_norm: 1.0822 (1.5299) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 10:37:11 root] (utils.py 283): INFO Epoch: [7] [2460/2502] eta: 0:02:00 lr: 0.000013 loss_cls: 2.6028 (2.5863) grad_norm: 1.0871 (1.5281) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 10:37:40 root] (utils.py 283): INFO Epoch: [7] [2470/2502] eta: 0:01:31 lr: 0.000013 loss_cls: 2.5501 (2.5859) grad_norm: 1.1175 (1.5267) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 10:38:08 root] (utils.py 283): INFO Epoch: [7] [2480/2502] eta: 0:01:03 lr: 0.000013 loss_cls: 2.4476 (2.5857) grad_norm: 1.1212 (1.5249) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 10:38:37 root] (utils.py 283): INFO Epoch: [7] [2490/2502] eta: 0:00:34 lr: 0.000013 loss_cls: 2.5740 (2.5859) grad_norm: 1.0449 (1.5329) time: 2.8895 data: 0.0202 max mem: 28454 +[2024-12-12 10:39:06 root] (utils.py 283): INFO Epoch: [7] [2500/2502] eta: 0:00:05 lr: 0.000013 loss_cls: 2.5740 (2.5854) grad_norm: 1.1492 (1.5320) time: 2.8927 data: 0.0202 max mem: 28454 +[2024-12-12 10:39:09 root] (utils.py 283): INFO Epoch: [7] [2501/2502] eta: 0:00:02 lr: 0.000013 loss_cls: 2.5879 (2.5854) grad_norm: 1.1492 (1.5318) time: 2.8935 data: 0.0202 max mem: 28454 +[2024-12-12 10:39:09 root] (utils.py 297): INFO Epoch: [7] Total time: 1:59:43 (2.8710 s / it) +[2024-12-12 10:39:09 root] (engine.py 179): INFO Averaged stats:lr: 0.000013 loss_cls: 2.5879 (2.5823) grad_norm: 1.1492 (1.5318) +[2024-12-12 10:39:12 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:52 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3950 (0.3950) acc1: 92.9688 (92.9688) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5395 data: 0.0005 max mem: 28454 +[2024-12-12 10:39:17 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6084 (0.5884) acc1: 86.7188 (87.0739) acc3: 97.6562 (97.0170) acc5: 98.4375 (97.9403) time: 0.5456 data: 0.0004 max mem: 28454 +[2024-12-12 10:39:23 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6084 (0.6151) acc1: 86.7188 (86.7560) acc3: 96.0938 (96.5402) acc5: 98.4375 (97.6562) time: 0.5466 data: 0.0004 max mem: 28454 +[2024-12-12 10:39:28 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6168 (0.6494) acc1: 86.7188 (85.5847) acc3: 96.0938 (96.1946) acc5: 97.6562 (97.4546) time: 0.5473 data: 0.0004 max mem: 28454 +[2024-12-12 10:39:34 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6822 (0.6567) acc1: 85.1562 (85.3849) acc3: 96.0938 (96.1319) acc5: 97.6562 (97.4466) time: 0.5478 data: 0.0004 max mem: 28454 +[2024-12-12 10:39:39 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8371 (0.7357) acc1: 79.6875 (83.7010) acc3: 94.5312 (95.0827) acc5: 95.3125 (96.7525) time: 0.5481 data: 0.0004 max mem: 28454 +[2024-12-12 10:39:45 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9908 (0.7641) acc1: 79.6875 (83.3504) acc3: 90.6250 (94.4800) acc5: 93.7500 (96.2731) time: 0.5484 data: 0.0004 max mem: 28454 +[2024-12-12 10:39:50 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9907 (0.7897) acc1: 80.4688 (82.6364) acc3: 92.1875 (94.1901) acc5: 94.5312 (96.1378) time: 0.5483 data: 0.0004 max mem: 28454 +[2024-12-12 10:39:56 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9657 (0.8136) acc1: 77.3438 (82.1856) acc3: 92.1875 (93.8947) acc5: 94.5312 (95.8333) time: 0.5487 data: 0.0006 max mem: 28454 +[2024-12-12 10:40:01 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9657 (0.8333) acc1: 76.5625 (81.4646) acc3: 91.4062 (93.5783) acc5: 94.5312 (95.6817) time: 0.5488 data: 0.0006 max mem: 28454 +[2024-12-12 10:40:05 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9012 (0.8312) acc1: 76.5625 (81.5040) acc3: 92.1875 (93.5760) acc5: 95.3125 (95.7360) time: 0.5395 data: 0.0006 max mem: 28454 +[2024-12-12 10:40:05 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5461 s / it) +[2024-12-12 10:40:05 root] (engine.py 264): INFO * Acc@1 81.766 Acc@3 93.548 Acc@5 95.742 loss 0.833 flops 13.207 layer_flops 13.109 +[2024-12-12 10:40:05 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.8% +[2024-12-12 10:40:07 root] (main.py 576): INFO Max accuracy: 81.77% +[2024-12-12 10:40:10 root] (utils.py 283): INFO Epoch: [8] [ 0/2502] eta: 1:58:00 lr: 0.000011 loss_cls: 2.6725 (2.6725) grad_norm: 1.3320 (1.3320) time: 2.8299 data: 0.0003 max mem: 28454 +[2024-12-12 10:40:38 root] (utils.py 283): INFO Epoch: [8] [ 10/2502] eta: 1:59:01 lr: 0.000011 loss_cls: 2.7639 (2.5918) grad_norm: 1.2621 (1.2367) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 10:41:07 root] (utils.py 283): INFO Epoch: [8] [ 20/2502] eta: 1:58:41 lr: 0.000011 loss_cls: 2.8023 (2.6811) grad_norm: 1.1620 (1.1970) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 10:41:36 root] (utils.py 283): INFO Epoch: [8] [ 30/2502] eta: 1:58:16 lr: 0.000011 loss_cls: 2.8075 (2.6930) grad_norm: 1.1934 (2.1979) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 10:42:04 root] (utils.py 283): INFO Epoch: [8] [ 40/2502] eta: 1:57:47 lr: 0.000011 loss_cls: 2.7922 (2.6774) grad_norm: 1.2155 (2.0067) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 10:42:33 root] (utils.py 283): INFO Epoch: [8] [ 50/2502] eta: 1:57:21 lr: 0.000011 loss_cls: 2.7922 (2.6770) grad_norm: 1.2118 (1.9050) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 10:43:02 root] (utils.py 283): INFO Epoch: [8] [ 60/2502] eta: 1:56:54 lr: 0.000011 loss_cls: 2.7126 (2.6841) grad_norm: 1.1859 (1.7882) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 10:43:31 root] (utils.py 283): INFO Epoch: [8] [ 70/2502] eta: 1:56:26 lr: 0.000011 loss_cls: 2.6733 (2.6736) grad_norm: 1.1210 (1.6906) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 10:43:59 root] (utils.py 283): INFO Epoch: [8] [ 80/2502] eta: 1:55:59 lr: 0.000011 loss_cls: 2.6862 (2.6879) grad_norm: 1.0910 (1.6275) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 10:44:28 root] (utils.py 283): INFO Epoch: [8] [ 90/2502] eta: 1:55:30 lr: 0.000011 loss_cls: 2.6862 (2.6708) grad_norm: 1.1644 (1.7939) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 10:44:57 root] (utils.py 283): INFO Epoch: [8] [ 100/2502] eta: 1:55:01 lr: 0.000011 loss_cls: 2.4689 (2.6441) grad_norm: 1.2150 (1.7380) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 10:45:26 root] (utils.py 283): INFO Epoch: [8] [ 110/2502] eta: 1:54:32 lr: 0.000011 loss_cls: 2.5205 (2.6301) grad_norm: 1.1610 (1.7105) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 10:45:54 root] (utils.py 283): INFO Epoch: [8] [ 120/2502] eta: 1:54:04 lr: 0.000011 loss_cls: 2.6494 (2.6205) grad_norm: 1.1565 (1.6688) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 10:46:23 root] (utils.py 283): INFO Epoch: [8] [ 130/2502] eta: 1:53:35 lr: 0.000011 loss_cls: 2.6084 (2.6168) grad_norm: 1.1325 (1.6493) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 10:46:52 root] (utils.py 283): INFO Epoch: [8] [ 140/2502] eta: 1:53:06 lr: 0.000011 loss_cls: 2.6508 (2.6211) grad_norm: 1.0347 (1.6075) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 10:47:21 root] (utils.py 283): INFO Epoch: [8] [ 150/2502] eta: 1:52:37 lr: 0.000011 loss_cls: 2.6187 (2.6130) grad_norm: 1.0593 (1.5801) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 10:47:49 root] (utils.py 283): INFO Epoch: [8] [ 160/2502] eta: 1:52:09 lr: 0.000011 loss_cls: 2.5411 (2.6055) grad_norm: 1.1672 (1.5661) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 10:48:18 root] (utils.py 283): INFO Epoch: [8] [ 170/2502] eta: 1:51:40 lr: 0.000011 loss_cls: 2.3008 (2.5883) grad_norm: 1.1830 (1.5461) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 10:48:47 root] (utils.py 283): INFO Epoch: [8] [ 180/2502] eta: 1:51:11 lr: 0.000011 loss_cls: 2.4303 (2.5850) grad_norm: 1.1341 (1.5224) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 10:49:16 root] (utils.py 283): INFO Epoch: [8] [ 190/2502] eta: 1:50:43 lr: 0.000011 loss_cls: 2.5260 (2.5809) grad_norm: 1.1276 (1.5032) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 10:49:44 root] (utils.py 283): INFO Epoch: [8] [ 200/2502] eta: 1:50:14 lr: 0.000011 loss_cls: 2.6007 (2.5767) grad_norm: 1.0888 (1.4880) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 10:50:13 root] (utils.py 283): INFO Epoch: [8] [ 210/2502] eta: 1:49:45 lr: 0.000011 loss_cls: 2.6919 (2.5797) grad_norm: 1.0912 (1.4727) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 10:50:42 root] (utils.py 283): INFO Epoch: [8] [ 220/2502] eta: 1:49:16 lr: 0.000011 loss_cls: 2.6894 (2.5775) grad_norm: 1.1884 (1.4624) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 10:51:11 root] (utils.py 283): INFO Epoch: [8] [ 230/2502] eta: 1:48:48 lr: 0.000011 loss_cls: 2.7311 (2.5816) grad_norm: 1.1884 (1.4656) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 10:51:39 root] (utils.py 283): INFO Epoch: [8] [ 240/2502] eta: 1:48:19 lr: 0.000011 loss_cls: 2.5367 (2.5684) grad_norm: 1.0907 (1.4530) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-12 10:52:08 root] (utils.py 283): INFO Epoch: [8] [ 250/2502] eta: 1:47:50 lr: 0.000011 loss_cls: 2.4021 (2.5674) grad_norm: 1.1526 (1.4641) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 10:52:37 root] (utils.py 283): INFO Epoch: [8] [ 260/2502] eta: 1:47:22 lr: 0.000011 loss_cls: 2.7033 (2.5663) grad_norm: 1.1582 (1.4653) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 10:53:06 root] (utils.py 283): INFO Epoch: [8] [ 270/2502] eta: 1:46:53 lr: 0.000011 loss_cls: 2.7050 (2.5732) grad_norm: 1.1314 (1.4612) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 10:53:34 root] (utils.py 283): INFO Epoch: [8] [ 280/2502] eta: 1:46:25 lr: 0.000011 loss_cls: 2.7788 (2.5783) grad_norm: 1.1013 (1.4611) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 10:54:03 root] (utils.py 283): INFO Epoch: [8] [ 290/2502] eta: 1:45:56 lr: 0.000011 loss_cls: 2.5880 (2.5675) grad_norm: 1.0370 (1.4471) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 10:54:32 root] (utils.py 283): INFO Epoch: [8] [ 300/2502] eta: 1:45:27 lr: 0.000011 loss_cls: 2.5319 (2.5713) grad_norm: 1.0932 (1.4384) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 10:55:00 root] (utils.py 283): INFO Epoch: [8] [ 310/2502] eta: 1:44:58 lr: 0.000011 loss_cls: 2.6443 (2.5710) grad_norm: 1.0649 (1.4265) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 10:55:29 root] (utils.py 283): INFO Epoch: [8] [ 320/2502] eta: 1:44:29 lr: 0.000011 loss_cls: 2.6443 (2.5780) grad_norm: 1.0641 (1.4189) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 10:55:58 root] (utils.py 283): INFO Epoch: [8] [ 330/2502] eta: 1:44:00 lr: 0.000011 loss_cls: 2.5681 (2.5704) grad_norm: 1.0841 (1.4087) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 10:56:27 root] (utils.py 283): INFO Epoch: [8] [ 340/2502] eta: 1:43:31 lr: 0.000011 loss_cls: 2.3810 (2.5679) grad_norm: 1.0540 (1.4422) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 10:56:55 root] (utils.py 283): INFO Epoch: [8] [ 350/2502] eta: 1:43:03 lr: 0.000011 loss_cls: 2.3937 (2.5676) grad_norm: 1.1020 (1.4412) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 10:57:24 root] (utils.py 283): INFO Epoch: [8] [ 360/2502] eta: 1:42:34 lr: 0.000011 loss_cls: 2.7083 (2.5706) grad_norm: 1.1851 (1.4353) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 10:57:53 root] (utils.py 283): INFO Epoch: [8] [ 370/2502] eta: 1:42:06 lr: 0.000011 loss_cls: 2.6733 (2.5671) grad_norm: 1.0619 (1.4250) time: 2.8783 data: 0.0002 max mem: 28454 +[2024-12-12 10:58:22 root] (utils.py 283): INFO Epoch: [8] [ 380/2502] eta: 1:41:37 lr: 0.000011 loss_cls: 2.7334 (2.5702) grad_norm: 1.0926 (1.4208) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 10:58:50 root] (utils.py 283): INFO Epoch: [8] [ 390/2502] eta: 1:41:08 lr: 0.000011 loss_cls: 2.7334 (2.5675) grad_norm: 1.1144 (1.4121) time: 2.8727 data: 0.0003 max mem: 28454 +[2024-12-12 10:59:19 root] (utils.py 283): INFO Epoch: [8] [ 400/2502] eta: 1:40:40 lr: 0.000011 loss_cls: 2.4806 (2.5653) grad_norm: 1.0459 (1.4049) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 10:59:48 root] (utils.py 283): INFO Epoch: [8] [ 410/2502] eta: 1:40:11 lr: 0.000011 loss_cls: 2.7324 (2.5706) grad_norm: 1.0807 (1.3987) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 11:00:17 root] (utils.py 283): INFO Epoch: [8] [ 420/2502] eta: 1:39:42 lr: 0.000011 loss_cls: 2.7326 (2.5696) grad_norm: 1.1133 (1.3949) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 11:00:45 root] (utils.py 283): INFO Epoch: [8] [ 430/2502] eta: 1:39:13 lr: 0.000011 loss_cls: 2.5497 (2.5670) grad_norm: 1.1896 (1.3987) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 11:01:14 root] (utils.py 283): INFO Epoch: [8] [ 440/2502] eta: 1:38:45 lr: 0.000011 loss_cls: 2.6756 (2.5691) grad_norm: 1.2553 (1.4048) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 11:01:43 root] (utils.py 283): INFO Epoch: [8] [ 450/2502] eta: 1:38:16 lr: 0.000011 loss_cls: 2.7010 (2.5700) grad_norm: 1.1213 (1.4035) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 11:02:11 root] (utils.py 283): INFO Epoch: [8] [ 460/2502] eta: 1:37:47 lr: 0.000011 loss_cls: 2.7010 (2.5722) grad_norm: 1.0882 (1.3987) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 11:02:40 root] (utils.py 283): INFO Epoch: [8] [ 470/2502] eta: 1:37:18 lr: 0.000011 loss_cls: 2.7308 (2.5741) grad_norm: 1.1096 (1.3932) time: 2.8753 data: 0.0003 max mem: 28454 +[2024-12-12 11:03:09 root] (utils.py 283): INFO Epoch: [8] [ 480/2502] eta: 1:36:50 lr: 0.000011 loss_cls: 2.5545 (2.5721) grad_norm: 1.0891 (1.4033) time: 2.8745 data: 0.0003 max mem: 28454 +[2024-12-12 11:03:38 root] (utils.py 283): INFO Epoch: [8] [ 490/2502] eta: 1:36:21 lr: 0.000011 loss_cls: 2.5068 (2.5730) grad_norm: 1.0862 (1.4050) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 11:04:06 root] (utils.py 283): INFO Epoch: [8] [ 500/2502] eta: 1:35:52 lr: 0.000011 loss_cls: 2.6760 (2.5728) grad_norm: 1.1013 (1.4224) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 11:04:35 root] (utils.py 283): INFO Epoch: [8] [ 510/2502] eta: 1:35:23 lr: 0.000011 loss_cls: 2.7232 (2.5724) grad_norm: 1.0724 (1.4157) time: 2.8730 data: 0.0003 max mem: 28454 +[2024-12-12 11:05:04 root] (utils.py 283): INFO Epoch: [8] [ 520/2502] eta: 1:34:54 lr: 0.000011 loss_cls: 2.7232 (2.5733) grad_norm: 1.0405 (1.4125) time: 2.8702 data: 0.0003 max mem: 28454 +[2024-12-12 11:05:33 root] (utils.py 283): INFO Epoch: [8] [ 530/2502] eta: 1:34:26 lr: 0.000011 loss_cls: 2.6784 (2.5752) grad_norm: 1.1084 (1.4071) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 11:06:01 root] (utils.py 283): INFO Epoch: [8] [ 540/2502] eta: 1:33:57 lr: 0.000011 loss_cls: 2.6119 (2.5718) grad_norm: 1.1084 (1.4033) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 11:06:30 root] (utils.py 283): INFO Epoch: [8] [ 550/2502] eta: 1:33:28 lr: 0.000011 loss_cls: 2.6216 (2.5748) grad_norm: 1.0836 (1.3977) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 11:06:59 root] (utils.py 283): INFO Epoch: [8] [ 560/2502] eta: 1:33:00 lr: 0.000011 loss_cls: 2.7836 (2.5775) grad_norm: 1.1122 (1.3941) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 11:07:28 root] (utils.py 283): INFO Epoch: [8] [ 570/2502] eta: 1:32:31 lr: 0.000011 loss_cls: 2.6211 (2.5790) grad_norm: 1.1939 (1.3928) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 11:07:56 root] (utils.py 283): INFO Epoch: [8] [ 580/2502] eta: 1:32:02 lr: 0.000011 loss_cls: 2.6211 (2.5776) grad_norm: 1.1522 (1.3892) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 11:08:25 root] (utils.py 283): INFO Epoch: [8] [ 590/2502] eta: 1:31:33 lr: 0.000011 loss_cls: 2.5293 (2.5777) grad_norm: 1.0762 (1.4261) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 11:08:54 root] (utils.py 283): INFO Epoch: [8] [ 600/2502] eta: 1:31:05 lr: 0.000011 loss_cls: 2.7781 (2.5823) grad_norm: 1.1542 (1.4216) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 11:09:22 root] (utils.py 283): INFO Epoch: [8] [ 610/2502] eta: 1:30:36 lr: 0.000011 loss_cls: 2.7781 (2.5820) grad_norm: 1.1493 (1.4185) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 11:09:51 root] (utils.py 283): INFO Epoch: [8] [ 620/2502] eta: 1:30:07 lr: 0.000011 loss_cls: 2.6699 (2.5814) grad_norm: 1.1358 (1.4163) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 11:10:20 root] (utils.py 283): INFO Epoch: [8] [ 630/2502] eta: 1:29:39 lr: 0.000011 loss_cls: 2.6518 (2.5813) grad_norm: 1.1421 (1.4157) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 11:10:49 root] (utils.py 283): INFO Epoch: [8] [ 640/2502] eta: 1:29:10 lr: 0.000011 loss_cls: 2.8110 (2.5845) grad_norm: 1.1488 (1.4116) time: 2.8731 data: 0.0003 max mem: 28454 +[2024-12-12 11:11:17 root] (utils.py 283): INFO Epoch: [8] [ 650/2502] eta: 1:28:41 lr: 0.000011 loss_cls: 2.7305 (2.5827) grad_norm: 1.1488 (1.4085) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 11:11:46 root] (utils.py 283): INFO Epoch: [8] [ 660/2502] eta: 1:28:12 lr: 0.000011 loss_cls: 2.5178 (2.5830) grad_norm: 1.1642 (1.4050) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 11:12:15 root] (utils.py 283): INFO Epoch: [8] [ 670/2502] eta: 1:27:43 lr: 0.000011 loss_cls: 2.6018 (2.5830) grad_norm: 1.1404 (1.4122) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 11:12:44 root] (utils.py 283): INFO Epoch: [8] [ 680/2502] eta: 1:27:15 lr: 0.000011 loss_cls: 2.5944 (2.5829) grad_norm: 1.1006 (1.4074) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 11:13:12 root] (utils.py 283): INFO Epoch: [8] [ 690/2502] eta: 1:26:46 lr: 0.000011 loss_cls: 2.6029 (2.5830) grad_norm: 1.0940 (1.4037) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 11:13:41 root] (utils.py 283): INFO Epoch: [8] [ 700/2502] eta: 1:26:17 lr: 0.000011 loss_cls: 2.6663 (2.5834) grad_norm: 1.0434 (1.3995) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 11:14:10 root] (utils.py 283): INFO Epoch: [8] [ 710/2502] eta: 1:25:48 lr: 0.000011 loss_cls: 2.6663 (2.5849) grad_norm: 1.0883 (1.3971) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 11:14:38 root] (utils.py 283): INFO Epoch: [8] [ 720/2502] eta: 1:25:19 lr: 0.000011 loss_cls: 2.5127 (2.5825) grad_norm: 1.0883 (1.3933) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 11:15:07 root] (utils.py 283): INFO Epoch: [8] [ 730/2502] eta: 1:24:50 lr: 0.000011 loss_cls: 2.4458 (2.5828) grad_norm: 1.0772 (1.3927) time: 2.8665 data: 0.0002 max mem: 28454 +[2024-12-12 11:15:36 root] (utils.py 283): INFO Epoch: [8] [ 740/2502] eta: 1:24:22 lr: 0.000011 loss_cls: 2.6647 (2.5825) grad_norm: 1.1059 (1.3987) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 11:16:04 root] (utils.py 283): INFO Epoch: [8] [ 750/2502] eta: 1:23:53 lr: 0.000011 loss_cls: 2.6647 (2.5825) grad_norm: 1.0880 (1.3940) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 11:16:33 root] (utils.py 283): INFO Epoch: [8] [ 760/2502] eta: 1:23:24 lr: 0.000011 loss_cls: 2.6513 (2.5826) grad_norm: 1.0880 (1.3902) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 11:17:02 root] (utils.py 283): INFO Epoch: [8] [ 770/2502] eta: 1:22:55 lr: 0.000011 loss_cls: 2.7609 (2.5847) grad_norm: 1.1248 (1.3902) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 11:17:30 root] (utils.py 283): INFO Epoch: [8] [ 780/2502] eta: 1:22:26 lr: 0.000011 loss_cls: 2.7535 (2.5823) grad_norm: 1.2031 (1.3893) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 11:17:59 root] (utils.py 283): INFO Epoch: [8] [ 790/2502] eta: 1:21:57 lr: 0.000011 loss_cls: 2.3482 (2.5798) grad_norm: 1.1702 (1.3864) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 11:18:28 root] (utils.py 283): INFO Epoch: [8] [ 800/2502] eta: 1:21:28 lr: 0.000011 loss_cls: 2.7451 (2.5815) grad_norm: 1.0985 (1.3829) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 11:18:56 root] (utils.py 283): INFO Epoch: [8] [ 810/2502] eta: 1:20:59 lr: 0.000011 loss_cls: 2.6915 (2.5804) grad_norm: 1.0950 (1.3845) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 11:19:25 root] (utils.py 283): INFO Epoch: [8] [ 820/2502] eta: 1:20:31 lr: 0.000011 loss_cls: 2.7185 (2.5829) grad_norm: 1.1155 (1.3820) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 11:19:54 root] (utils.py 283): INFO Epoch: [8] [ 830/2502] eta: 1:20:02 lr: 0.000011 loss_cls: 2.7676 (2.5842) grad_norm: 1.0878 (1.3790) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 11:20:22 root] (utils.py 283): INFO Epoch: [8] [ 840/2502] eta: 1:19:33 lr: 0.000011 loss_cls: 2.6997 (2.5837) grad_norm: 1.0873 (1.3772) time: 2.8654 data: 0.0002 max mem: 28454 +[2024-12-12 11:20:51 root] (utils.py 283): INFO Epoch: [8] [ 850/2502] eta: 1:19:04 lr: 0.000011 loss_cls: 2.5977 (2.5805) grad_norm: 1.1312 (1.3751) time: 2.8650 data: 0.0002 max mem: 28454 +[2024-12-12 11:21:20 root] (utils.py 283): INFO Epoch: [8] [ 860/2502] eta: 1:18:35 lr: 0.000011 loss_cls: 2.6276 (2.5806) grad_norm: 1.0899 (1.3723) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 11:21:48 root] (utils.py 283): INFO Epoch: [8] [ 870/2502] eta: 1:18:06 lr: 0.000011 loss_cls: 2.6479 (2.5778) grad_norm: 1.1074 (1.3725) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 11:22:17 root] (utils.py 283): INFO Epoch: [8] [ 880/2502] eta: 1:17:38 lr: 0.000011 loss_cls: 2.5735 (2.5787) grad_norm: 1.1074 (1.3694) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 11:22:46 root] (utils.py 283): INFO Epoch: [8] [ 890/2502] eta: 1:17:09 lr: 0.000011 loss_cls: 2.6429 (2.5794) grad_norm: 1.1321 (1.3705) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 11:23:14 root] (utils.py 283): INFO Epoch: [8] [ 900/2502] eta: 1:16:40 lr: 0.000011 loss_cls: 2.6429 (2.5795) grad_norm: 1.1808 (1.3682) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 11:23:43 root] (utils.py 283): INFO Epoch: [8] [ 910/2502] eta: 1:16:11 lr: 0.000011 loss_cls: 2.7120 (2.5792) grad_norm: 1.1159 (1.3682) time: 2.8669 data: 0.0003 max mem: 28454 +[2024-12-12 11:24:12 root] (utils.py 283): INFO Epoch: [8] [ 920/2502] eta: 1:15:43 lr: 0.000011 loss_cls: 2.6985 (2.5781) grad_norm: 1.1076 (1.3653) time: 2.8627 data: 0.0003 max mem: 28454 +[2024-12-12 11:24:40 root] (utils.py 283): INFO Epoch: [8] [ 930/2502] eta: 1:15:14 lr: 0.000011 loss_cls: 2.4579 (2.5742) grad_norm: 1.0232 (1.3621) time: 2.8655 data: 0.0002 max mem: 28454 +[2024-12-12 11:25:09 root] (utils.py 283): INFO Epoch: [8] [ 940/2502] eta: 1:14:45 lr: 0.000011 loss_cls: 2.4642 (2.5764) grad_norm: 1.1185 (1.3632) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 11:25:38 root] (utils.py 283): INFO Epoch: [8] [ 950/2502] eta: 1:14:16 lr: 0.000011 loss_cls: 2.7577 (2.5757) grad_norm: 1.1434 (1.3617) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 11:26:06 root] (utils.py 283): INFO Epoch: [8] [ 960/2502] eta: 1:13:47 lr: 0.000011 loss_cls: 2.6381 (2.5737) grad_norm: 1.0924 (1.3587) time: 2.8659 data: 0.0003 max mem: 28454 +[2024-12-12 11:26:35 root] (utils.py 283): INFO Epoch: [8] [ 970/2502] eta: 1:13:18 lr: 0.000011 loss_cls: 2.4225 (2.5725) grad_norm: 1.1019 (1.3577) time: 2.8656 data: 0.0003 max mem: 28454 +[2024-12-12 11:27:04 root] (utils.py 283): INFO Epoch: [8] [ 980/2502] eta: 1:12:50 lr: 0.000011 loss_cls: 2.6472 (2.5742) grad_norm: 1.1606 (1.3555) time: 2.8659 data: 0.0003 max mem: 28454 +[2024-12-12 11:27:32 root] (utils.py 283): INFO Epoch: [8] [ 990/2502] eta: 1:12:21 lr: 0.000011 loss_cls: 2.7342 (2.5761) grad_norm: 1.1825 (1.3555) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 11:28:01 root] (utils.py 283): INFO Epoch: [8] [1000/2502] eta: 1:11:52 lr: 0.000011 loss_cls: 2.7268 (2.5745) grad_norm: 1.1965 (1.3555) time: 2.8645 data: 0.0002 max mem: 28454 +[2024-12-12 11:28:30 root] (utils.py 283): INFO Epoch: [8] [1010/2502] eta: 1:11:23 lr: 0.000011 loss_cls: 2.4123 (2.5739) grad_norm: 1.2041 (1.3539) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 11:28:58 root] (utils.py 283): INFO Epoch: [8] [1020/2502] eta: 1:10:55 lr: 0.000011 loss_cls: 2.7047 (2.5742) grad_norm: 1.0836 (1.3510) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 11:29:27 root] (utils.py 283): INFO Epoch: [8] [1030/2502] eta: 1:10:26 lr: 0.000011 loss_cls: 2.6885 (2.5743) grad_norm: 1.0836 (1.3491) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 11:29:56 root] (utils.py 283): INFO Epoch: [8] [1040/2502] eta: 1:09:57 lr: 0.000011 loss_cls: 2.5675 (2.5732) grad_norm: 1.0941 (1.3473) time: 2.8641 data: 0.0002 max mem: 28454 +[2024-12-12 11:30:24 root] (utils.py 283): INFO Epoch: [8] [1050/2502] eta: 1:09:28 lr: 0.000011 loss_cls: 2.5575 (2.5739) grad_norm: 1.0726 (1.3453) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 11:30:53 root] (utils.py 283): INFO Epoch: [8] [1060/2502] eta: 1:08:59 lr: 0.000011 loss_cls: 2.5575 (2.5720) grad_norm: 1.0742 (1.3433) time: 2.8665 data: 0.0002 max mem: 28454 +[2024-12-12 11:31:22 root] (utils.py 283): INFO Epoch: [8] [1070/2502] eta: 1:08:31 lr: 0.000011 loss_cls: 2.6097 (2.5717) grad_norm: 1.1381 (1.3415) time: 2.8653 data: 0.0003 max mem: 28454 +[2024-12-12 11:31:50 root] (utils.py 283): INFO Epoch: [8] [1080/2502] eta: 1:08:02 lr: 0.000011 loss_cls: 2.6796 (2.5704) grad_norm: 1.1299 (1.3399) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 11:32:19 root] (utils.py 283): INFO Epoch: [8] [1090/2502] eta: 1:07:33 lr: 0.000011 loss_cls: 2.6772 (2.5693) grad_norm: 1.0634 (1.3395) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 11:32:48 root] (utils.py 283): INFO Epoch: [8] [1100/2502] eta: 1:07:04 lr: 0.000011 loss_cls: 2.4800 (2.5691) grad_norm: 1.0634 (1.3413) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 11:33:16 root] (utils.py 283): INFO Epoch: [8] [1110/2502] eta: 1:06:36 lr: 0.000011 loss_cls: 2.4800 (2.5677) grad_norm: 1.2013 (1.3409) time: 2.8668 data: 0.0003 max mem: 28454 +[2024-12-12 11:33:45 root] (utils.py 283): INFO Epoch: [8] [1120/2502] eta: 1:06:07 lr: 0.000011 loss_cls: 2.6046 (2.5663) grad_norm: 1.1600 (1.3392) time: 2.8650 data: 0.0003 max mem: 28454 +[2024-12-12 11:34:14 root] (utils.py 283): INFO Epoch: [8] [1130/2502] eta: 1:05:38 lr: 0.000011 loss_cls: 2.6406 (2.5673) grad_norm: 1.1043 (1.3367) time: 2.8644 data: 0.0003 max mem: 28454 +[2024-12-12 11:34:42 root] (utils.py 283): INFO Epoch: [8] [1140/2502] eta: 1:05:09 lr: 0.000011 loss_cls: 2.7247 (2.5684) grad_norm: 1.1043 (1.3356) time: 2.8613 data: 0.0003 max mem: 28454 +[2024-12-12 11:35:11 root] (utils.py 283): INFO Epoch: [8] [1150/2502] eta: 1:04:40 lr: 0.000011 loss_cls: 2.6511 (2.5681) grad_norm: 1.1267 (1.3347) time: 2.8599 data: 0.0002 max mem: 28454 +[2024-12-12 11:35:39 root] (utils.py 283): INFO Epoch: [8] [1160/2502] eta: 1:04:12 lr: 0.000011 loss_cls: 2.6378 (2.5673) grad_norm: 1.0830 (1.3338) time: 2.8641 data: 0.0002 max mem: 28454 +[2024-12-12 11:36:08 root] (utils.py 283): INFO Epoch: [8] [1170/2502] eta: 1:03:43 lr: 0.000011 loss_cls: 2.6621 (2.5684) grad_norm: 1.0687 (1.3364) time: 2.8659 data: 0.0003 max mem: 28454 +[2024-12-12 11:36:37 root] (utils.py 283): INFO Epoch: [8] [1180/2502] eta: 1:03:14 lr: 0.000011 loss_cls: 2.7274 (2.5699) grad_norm: 1.1547 (1.3364) time: 2.8671 data: 0.0003 max mem: 28454 +[2024-12-12 11:37:05 root] (utils.py 283): INFO Epoch: [8] [1190/2502] eta: 1:02:45 lr: 0.000011 loss_cls: 2.6631 (2.5704) grad_norm: 1.1428 (1.3360) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 11:37:34 root] (utils.py 283): INFO Epoch: [8] [1200/2502] eta: 1:02:17 lr: 0.000011 loss_cls: 2.7447 (2.5715) grad_norm: 1.1428 (1.3408) time: 2.8688 data: 0.0003 max mem: 28454 +[2024-12-12 11:38:03 root] (utils.py 283): INFO Epoch: [8] [1210/2502] eta: 1:01:48 lr: 0.000011 loss_cls: 2.7461 (2.5708) grad_norm: 1.1771 (1.3831) time: 2.8693 data: 0.0003 max mem: 28454 +[2024-12-12 11:38:31 root] (utils.py 283): INFO Epoch: [8] [1220/2502] eta: 1:01:19 lr: 0.000011 loss_cls: 2.6598 (2.5712) grad_norm: 1.3232 (1.3865) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 11:39:00 root] (utils.py 283): INFO Epoch: [8] [1230/2502] eta: 1:00:50 lr: 0.000011 loss_cls: 2.6598 (2.5722) grad_norm: 1.3075 (1.3858) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 11:39:29 root] (utils.py 283): INFO Epoch: [8] [1240/2502] eta: 1:00:22 lr: 0.000011 loss_cls: 2.7105 (2.5723) grad_norm: 1.2062 (1.3861) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 11:39:58 root] (utils.py 283): INFO Epoch: [8] [1250/2502] eta: 0:59:53 lr: 0.000011 loss_cls: 2.7727 (2.5725) grad_norm: 1.1720 (1.3850) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 11:40:26 root] (utils.py 283): INFO Epoch: [8] [1260/2502] eta: 0:59:24 lr: 0.000011 loss_cls: 2.7425 (2.5729) grad_norm: 1.1624 (1.3832) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 11:40:55 root] (utils.py 283): INFO Epoch: [8] [1270/2502] eta: 0:58:56 lr: 0.000011 loss_cls: 2.6998 (2.5736) grad_norm: 1.1624 (1.3852) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 11:41:24 root] (utils.py 283): INFO Epoch: [8] [1280/2502] eta: 0:58:27 lr: 0.000011 loss_cls: 2.6125 (2.5723) grad_norm: 1.1113 (1.3838) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 11:41:53 root] (utils.py 283): INFO Epoch: [8] [1290/2502] eta: 0:57:59 lr: 0.000011 loss_cls: 2.4674 (2.5706) grad_norm: 1.0919 (1.3823) time: 2.8797 data: 0.0002 max mem: 28454 +[2024-12-12 11:42:21 root] (utils.py 283): INFO Epoch: [8] [1300/2502] eta: 0:57:30 lr: 0.000011 loss_cls: 2.2194 (2.5669) grad_norm: 1.0919 (1.3800) time: 2.8808 data: 0.0002 max mem: 28454 +[2024-12-12 11:42:50 root] (utils.py 283): INFO Epoch: [8] [1310/2502] eta: 0:57:01 lr: 0.000011 loss_cls: 2.3325 (2.5681) grad_norm: 1.0709 (1.3800) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 11:43:19 root] (utils.py 283): INFO Epoch: [8] [1320/2502] eta: 0:56:33 lr: 0.000011 loss_cls: 2.7507 (2.5685) grad_norm: 1.0943 (1.3785) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 11:43:48 root] (utils.py 283): INFO Epoch: [8] [1330/2502] eta: 0:56:04 lr: 0.000011 loss_cls: 2.7237 (2.5691) grad_norm: 1.0438 (1.3766) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 11:44:16 root] (utils.py 283): INFO Epoch: [8] [1340/2502] eta: 0:55:35 lr: 0.000011 loss_cls: 2.6890 (2.5707) grad_norm: 1.0438 (1.3751) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 11:44:45 root] (utils.py 283): INFO Epoch: [8] [1350/2502] eta: 0:55:06 lr: 0.000011 loss_cls: 2.8473 (2.5722) grad_norm: 1.1256 (1.3735) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 11:45:14 root] (utils.py 283): INFO Epoch: [8] [1360/2502] eta: 0:54:38 lr: 0.000011 loss_cls: 2.7806 (2.5725) grad_norm: 1.1427 (1.3729) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 11:45:42 root] (utils.py 283): INFO Epoch: [8] [1370/2502] eta: 0:54:09 lr: 0.000011 loss_cls: 2.6375 (2.5715) grad_norm: 1.1364 (1.3722) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 11:46:11 root] (utils.py 283): INFO Epoch: [8] [1380/2502] eta: 0:53:40 lr: 0.000011 loss_cls: 2.7016 (2.5723) grad_norm: 1.1413 (1.3718) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-12 11:46:40 root] (utils.py 283): INFO Epoch: [8] [1390/2502] eta: 0:53:11 lr: 0.000011 loss_cls: 2.5652 (2.5714) grad_norm: 1.1917 (1.3745) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 11:47:08 root] (utils.py 283): INFO Epoch: [8] [1400/2502] eta: 0:52:43 lr: 0.000011 loss_cls: 2.4030 (2.5703) grad_norm: 1.1391 (1.3725) time: 2.8636 data: 0.0002 max mem: 28454 +[2024-12-12 11:47:37 root] (utils.py 283): INFO Epoch: [8] [1410/2502] eta: 0:52:14 lr: 0.000011 loss_cls: 2.7099 (2.5700) grad_norm: 1.0959 (1.3704) time: 2.8630 data: 0.0002 max mem: 28454 +[2024-12-12 11:48:06 root] (utils.py 283): INFO Epoch: [8] [1420/2502] eta: 0:51:45 lr: 0.000011 loss_cls: 2.7183 (2.5691) grad_norm: 1.0546 (1.3683) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 11:48:34 root] (utils.py 283): INFO Epoch: [8] [1430/2502] eta: 0:51:17 lr: 0.000011 loss_cls: 2.4348 (2.5685) grad_norm: 1.1064 (1.3667) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 11:49:03 root] (utils.py 283): INFO Epoch: [8] [1440/2502] eta: 0:50:48 lr: 0.000011 loss_cls: 2.4348 (2.5682) grad_norm: 1.1064 (1.3652) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 11:49:32 root] (utils.py 283): INFO Epoch: [8] [1450/2502] eta: 0:50:19 lr: 0.000011 loss_cls: 2.6455 (2.5668) grad_norm: 1.1004 (1.3637) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 11:50:01 root] (utils.py 283): INFO Epoch: [8] [1460/2502] eta: 0:49:50 lr: 0.000011 loss_cls: 2.6669 (2.5678) grad_norm: 1.1444 (1.3654) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 11:50:29 root] (utils.py 283): INFO Epoch: [8] [1470/2502] eta: 0:49:22 lr: 0.000011 loss_cls: 2.6745 (2.5674) grad_norm: 1.1598 (1.3675) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 11:50:58 root] (utils.py 283): INFO Epoch: [8] [1480/2502] eta: 0:48:53 lr: 0.000011 loss_cls: 2.5878 (2.5663) grad_norm: 1.1239 (1.3658) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 11:51:27 root] (utils.py 283): INFO Epoch: [8] [1490/2502] eta: 0:48:24 lr: 0.000011 loss_cls: 2.3403 (2.5654) grad_norm: 1.0814 (1.3653) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 11:51:55 root] (utils.py 283): INFO Epoch: [8] [1500/2502] eta: 0:47:56 lr: 0.000011 loss_cls: 2.5627 (2.5647) grad_norm: 1.0814 (1.3640) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 11:52:24 root] (utils.py 283): INFO Epoch: [8] [1510/2502] eta: 0:47:27 lr: 0.000011 loss_cls: 2.6411 (2.5652) grad_norm: 1.0773 (1.3627) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 11:52:53 root] (utils.py 283): INFO Epoch: [8] [1520/2502] eta: 0:46:58 lr: 0.000011 loss_cls: 2.5118 (2.5633) grad_norm: 1.1349 (1.3642) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 11:53:21 root] (utils.py 283): INFO Epoch: [8] [1530/2502] eta: 0:46:30 lr: 0.000011 loss_cls: 2.1420 (2.5624) grad_norm: 1.1322 (1.3638) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 11:53:50 root] (utils.py 283): INFO Epoch: [8] [1540/2502] eta: 0:46:01 lr: 0.000011 loss_cls: 2.3252 (2.5611) grad_norm: 1.1068 (1.3626) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 11:54:19 root] (utils.py 283): INFO Epoch: [8] [1550/2502] eta: 0:45:32 lr: 0.000011 loss_cls: 2.5827 (2.5615) grad_norm: 1.1136 (1.3608) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 11:54:48 root] (utils.py 283): INFO Epoch: [8] [1560/2502] eta: 0:45:03 lr: 0.000011 loss_cls: 2.7362 (2.5619) grad_norm: 1.1208 (1.3594) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 11:55:16 root] (utils.py 283): INFO Epoch: [8] [1570/2502] eta: 0:44:35 lr: 0.000011 loss_cls: 2.7389 (2.5621) grad_norm: 1.1498 (1.3580) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 11:55:45 root] (utils.py 283): INFO Epoch: [8] [1580/2502] eta: 0:44:06 lr: 0.000011 loss_cls: 2.5225 (2.5603) grad_norm: 1.1096 (1.3566) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 11:56:14 root] (utils.py 283): INFO Epoch: [8] [1590/2502] eta: 0:43:37 lr: 0.000011 loss_cls: 2.3712 (2.5590) grad_norm: 1.1020 (1.4509) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 11:56:42 root] (utils.py 283): INFO Epoch: [8] [1600/2502] eta: 0:43:09 lr: 0.000011 loss_cls: 2.2017 (2.5571) grad_norm: 1.2493 (1.4505) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 11:57:11 root] (utils.py 283): INFO Epoch: [8] [1610/2502] eta: 0:42:40 lr: 0.000011 loss_cls: 2.7818 (2.5585) grad_norm: 1.4270 (1.4532) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 11:57:40 root] (utils.py 283): INFO Epoch: [8] [1620/2502] eta: 0:42:11 lr: 0.000011 loss_cls: 2.8213 (2.5584) grad_norm: 1.4718 (1.4529) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 11:58:08 root] (utils.py 283): INFO Epoch: [8] [1630/2502] eta: 0:41:42 lr: 0.000011 loss_cls: 2.7858 (2.5589) grad_norm: 1.2938 (1.4525) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 11:58:37 root] (utils.py 283): INFO Epoch: [8] [1640/2502] eta: 0:41:14 lr: 0.000011 loss_cls: 2.6410 (2.5591) grad_norm: 1.2653 (1.4795) time: 2.8656 data: 0.0003 max mem: 28454 +[2024-12-12 11:59:06 root] (utils.py 283): INFO Epoch: [8] [1650/2502] eta: 0:40:45 lr: 0.000011 loss_cls: 2.5641 (2.5588) grad_norm: 1.2682 (1.4781) time: 2.8646 data: 0.0002 max mem: 28454 +[2024-12-12 11:59:34 root] (utils.py 283): INFO Epoch: [8] [1660/2502] eta: 0:40:16 lr: 0.000011 loss_cls: 2.5658 (2.5593) grad_norm: 1.2645 (1.4775) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 12:00:03 root] (utils.py 283): INFO Epoch: [8] [1670/2502] eta: 0:39:48 lr: 0.000011 loss_cls: 2.7204 (2.5599) grad_norm: 1.2344 (1.4759) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 12:00:32 root] (utils.py 283): INFO Epoch: [8] [1680/2502] eta: 0:39:19 lr: 0.000011 loss_cls: 2.7204 (2.5607) grad_norm: 1.2250 (1.4756) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 12:01:01 root] (utils.py 283): INFO Epoch: [8] [1690/2502] eta: 0:38:50 lr: 0.000011 loss_cls: 2.6437 (2.5606) grad_norm: 1.1972 (1.4741) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 12:01:29 root] (utils.py 283): INFO Epoch: [8] [1700/2502] eta: 0:38:21 lr: 0.000011 loss_cls: 2.6437 (2.5613) grad_norm: 1.1670 (1.4723) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 12:01:58 root] (utils.py 283): INFO Epoch: [8] [1710/2502] eta: 0:37:53 lr: 0.000011 loss_cls: 2.7608 (2.5622) grad_norm: 1.1505 (1.4708) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 12:02:27 root] (utils.py 283): INFO Epoch: [8] [1720/2502] eta: 0:37:24 lr: 0.000011 loss_cls: 2.7693 (2.5627) grad_norm: 1.1983 (1.4700) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 12:02:55 root] (utils.py 283): INFO Epoch: [8] [1730/2502] eta: 0:36:55 lr: 0.000011 loss_cls: 2.7238 (2.5620) grad_norm: 1.1201 (1.4679) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 12:03:24 root] (utils.py 283): INFO Epoch: [8] [1740/2502] eta: 0:36:27 lr: 0.000011 loss_cls: 2.7078 (2.5623) grad_norm: 1.0849 (1.4661) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 12:03:53 root] (utils.py 283): INFO Epoch: [8] [1750/2502] eta: 0:35:58 lr: 0.000011 loss_cls: 2.7838 (2.5640) grad_norm: 1.0995 (1.4641) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 12:04:22 root] (utils.py 283): INFO Epoch: [8] [1760/2502] eta: 0:35:29 lr: 0.000011 loss_cls: 2.8561 (2.5636) grad_norm: 1.1399 (1.4630) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 12:04:50 root] (utils.py 283): INFO Epoch: [8] [1770/2502] eta: 0:35:01 lr: 0.000011 loss_cls: 2.4568 (2.5640) grad_norm: 1.1531 (1.4631) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 12:05:19 root] (utils.py 283): INFO Epoch: [8] [1780/2502] eta: 0:34:32 lr: 0.000011 loss_cls: 2.4568 (2.5627) grad_norm: 1.1711 (1.4682) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 12:05:48 root] (utils.py 283): INFO Epoch: [8] [1790/2502] eta: 0:34:03 lr: 0.000011 loss_cls: 2.5900 (2.5632) grad_norm: 1.1805 (1.4667) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 12:06:17 root] (utils.py 283): INFO Epoch: [8] [1800/2502] eta: 0:33:35 lr: 0.000011 loss_cls: 2.8624 (2.5647) grad_norm: 1.0858 (1.4645) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 12:06:45 root] (utils.py 283): INFO Epoch: [8] [1810/2502] eta: 0:33:06 lr: 0.000011 loss_cls: 2.8287 (2.5651) grad_norm: 1.0858 (1.4645) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 12:07:14 root] (utils.py 283): INFO Epoch: [8] [1820/2502] eta: 0:32:37 lr: 0.000011 loss_cls: 2.7066 (2.5649) grad_norm: 1.1300 (1.4650) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 12:07:43 root] (utils.py 283): INFO Epoch: [8] [1830/2502] eta: 0:32:08 lr: 0.000011 loss_cls: 2.5071 (2.5650) grad_norm: 1.1193 (1.4648) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 12:08:11 root] (utils.py 283): INFO Epoch: [8] [1840/2502] eta: 0:31:40 lr: 0.000011 loss_cls: 2.4296 (2.5633) grad_norm: 1.1458 (1.4631) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 12:08:40 root] (utils.py 283): INFO Epoch: [8] [1850/2502] eta: 0:31:11 lr: 0.000011 loss_cls: 2.6048 (2.5636) grad_norm: 1.1596 (1.4620) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 12:09:09 root] (utils.py 283): INFO Epoch: [8] [1860/2502] eta: 0:30:42 lr: 0.000011 loss_cls: 2.7647 (2.5647) grad_norm: 1.1596 (1.4658) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 12:09:38 root] (utils.py 283): INFO Epoch: [8] [1870/2502] eta: 0:30:14 lr: 0.000011 loss_cls: 2.6014 (2.5629) grad_norm: 1.1091 (1.4641) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-12 12:10:06 root] (utils.py 283): INFO Epoch: [8] [1880/2502] eta: 0:29:45 lr: 0.000011 loss_cls: 2.6035 (2.5646) grad_norm: 1.2121 (1.4635) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 12:10:35 root] (utils.py 283): INFO Epoch: [8] [1890/2502] eta: 0:29:16 lr: 0.000011 loss_cls: 2.7949 (2.5651) grad_norm: 1.1984 (1.4636) time: 2.8740 data: 0.0003 max mem: 28454 +[2024-12-12 12:11:04 root] (utils.py 283): INFO Epoch: [8] [1900/2502] eta: 0:28:48 lr: 0.000011 loss_cls: 2.7867 (2.5649) grad_norm: 1.1258 (1.4621) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 12:11:33 root] (utils.py 283): INFO Epoch: [8] [1910/2502] eta: 0:28:19 lr: 0.000011 loss_cls: 2.6976 (2.5655) grad_norm: 1.2145 (1.4639) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 12:12:01 root] (utils.py 283): INFO Epoch: [8] [1920/2502] eta: 0:27:50 lr: 0.000011 loss_cls: 2.6991 (2.5656) grad_norm: 1.2387 (1.4624) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-12 12:12:30 root] (utils.py 283): INFO Epoch: [8] [1930/2502] eta: 0:27:22 lr: 0.000011 loss_cls: 2.6759 (2.5654) grad_norm: 1.0456 (1.4605) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 12:12:59 root] (utils.py 283): INFO Epoch: [8] [1940/2502] eta: 0:26:53 lr: 0.000011 loss_cls: 2.6692 (2.5650) grad_norm: 1.0440 (1.4593) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 12:13:28 root] (utils.py 283): INFO Epoch: [8] [1950/2502] eta: 0:26:24 lr: 0.000011 loss_cls: 2.4905 (2.5647) grad_norm: 1.1407 (1.4588) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 12:13:56 root] (utils.py 283): INFO Epoch: [8] [1960/2502] eta: 0:25:55 lr: 0.000011 loss_cls: 2.5239 (2.5658) grad_norm: 1.0976 (1.4570) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 12:14:25 root] (utils.py 283): INFO Epoch: [8] [1970/2502] eta: 0:25:27 lr: 0.000011 loss_cls: 2.7737 (2.5661) grad_norm: 1.1582 (1.4555) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 12:14:54 root] (utils.py 283): INFO Epoch: [8] [1980/2502] eta: 0:24:58 lr: 0.000011 loss_cls: 2.6525 (2.5660) grad_norm: 1.1509 (1.4542) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 12:15:23 root] (utils.py 283): INFO Epoch: [8] [1990/2502] eta: 0:24:29 lr: 0.000011 loss_cls: 2.6091 (2.5661) grad_norm: 1.1314 (1.4524) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 12:15:51 root] (utils.py 283): INFO Epoch: [8] [2000/2502] eta: 0:24:01 lr: 0.000011 loss_cls: 2.6977 (2.5662) grad_norm: 1.1428 (1.4509) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 12:16:20 root] (utils.py 283): INFO Epoch: [8] [2010/2502] eta: 0:23:32 lr: 0.000011 loss_cls: 2.5192 (2.5652) grad_norm: 1.1428 (1.4498) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 12:16:49 root] (utils.py 283): INFO Epoch: [8] [2020/2502] eta: 0:23:03 lr: 0.000011 loss_cls: 2.4144 (2.5640) grad_norm: 1.1215 (1.4490) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 12:17:17 root] (utils.py 283): INFO Epoch: [8] [2030/2502] eta: 0:22:35 lr: 0.000011 loss_cls: 2.5891 (2.5644) grad_norm: 1.2325 (1.4478) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 12:17:46 root] (utils.py 283): INFO Epoch: [8] [2040/2502] eta: 0:22:06 lr: 0.000011 loss_cls: 2.5740 (2.5641) grad_norm: 1.1865 (1.4474) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 12:18:15 root] (utils.py 283): INFO Epoch: [8] [2050/2502] eta: 0:21:37 lr: 0.000011 loss_cls: 2.5598 (2.5648) grad_norm: 1.1654 (1.4461) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 12:18:44 root] (utils.py 283): INFO Epoch: [8] [2060/2502] eta: 0:21:08 lr: 0.000011 loss_cls: 2.5806 (2.5644) grad_norm: 1.1526 (1.4452) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 12:19:12 root] (utils.py 283): INFO Epoch: [8] [2070/2502] eta: 0:20:40 lr: 0.000011 loss_cls: 2.6346 (2.5643) grad_norm: 1.1526 (1.4439) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 12:19:41 root] (utils.py 283): INFO Epoch: [8] [2080/2502] eta: 0:20:11 lr: 0.000011 loss_cls: 2.5585 (2.5641) grad_norm: 1.1557 (1.4427) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 12:20:10 root] (utils.py 283): INFO Epoch: [8] [2090/2502] eta: 0:19:42 lr: 0.000011 loss_cls: 2.6821 (2.5650) grad_norm: 1.1252 (1.4413) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 12:20:39 root] (utils.py 283): INFO Epoch: [8] [2100/2502] eta: 0:19:14 lr: 0.000011 loss_cls: 2.5419 (2.5634) grad_norm: 1.0735 (1.4397) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 12:21:07 root] (utils.py 283): INFO Epoch: [8] [2110/2502] eta: 0:18:45 lr: 0.000011 loss_cls: 2.4020 (2.5632) grad_norm: 1.0793 (1.4386) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 12:21:36 root] (utils.py 283): INFO Epoch: [8] [2120/2502] eta: 0:18:16 lr: 0.000011 loss_cls: 2.4964 (2.5635) grad_norm: 1.1748 (1.4384) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 12:22:05 root] (utils.py 283): INFO Epoch: [8] [2130/2502] eta: 0:17:47 lr: 0.000011 loss_cls: 2.6303 (2.5632) grad_norm: 1.1878 (1.4379) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 12:22:33 root] (utils.py 283): INFO Epoch: [8] [2140/2502] eta: 0:17:19 lr: 0.000011 loss_cls: 2.6492 (2.5631) grad_norm: 1.2506 (1.4374) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 12:23:02 root] (utils.py 283): INFO Epoch: [8] [2150/2502] eta: 0:16:50 lr: 0.000011 loss_cls: 2.6612 (2.5625) grad_norm: 1.1799 (1.4366) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 12:23:31 root] (utils.py 283): INFO Epoch: [8] [2160/2502] eta: 0:16:21 lr: 0.000011 loss_cls: 2.7372 (2.5630) grad_norm: 1.2131 (1.4365) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 12:24:00 root] (utils.py 283): INFO Epoch: [8] [2170/2502] eta: 0:15:53 lr: 0.000011 loss_cls: 2.8400 (2.5643) grad_norm: 1.1996 (1.4356) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 12:24:28 root] (utils.py 283): INFO Epoch: [8] [2180/2502] eta: 0:15:24 lr: 0.000011 loss_cls: 2.7719 (2.5634) grad_norm: 1.1765 (1.4346) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 12:24:57 root] (utils.py 283): INFO Epoch: [8] [2190/2502] eta: 0:14:55 lr: 0.000011 loss_cls: 2.7719 (2.5638) grad_norm: 1.1765 (1.4338) time: 2.8664 data: 0.0002 max mem: 28454 +[2024-12-12 12:25:26 root] (utils.py 283): INFO Epoch: [8] [2200/2502] eta: 0:14:26 lr: 0.000011 loss_cls: 2.8277 (2.5632) grad_norm: 1.1984 (1.4327) time: 2.8657 data: 0.0002 max mem: 28454 +[2024-12-12 12:25:54 root] (utils.py 283): INFO Epoch: [8] [2210/2502] eta: 0:13:58 lr: 0.000011 loss_cls: 2.5613 (2.5638) grad_norm: 1.1859 (1.4318) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 12:26:23 root] (utils.py 283): INFO Epoch: [8] [2220/2502] eta: 0:13:29 lr: 0.000011 loss_cls: 2.6571 (2.5634) grad_norm: 1.0366 (1.4300) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 12:26:52 root] (utils.py 283): INFO Epoch: [8] [2230/2502] eta: 0:13:00 lr: 0.000011 loss_cls: 2.6471 (2.5634) grad_norm: 1.0366 (1.4305) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 12:27:20 root] (utils.py 283): INFO Epoch: [8] [2240/2502] eta: 0:12:32 lr: 0.000011 loss_cls: 2.7848 (2.5642) grad_norm: 1.1531 (1.4311) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 12:27:49 root] (utils.py 283): INFO Epoch: [8] [2250/2502] eta: 0:12:03 lr: 0.000011 loss_cls: 2.8215 (2.5653) grad_norm: 1.1510 (1.4300) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 12:28:18 root] (utils.py 283): INFO Epoch: [8] [2260/2502] eta: 0:11:34 lr: 0.000011 loss_cls: 2.8510 (2.5666) grad_norm: 1.2176 (1.4402) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 12:28:46 root] (utils.py 283): INFO Epoch: [8] [2270/2502] eta: 0:11:06 lr: 0.000011 loss_cls: 2.8199 (2.5667) grad_norm: 1.2187 (1.4393) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 12:29:15 root] (utils.py 283): INFO Epoch: [8] [2280/2502] eta: 0:10:37 lr: 0.000011 loss_cls: 2.5295 (2.5664) grad_norm: 1.1931 (1.4383) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 12:29:44 root] (utils.py 283): INFO Epoch: [8] [2290/2502] eta: 0:10:08 lr: 0.000011 loss_cls: 2.4301 (2.5659) grad_norm: 1.1538 (1.4369) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 12:30:12 root] (utils.py 283): INFO Epoch: [8] [2300/2502] eta: 0:09:39 lr: 0.000011 loss_cls: 2.6130 (2.5662) grad_norm: 1.1471 (1.4357) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 12:30:41 root] (utils.py 283): INFO Epoch: [8] [2310/2502] eta: 0:09:11 lr: 0.000011 loss_cls: 2.6673 (2.5664) grad_norm: 1.1355 (1.4348) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 12:31:10 root] (utils.py 283): INFO Epoch: [8] [2320/2502] eta: 0:08:42 lr: 0.000011 loss_cls: 2.7669 (2.5675) grad_norm: 1.0870 (1.4346) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 12:31:38 root] (utils.py 283): INFO Epoch: [8] [2330/2502] eta: 0:08:13 lr: 0.000011 loss_cls: 2.7669 (2.5683) grad_norm: 1.1436 (1.4345) time: 2.8656 data: 0.0003 max mem: 28454 +[2024-12-12 12:32:07 root] (utils.py 283): INFO Epoch: [8] [2340/2502] eta: 0:07:45 lr: 0.000011 loss_cls: 2.6845 (2.5679) grad_norm: 1.1436 (1.4333) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 12:32:36 root] (utils.py 283): INFO Epoch: [8] [2350/2502] eta: 0:07:16 lr: 0.000011 loss_cls: 2.5574 (2.5683) grad_norm: 1.1121 (1.4321) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 12:33:05 root] (utils.py 283): INFO Epoch: [8] [2360/2502] eta: 0:06:47 lr: 0.000011 loss_cls: 2.6795 (2.5680) grad_norm: 1.1121 (1.4309) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 12:33:33 root] (utils.py 283): INFO Epoch: [8] [2370/2502] eta: 0:06:18 lr: 0.000011 loss_cls: 2.6795 (2.5684) grad_norm: 1.0799 (1.4295) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 12:34:02 root] (utils.py 283): INFO Epoch: [8] [2380/2502] eta: 0:05:50 lr: 0.000011 loss_cls: 2.7887 (2.5694) grad_norm: 1.1319 (1.4284) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 12:34:31 root] (utils.py 283): INFO Epoch: [8] [2390/2502] eta: 0:05:21 lr: 0.000011 loss_cls: 2.7887 (2.5692) grad_norm: 1.1477 (1.4272) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 12:34:59 root] (utils.py 283): INFO Epoch: [8] [2400/2502] eta: 0:04:52 lr: 0.000011 loss_cls: 2.6963 (2.5692) grad_norm: 1.0817 (1.4263) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 12:35:28 root] (utils.py 283): INFO Epoch: [8] [2410/2502] eta: 0:04:24 lr: 0.000011 loss_cls: 2.5717 (2.5689) grad_norm: 1.1187 (1.4253) time: 2.8703 data: 0.0003 max mem: 28454 +[2024-12-12 12:35:57 root] (utils.py 283): INFO Epoch: [8] [2420/2502] eta: 0:03:55 lr: 0.000011 loss_cls: 2.7198 (2.5697) grad_norm: 1.1195 (1.4244) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 12:36:25 root] (utils.py 283): INFO Epoch: [8] [2430/2502] eta: 0:03:26 lr: 0.000011 loss_cls: 2.7760 (2.5704) grad_norm: 1.1518 (1.4234) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 12:36:54 root] (utils.py 283): INFO Epoch: [8] [2440/2502] eta: 0:02:57 lr: 0.000011 loss_cls: 2.7342 (2.5702) grad_norm: 1.1496 (1.4222) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 12:37:23 root] (utils.py 283): INFO Epoch: [8] [2450/2502] eta: 0:02:29 lr: 0.000011 loss_cls: 2.7342 (2.5711) grad_norm: 1.1640 (1.4216) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 12:37:52 root] (utils.py 283): INFO Epoch: [8] [2460/2502] eta: 0:02:00 lr: 0.000011 loss_cls: 2.7781 (2.5707) grad_norm: 1.1617 (1.4207) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 12:38:20 root] (utils.py 283): INFO Epoch: [8] [2470/2502] eta: 0:01:31 lr: 0.000011 loss_cls: 2.7630 (2.5707) grad_norm: 1.1394 (1.4208) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 12:38:49 root] (utils.py 283): INFO Epoch: [8] [2480/2502] eta: 0:01:03 lr: 0.000011 loss_cls: 2.6116 (2.5701) grad_norm: 1.1657 (1.4204) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 12:39:18 root] (utils.py 283): INFO Epoch: [8] [2490/2502] eta: 0:00:34 lr: 0.000011 loss_cls: 2.6637 (2.5707) grad_norm: 1.1205 (1.4195) time: 2.8937 data: 0.0212 max mem: 28454 +[2024-12-12 12:39:47 root] (utils.py 283): INFO Epoch: [8] [2500/2502] eta: 0:00:05 lr: 0.000011 loss_cls: 2.7973 (2.5721) grad_norm: 1.1883 (1.4194) time: 2.8893 data: 0.0212 max mem: 28454 +[2024-12-12 12:39:50 root] (utils.py 283): INFO Epoch: [8] [2501/2502] eta: 0:00:02 lr: 0.000011 loss_cls: 2.8103 (2.5722) grad_norm: 1.1748 (1.4192) time: 2.8885 data: 0.0212 max mem: 28454 +[2024-12-12 12:39:50 root] (utils.py 297): INFO Epoch: [8] Total time: 1:59:42 (2.8709 s / it) +[2024-12-12 12:39:50 root] (engine.py 179): INFO Averaged stats:lr: 0.000011 loss_cls: 2.8103 (2.5727) grad_norm: 1.1748 (1.4192) +[2024-12-12 12:39:53 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:52 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4083 (0.4083) acc1: 92.9688 (92.9688) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5390 data: 0.0003 max mem: 28454 +[2024-12-12 12:39:58 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6196 (0.5981) acc1: 85.9375 (86.7188) acc3: 97.6562 (96.8040) acc5: 98.4375 (97.8693) time: 0.5463 data: 0.0005 max mem: 28454 +[2024-12-12 12:40:04 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6141 (0.6205) acc1: 86.7188 (86.4955) acc3: 96.8750 (96.5030) acc5: 98.4375 (97.5074) time: 0.5473 data: 0.0005 max mem: 28454 +[2024-12-12 12:40:09 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6141 (0.6514) acc1: 86.7188 (85.6351) acc3: 95.3125 (96.2702) acc5: 96.8750 (97.4294) time: 0.5479 data: 0.0005 max mem: 28454 +[2024-12-12 12:40:15 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6988 (0.6592) acc1: 85.1562 (85.4992) acc3: 96.8750 (96.2271) acc5: 97.6562 (97.4657) time: 0.5481 data: 0.0005 max mem: 28454 +[2024-12-12 12:40:20 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8312 (0.7417) acc1: 79.6875 (83.7163) acc3: 93.7500 (95.0674) acc5: 95.3125 (96.6146) time: 0.5475 data: 0.0004 max mem: 28454 +[2024-12-12 12:40:26 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9831 (0.7708) acc1: 78.9062 (83.2351) acc3: 90.6250 (94.4800) acc5: 92.9688 (96.1450) time: 0.5474 data: 0.0005 max mem: 28454 +[2024-12-12 12:40:31 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9583 (0.7955) acc1: 78.9062 (82.5814) acc3: 92.1875 (94.2232) acc5: 94.5312 (96.0497) time: 0.5475 data: 0.0004 max mem: 28454 +[2024-12-12 12:40:37 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9583 (0.8180) acc1: 78.9062 (82.2434) acc3: 92.1875 (93.8657) acc5: 94.5312 (95.7755) time: 0.5478 data: 0.0006 max mem: 28454 +[2024-12-12 12:40:42 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9874 (0.8368) acc1: 78.9062 (81.6020) acc3: 90.6250 (93.5783) acc5: 94.5312 (95.6559) time: 0.5484 data: 0.0006 max mem: 28454 +[2024-12-12 12:40:46 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8650 (0.8346) acc1: 78.9062 (81.5920) acc3: 92.1875 (93.6320) acc5: 94.5312 (95.7200) time: 0.5393 data: 0.0006 max mem: 28454 +[2024-12-12 12:40:46 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5460 s / it) +[2024-12-12 12:40:46 root] (engine.py 264): INFO * Acc@1 81.754 Acc@3 93.434 Acc@5 95.756 loss 0.834 flops 13.207 layer_flops 13.109 +[2024-12-12 12:40:46 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.8% +[2024-12-12 12:40:46 root] (main.py 576): INFO Max accuracy: 81.77% +[2024-12-12 12:40:49 root] (utils.py 283): INFO Epoch: [9] [ 0/2502] eta: 1:59:06 lr: 0.000010 loss_cls: 2.5311 (2.5311) grad_norm: 1.1904 (1.1904) time: 2.8563 data: 0.0004 max mem: 28454 +[2024-12-12 12:41:17 root] (utils.py 283): INFO Epoch: [9] [ 10/2502] eta: 1:59:15 lr: 0.000010 loss_cls: 2.7086 (2.6365) grad_norm: 1.1904 (1.1645) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 12:41:46 root] (utils.py 283): INFO Epoch: [9] [ 20/2502] eta: 1:58:48 lr: 0.000010 loss_cls: 2.6661 (2.5684) grad_norm: 1.1037 (1.1083) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 12:42:15 root] (utils.py 283): INFO Epoch: [9] [ 30/2502] eta: 1:58:20 lr: 0.000010 loss_cls: 2.5166 (2.5364) grad_norm: 1.1037 (1.1341) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 12:42:43 root] (utils.py 283): INFO Epoch: [9] [ 40/2502] eta: 1:57:47 lr: 0.000010 loss_cls: 2.5166 (2.5363) grad_norm: 1.1493 (1.2115) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-12 12:43:12 root] (utils.py 283): INFO Epoch: [9] [ 50/2502] eta: 1:57:14 lr: 0.000010 loss_cls: 2.3637 (2.4853) grad_norm: 1.1451 (1.2448) time: 2.8633 data: 0.0002 max mem: 28454 +[2024-12-12 12:43:41 root] (utils.py 283): INFO Epoch: [9] [ 60/2502] eta: 1:56:43 lr: 0.000010 loss_cls: 2.7223 (2.5234) grad_norm: 1.1904 (1.6816) time: 2.8631 data: 0.0002 max mem: 28454 +[2024-12-12 12:44:09 root] (utils.py 283): INFO Epoch: [9] [ 70/2502] eta: 1:56:14 lr: 0.000010 loss_cls: 2.7223 (2.5050) grad_norm: 1.2550 (1.6141) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 12:44:38 root] (utils.py 283): INFO Epoch: [9] [ 80/2502] eta: 1:55:44 lr: 0.000010 loss_cls: 2.4752 (2.4929) grad_norm: 1.1573 (1.5905) time: 2.8651 data: 0.0003 max mem: 28454 +[2024-12-12 12:45:07 root] (utils.py 283): INFO Epoch: [9] [ 90/2502] eta: 1:55:14 lr: 0.000010 loss_cls: 2.5384 (2.5061) grad_norm: 1.1314 (1.5512) time: 2.8626 data: 0.0003 max mem: 28454 +[2024-12-12 12:45:35 root] (utils.py 283): INFO Epoch: [9] [ 100/2502] eta: 1:54:45 lr: 0.000010 loss_cls: 2.5384 (2.5026) grad_norm: 1.0872 (1.5072) time: 2.8639 data: 0.0002 max mem: 28454 +[2024-12-12 12:46:04 root] (utils.py 283): INFO Epoch: [9] [ 110/2502] eta: 1:54:17 lr: 0.000010 loss_cls: 2.7902 (2.5288) grad_norm: 1.0785 (1.5157) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 12:46:33 root] (utils.py 283): INFO Epoch: [9] [ 120/2502] eta: 1:53:48 lr: 0.000010 loss_cls: 2.7464 (2.5418) grad_norm: 1.0758 (1.4873) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 12:47:01 root] (utils.py 283): INFO Epoch: [9] [ 130/2502] eta: 1:53:19 lr: 0.000010 loss_cls: 2.6740 (2.5370) grad_norm: 1.0758 (1.4609) time: 2.8672 data: 0.0003 max mem: 28454 +[2024-12-12 12:47:30 root] (utils.py 283): INFO Epoch: [9] [ 140/2502] eta: 1:52:50 lr: 0.000010 loss_cls: 2.7326 (2.5528) grad_norm: 1.1081 (1.4396) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 12:47:59 root] (utils.py 283): INFO Epoch: [9] [ 150/2502] eta: 1:52:21 lr: 0.000010 loss_cls: 2.7481 (2.5569) grad_norm: 1.0831 (1.4172) time: 2.8646 data: 0.0002 max mem: 28454 +[2024-12-12 12:48:27 root] (utils.py 283): INFO Epoch: [9] [ 160/2502] eta: 1:51:53 lr: 0.000010 loss_cls: 2.5306 (2.5445) grad_norm: 1.1034 (1.4102) time: 2.8654 data: 0.0002 max mem: 28454 +[2024-12-12 12:48:56 root] (utils.py 283): INFO Epoch: [9] [ 170/2502] eta: 1:51:24 lr: 0.000010 loss_cls: 2.3086 (2.5284) grad_norm: 1.1534 (1.4298) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 12:49:25 root] (utils.py 283): INFO Epoch: [9] [ 180/2502] eta: 1:50:55 lr: 0.000010 loss_cls: 2.2338 (2.5166) grad_norm: 1.0378 (1.4104) time: 2.8662 data: 0.0003 max mem: 28454 +[2024-12-12 12:49:53 root] (utils.py 283): INFO Epoch: [9] [ 190/2502] eta: 1:50:26 lr: 0.000010 loss_cls: 2.3553 (2.5189) grad_norm: 1.1006 (1.3960) time: 2.8660 data: 0.0003 max mem: 28454 +[2024-12-12 12:50:22 root] (utils.py 283): INFO Epoch: [9] [ 200/2502] eta: 1:49:58 lr: 0.000010 loss_cls: 2.6567 (2.5256) grad_norm: 1.1638 (1.3930) time: 2.8683 data: 0.0003 max mem: 28454 +[2024-12-12 12:50:51 root] (utils.py 283): INFO Epoch: [9] [ 210/2502] eta: 1:49:30 lr: 0.000010 loss_cls: 2.6608 (2.5246) grad_norm: 1.1920 (1.3839) time: 2.8730 data: 0.0003 max mem: 28454 +[2024-12-12 12:51:19 root] (utils.py 283): INFO Epoch: [9] [ 220/2502] eta: 1:49:02 lr: 0.000010 loss_cls: 2.6758 (2.5311) grad_norm: 1.1786 (1.3871) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 12:51:48 root] (utils.py 283): INFO Epoch: [9] [ 230/2502] eta: 1:48:35 lr: 0.000010 loss_cls: 2.6600 (2.5265) grad_norm: 1.1492 (1.3771) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 12:52:17 root] (utils.py 283): INFO Epoch: [9] [ 240/2502] eta: 1:48:07 lr: 0.000010 loss_cls: 2.6290 (2.5308) grad_norm: 1.1547 (1.3826) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 12:52:46 root] (utils.py 283): INFO Epoch: [9] [ 250/2502] eta: 1:47:39 lr: 0.000010 loss_cls: 2.7633 (2.5382) grad_norm: 1.1803 (1.4101) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 12:53:14 root] (utils.py 283): INFO Epoch: [9] [ 260/2502] eta: 1:47:10 lr: 0.000010 loss_cls: 2.7657 (2.5350) grad_norm: 1.1643 (1.4003) time: 2.8720 data: 0.0003 max mem: 28454 +[2024-12-12 12:53:43 root] (utils.py 283): INFO Epoch: [9] [ 270/2502] eta: 1:46:41 lr: 0.000010 loss_cls: 2.6203 (2.5380) grad_norm: 1.1402 (1.3969) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 12:54:12 root] (utils.py 283): INFO Epoch: [9] [ 280/2502] eta: 1:46:13 lr: 0.000010 loss_cls: 2.7091 (2.5382) grad_norm: 1.1323 (1.3866) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 12:54:40 root] (utils.py 283): INFO Epoch: [9] [ 290/2502] eta: 1:45:45 lr: 0.000010 loss_cls: 2.7336 (2.5463) grad_norm: 1.1323 (1.3790) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 12:55:09 root] (utils.py 283): INFO Epoch: [9] [ 300/2502] eta: 1:45:16 lr: 0.000010 loss_cls: 2.7100 (2.5419) grad_norm: 1.1374 (1.3704) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-12 12:55:38 root] (utils.py 283): INFO Epoch: [9] [ 310/2502] eta: 1:44:48 lr: 0.000010 loss_cls: 2.4896 (2.5345) grad_norm: 1.1392 (1.3640) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 12:56:07 root] (utils.py 283): INFO Epoch: [9] [ 320/2502] eta: 1:44:20 lr: 0.000010 loss_cls: 2.4896 (2.5342) grad_norm: 1.1258 (1.3632) time: 2.8755 data: 0.0003 max mem: 28454 +[2024-12-12 12:56:35 root] (utils.py 283): INFO Epoch: [9] [ 330/2502] eta: 1:43:52 lr: 0.000010 loss_cls: 2.6625 (2.5387) grad_norm: 1.0793 (1.3571) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-12 12:57:04 root] (utils.py 283): INFO Epoch: [9] [ 340/2502] eta: 1:43:23 lr: 0.000010 loss_cls: 2.6711 (2.5393) grad_norm: 1.1806 (1.3594) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-12 12:57:33 root] (utils.py 283): INFO Epoch: [9] [ 350/2502] eta: 1:42:55 lr: 0.000010 loss_cls: 2.6668 (2.5402) grad_norm: 1.1929 (1.3550) time: 2.8750 data: 0.0003 max mem: 28454 +[2024-12-12 12:58:02 root] (utils.py 283): INFO Epoch: [9] [ 360/2502] eta: 1:42:26 lr: 0.000010 loss_cls: 2.3796 (2.5322) grad_norm: 1.2236 (1.3533) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 12:58:30 root] (utils.py 283): INFO Epoch: [9] [ 370/2502] eta: 1:41:58 lr: 0.000010 loss_cls: 2.5555 (2.5329) grad_norm: 1.1454 (1.3478) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 12:58:59 root] (utils.py 283): INFO Epoch: [9] [ 380/2502] eta: 1:41:29 lr: 0.000010 loss_cls: 2.5774 (2.5339) grad_norm: 1.1377 (1.3417) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 12:59:28 root] (utils.py 283): INFO Epoch: [9] [ 390/2502] eta: 1:41:00 lr: 0.000010 loss_cls: 2.7589 (2.5363) grad_norm: 1.1183 (1.3370) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 12:59:57 root] (utils.py 283): INFO Epoch: [9] [ 400/2502] eta: 1:40:32 lr: 0.000010 loss_cls: 2.6843 (2.5389) grad_norm: 1.1183 (1.3328) time: 2.8706 data: 0.0003 max mem: 28454 +[2024-12-12 13:00:25 root] (utils.py 283): INFO Epoch: [9] [ 410/2502] eta: 1:40:03 lr: 0.000010 loss_cls: 2.6001 (2.5387) grad_norm: 1.1674 (1.3283) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 13:00:54 root] (utils.py 283): INFO Epoch: [9] [ 420/2502] eta: 1:39:35 lr: 0.000010 loss_cls: 2.6726 (2.5414) grad_norm: 1.0692 (1.3244) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 13:01:23 root] (utils.py 283): INFO Epoch: [9] [ 430/2502] eta: 1:39:06 lr: 0.000010 loss_cls: 2.6346 (2.5392) grad_norm: 1.0516 (1.3237) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 13:01:51 root] (utils.py 283): INFO Epoch: [9] [ 440/2502] eta: 1:38:38 lr: 0.000010 loss_cls: 2.6346 (2.5404) grad_norm: 1.0522 (1.3202) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 13:02:20 root] (utils.py 283): INFO Epoch: [9] [ 450/2502] eta: 1:38:09 lr: 0.000010 loss_cls: 2.7296 (2.5405) grad_norm: 1.2050 (1.3246) time: 2.8702 data: 0.0003 max mem: 28454 +[2024-12-12 13:02:49 root] (utils.py 283): INFO Epoch: [9] [ 460/2502] eta: 1:37:40 lr: 0.000010 loss_cls: 2.5071 (2.5398) grad_norm: 1.1021 (1.3189) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 13:03:17 root] (utils.py 283): INFO Epoch: [9] [ 470/2502] eta: 1:37:11 lr: 0.000010 loss_cls: 2.5021 (2.5411) grad_norm: 1.0702 (1.3172) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 13:03:46 root] (utils.py 283): INFO Epoch: [9] [ 480/2502] eta: 1:36:43 lr: 0.000010 loss_cls: 2.5762 (2.5389) grad_norm: 1.1314 (1.3267) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 13:04:15 root] (utils.py 283): INFO Epoch: [9] [ 490/2502] eta: 1:36:14 lr: 0.000010 loss_cls: 2.7011 (2.5420) grad_norm: 1.1435 (1.3228) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 13:04:44 root] (utils.py 283): INFO Epoch: [9] [ 500/2502] eta: 1:35:46 lr: 0.000010 loss_cls: 2.7431 (2.5452) grad_norm: 1.1403 (1.3217) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 13:05:12 root] (utils.py 283): INFO Epoch: [9] [ 510/2502] eta: 1:35:17 lr: 0.000010 loss_cls: 2.7431 (2.5435) grad_norm: 1.1403 (1.3183) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 13:05:41 root] (utils.py 283): INFO Epoch: [9] [ 520/2502] eta: 1:34:48 lr: 0.000010 loss_cls: 2.4740 (2.5412) grad_norm: 1.1529 (1.3174) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 13:06:10 root] (utils.py 283): INFO Epoch: [9] [ 530/2502] eta: 1:34:20 lr: 0.000010 loss_cls: 2.4803 (2.5371) grad_norm: 1.1353 (1.3148) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 13:06:39 root] (utils.py 283): INFO Epoch: [9] [ 540/2502] eta: 1:33:51 lr: 0.000010 loss_cls: 2.4839 (2.5347) grad_norm: 1.0783 (1.3107) time: 2.8736 data: 0.0003 max mem: 28454 +[2024-12-12 13:07:07 root] (utils.py 283): INFO Epoch: [9] [ 550/2502] eta: 1:33:23 lr: 0.000010 loss_cls: 2.5417 (2.5353) grad_norm: 1.0634 (1.3077) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 13:07:36 root] (utils.py 283): INFO Epoch: [9] [ 560/2502] eta: 1:32:54 lr: 0.000010 loss_cls: 2.7357 (2.5340) grad_norm: 1.1237 (1.3073) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-12 13:08:05 root] (utils.py 283): INFO Epoch: [9] [ 570/2502] eta: 1:32:25 lr: 0.000010 loss_cls: 2.7647 (2.5354) grad_norm: 1.1527 (1.3045) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 13:08:33 root] (utils.py 283): INFO Epoch: [9] [ 580/2502] eta: 1:31:57 lr: 0.000010 loss_cls: 2.7687 (2.5362) grad_norm: 1.1709 (1.3024) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 13:09:02 root] (utils.py 283): INFO Epoch: [9] [ 590/2502] eta: 1:31:28 lr: 0.000010 loss_cls: 2.7245 (2.5372) grad_norm: 1.1597 (1.3025) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 13:09:31 root] (utils.py 283): INFO Epoch: [9] [ 600/2502] eta: 1:30:59 lr: 0.000010 loss_cls: 2.6555 (2.5337) grad_norm: 1.1406 (1.6157) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 13:10:00 root] (utils.py 283): INFO Epoch: [9] [ 610/2502] eta: 1:30:31 lr: 0.000010 loss_cls: 2.5645 (2.5360) grad_norm: 1.1804 (1.6111) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 13:10:28 root] (utils.py 283): INFO Epoch: [9] [ 620/2502] eta: 1:30:02 lr: 0.000010 loss_cls: 2.5645 (2.5347) grad_norm: 1.2408 (1.6055) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 13:10:57 root] (utils.py 283): INFO Epoch: [9] [ 630/2502] eta: 1:29:33 lr: 0.000010 loss_cls: 2.6284 (2.5346) grad_norm: 1.3058 (1.6155) time: 2.8705 data: 0.0003 max mem: 28454 +[2024-12-12 13:11:26 root] (utils.py 283): INFO Epoch: [9] [ 640/2502] eta: 1:29:04 lr: 0.000010 loss_cls: 2.5701 (2.5345) grad_norm: 1.2499 (1.6089) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 13:11:54 root] (utils.py 283): INFO Epoch: [9] [ 650/2502] eta: 1:28:36 lr: 0.000010 loss_cls: 2.5232 (2.5329) grad_norm: 1.1583 (1.6035) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 13:12:23 root] (utils.py 283): INFO Epoch: [9] [ 660/2502] eta: 1:28:07 lr: 0.000010 loss_cls: 2.4544 (2.5329) grad_norm: 1.1496 (1.5963) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 13:12:52 root] (utils.py 283): INFO Epoch: [9] [ 670/2502] eta: 1:27:38 lr: 0.000010 loss_cls: 2.4791 (2.5310) grad_norm: 1.1610 (1.6041) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 13:13:21 root] (utils.py 283): INFO Epoch: [9] [ 680/2502] eta: 1:27:10 lr: 0.000010 loss_cls: 2.6879 (2.5332) grad_norm: 1.1324 (1.6030) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 13:13:49 root] (utils.py 283): INFO Epoch: [9] [ 690/2502] eta: 1:26:41 lr: 0.000010 loss_cls: 2.8192 (2.5325) grad_norm: 1.0896 (1.5959) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 13:14:18 root] (utils.py 283): INFO Epoch: [9] [ 700/2502] eta: 1:26:12 lr: 0.000010 loss_cls: 2.6190 (2.5332) grad_norm: 1.0896 (1.5927) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 13:14:47 root] (utils.py 283): INFO Epoch: [9] [ 710/2502] eta: 1:25:44 lr: 0.000010 loss_cls: 2.6190 (2.5352) grad_norm: 1.1048 (1.5876) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 13:15:15 root] (utils.py 283): INFO Epoch: [9] [ 720/2502] eta: 1:25:15 lr: 0.000010 loss_cls: 2.6492 (2.5367) grad_norm: 1.1219 (1.5826) time: 2.8680 data: 0.0003 max mem: 28454 +[2024-12-12 13:15:44 root] (utils.py 283): INFO Epoch: [9] [ 730/2502] eta: 1:24:46 lr: 0.000010 loss_cls: 2.5224 (2.5359) grad_norm: 1.1767 (1.5765) time: 2.8692 data: 0.0003 max mem: 28454 +[2024-12-12 13:16:13 root] (utils.py 283): INFO Epoch: [9] [ 740/2502] eta: 1:24:17 lr: 0.000010 loss_cls: 2.5936 (2.5374) grad_norm: 1.1156 (1.5705) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 13:16:41 root] (utils.py 283): INFO Epoch: [9] [ 750/2502] eta: 1:23:49 lr: 0.000010 loss_cls: 2.6787 (2.5396) grad_norm: 1.0914 (1.5647) time: 2.8680 data: 0.0003 max mem: 28454 +[2024-12-12 13:17:10 root] (utils.py 283): INFO Epoch: [9] [ 760/2502] eta: 1:23:20 lr: 0.000010 loss_cls: 2.6741 (2.5408) grad_norm: 1.0976 (1.5765) time: 2.8672 data: 0.0003 max mem: 28454 +[2024-12-12 13:17:39 root] (utils.py 283): INFO Epoch: [9] [ 770/2502] eta: 1:22:51 lr: 0.000010 loss_cls: 2.4492 (2.5375) grad_norm: 1.1971 (1.5738) time: 2.8671 data: 0.0003 max mem: 28454 +[2024-12-12 13:18:08 root] (utils.py 283): INFO Epoch: [9] [ 780/2502] eta: 1:22:22 lr: 0.000010 loss_cls: 2.6231 (2.5373) grad_norm: 1.1970 (1.5736) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 13:18:36 root] (utils.py 283): INFO Epoch: [9] [ 790/2502] eta: 1:21:54 lr: 0.000010 loss_cls: 2.7148 (2.5395) grad_norm: 1.1609 (1.5706) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 13:19:05 root] (utils.py 283): INFO Epoch: [9] [ 800/2502] eta: 1:21:25 lr: 0.000010 loss_cls: 2.7882 (2.5419) grad_norm: 1.1470 (1.5660) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 13:19:34 root] (utils.py 283): INFO Epoch: [9] [ 810/2502] eta: 1:20:56 lr: 0.000010 loss_cls: 2.8140 (2.5433) grad_norm: 1.0942 (1.5622) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 13:20:03 root] (utils.py 283): INFO Epoch: [9] [ 820/2502] eta: 1:20:28 lr: 0.000010 loss_cls: 2.6294 (2.5428) grad_norm: 1.1253 (1.5585) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-12 13:20:31 root] (utils.py 283): INFO Epoch: [9] [ 830/2502] eta: 1:19:59 lr: 0.000010 loss_cls: 2.8268 (2.5452) grad_norm: 1.2185 (1.5557) time: 2.8782 data: 0.0002 max mem: 28454 +[2024-12-12 13:21:00 root] (utils.py 283): INFO Epoch: [9] [ 840/2502] eta: 1:19:31 lr: 0.000010 loss_cls: 2.8268 (2.5441) grad_norm: 1.1420 (1.5521) time: 2.8786 data: 0.0002 max mem: 28454 +[2024-12-12 13:21:29 root] (utils.py 283): INFO Epoch: [9] [ 850/2502] eta: 1:19:02 lr: 0.000010 loss_cls: 2.5415 (2.5453) grad_norm: 1.1008 (1.5470) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 13:21:58 root] (utils.py 283): INFO Epoch: [9] [ 860/2502] eta: 1:18:33 lr: 0.000010 loss_cls: 2.7231 (2.5446) grad_norm: 1.0574 (1.5480) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 13:22:26 root] (utils.py 283): INFO Epoch: [9] [ 870/2502] eta: 1:18:05 lr: 0.000010 loss_cls: 2.5365 (2.5435) grad_norm: 1.2272 (1.5499) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 13:22:55 root] (utils.py 283): INFO Epoch: [9] [ 880/2502] eta: 1:17:36 lr: 0.000010 loss_cls: 2.5365 (2.5433) grad_norm: 1.2227 (1.5684) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 13:23:24 root] (utils.py 283): INFO Epoch: [9] [ 890/2502] eta: 1:17:08 lr: 0.000010 loss_cls: 2.4141 (2.5409) grad_norm: 1.1448 (1.5640) time: 2.8754 data: 0.0003 max mem: 28454 +[2024-12-12 13:23:53 root] (utils.py 283): INFO Epoch: [9] [ 900/2502] eta: 1:16:39 lr: 0.000010 loss_cls: 2.4141 (2.5395) grad_norm: 1.1448 (1.5639) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 13:24:21 root] (utils.py 283): INFO Epoch: [9] [ 910/2502] eta: 1:16:10 lr: 0.000010 loss_cls: 2.6084 (2.5379) grad_norm: 1.1231 (1.5588) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 13:24:50 root] (utils.py 283): INFO Epoch: [9] [ 920/2502] eta: 1:15:42 lr: 0.000010 loss_cls: 2.6524 (2.5382) grad_norm: 1.1298 (1.5693) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 13:25:19 root] (utils.py 283): INFO Epoch: [9] [ 930/2502] eta: 1:15:13 lr: 0.000010 loss_cls: 2.6088 (2.5389) grad_norm: 1.1923 (1.5649) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 13:25:48 root] (utils.py 283): INFO Epoch: [9] [ 940/2502] eta: 1:14:44 lr: 0.000010 loss_cls: 2.6361 (2.5399) grad_norm: 1.0588 (1.5599) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 13:26:16 root] (utils.py 283): INFO Epoch: [9] [ 950/2502] eta: 1:14:16 lr: 0.000010 loss_cls: 2.8223 (2.5407) grad_norm: 1.0804 (1.5601) time: 2.8737 data: 0.0003 max mem: 28454 +[2024-12-12 13:26:45 root] (utils.py 283): INFO Epoch: [9] [ 960/2502] eta: 1:13:47 lr: 0.000010 loss_cls: 2.5945 (2.5411) grad_norm: 1.1038 (1.5563) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 13:27:14 root] (utils.py 283): INFO Epoch: [9] [ 970/2502] eta: 1:13:18 lr: 0.000010 loss_cls: 2.5232 (2.5404) grad_norm: 1.0755 (1.5516) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 13:27:43 root] (utils.py 283): INFO Epoch: [9] [ 980/2502] eta: 1:12:50 lr: 0.000010 loss_cls: 2.4316 (2.5383) grad_norm: 1.1086 (1.5481) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 13:28:11 root] (utils.py 283): INFO Epoch: [9] [ 990/2502] eta: 1:12:21 lr: 0.000010 loss_cls: 2.4121 (2.5378) grad_norm: 1.1376 (1.5443) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 13:28:40 root] (utils.py 283): INFO Epoch: [9] [1000/2502] eta: 1:11:52 lr: 0.000010 loss_cls: 2.7057 (2.5394) grad_norm: 1.1491 (1.5407) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 13:29:09 root] (utils.py 283): INFO Epoch: [9] [1010/2502] eta: 1:11:24 lr: 0.000010 loss_cls: 2.6064 (2.5382) grad_norm: 1.1197 (1.5372) time: 2.8787 data: 0.0002 max mem: 28454 +[2024-12-12 13:29:38 root] (utils.py 283): INFO Epoch: [9] [1020/2502] eta: 1:10:55 lr: 0.000010 loss_cls: 2.6651 (2.5390) grad_norm: 1.0871 (1.5330) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 13:30:06 root] (utils.py 283): INFO Epoch: [9] [1030/2502] eta: 1:10:26 lr: 0.000010 loss_cls: 2.7122 (2.5394) grad_norm: 1.0871 (1.5301) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 13:30:35 root] (utils.py 283): INFO Epoch: [9] [1040/2502] eta: 1:09:58 lr: 0.000010 loss_cls: 2.5944 (2.5402) grad_norm: 1.0666 (1.5254) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 13:31:04 root] (utils.py 283): INFO Epoch: [9] [1050/2502] eta: 1:09:29 lr: 0.000010 loss_cls: 2.6789 (2.5411) grad_norm: 1.1188 (1.5237) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 13:31:33 root] (utils.py 283): INFO Epoch: [9] [1060/2502] eta: 1:09:00 lr: 0.000010 loss_cls: 2.6102 (2.5422) grad_norm: 1.1919 (1.5241) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 13:32:01 root] (utils.py 283): INFO Epoch: [9] [1070/2502] eta: 1:08:32 lr: 0.000010 loss_cls: 2.6662 (2.5424) grad_norm: 1.1071 (1.5207) time: 2.8763 data: 0.0003 max mem: 28454 +[2024-12-12 13:32:30 root] (utils.py 283): INFO Epoch: [9] [1080/2502] eta: 1:08:03 lr: 0.000010 loss_cls: 2.6662 (2.5435) grad_norm: 1.1101 (1.5186) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 13:32:59 root] (utils.py 283): INFO Epoch: [9] [1090/2502] eta: 1:07:34 lr: 0.000010 loss_cls: 2.8354 (2.5455) grad_norm: 1.1379 (1.5184) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 13:33:28 root] (utils.py 283): INFO Epoch: [9] [1100/2502] eta: 1:07:06 lr: 0.000010 loss_cls: 2.8552 (2.5467) grad_norm: 1.1640 (1.5157) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 13:33:56 root] (utils.py 283): INFO Epoch: [9] [1110/2502] eta: 1:06:37 lr: 0.000010 loss_cls: 2.6458 (2.5471) grad_norm: 1.1708 (1.5128) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 13:34:25 root] (utils.py 283): INFO Epoch: [9] [1120/2502] eta: 1:06:08 lr: 0.000010 loss_cls: 2.5776 (2.5470) grad_norm: 1.1355 (1.5091) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 13:34:54 root] (utils.py 283): INFO Epoch: [9] [1130/2502] eta: 1:05:40 lr: 0.000010 loss_cls: 2.5776 (2.5462) grad_norm: 1.1015 (1.5055) time: 2.8712 data: 0.0003 max mem: 28454 +[2024-12-12 13:35:23 root] (utils.py 283): INFO Epoch: [9] [1140/2502] eta: 1:05:11 lr: 0.000010 loss_cls: 2.6181 (2.5461) grad_norm: 1.1015 (1.5024) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 13:35:51 root] (utils.py 283): INFO Epoch: [9] [1150/2502] eta: 1:04:42 lr: 0.000010 loss_cls: 2.6469 (2.5461) grad_norm: 1.0952 (1.5029) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 13:36:20 root] (utils.py 283): INFO Epoch: [9] [1160/2502] eta: 1:04:13 lr: 0.000010 loss_cls: 2.7661 (2.5481) grad_norm: 1.0616 (1.4995) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 13:36:49 root] (utils.py 283): INFO Epoch: [9] [1170/2502] eta: 1:03:45 lr: 0.000010 loss_cls: 2.7800 (2.5473) grad_norm: 1.0772 (1.4963) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 13:37:17 root] (utils.py 283): INFO Epoch: [9] [1180/2502] eta: 1:03:16 lr: 0.000010 loss_cls: 2.6725 (2.5481) grad_norm: 1.1054 (1.4948) time: 2.8676 data: 0.0003 max mem: 28454 +[2024-12-12 13:37:46 root] (utils.py 283): INFO Epoch: [9] [1190/2502] eta: 1:02:47 lr: 0.000010 loss_cls: 2.6725 (2.5477) grad_norm: 1.1054 (1.4932) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 13:38:15 root] (utils.py 283): INFO Epoch: [9] [1200/2502] eta: 1:02:18 lr: 0.000010 loss_cls: 2.6996 (2.5482) grad_norm: 1.0705 (1.4897) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 13:38:43 root] (utils.py 283): INFO Epoch: [9] [1210/2502] eta: 1:01:50 lr: 0.000010 loss_cls: 2.6633 (2.5477) grad_norm: 1.0933 (1.4882) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 13:39:12 root] (utils.py 283): INFO Epoch: [9] [1220/2502] eta: 1:01:21 lr: 0.000010 loss_cls: 2.5792 (2.5481) grad_norm: 1.1051 (1.4854) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 13:39:41 root] (utils.py 283): INFO Epoch: [9] [1230/2502] eta: 1:00:52 lr: 0.000010 loss_cls: 2.5961 (2.5476) grad_norm: 1.0660 (1.4830) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 13:40:09 root] (utils.py 283): INFO Epoch: [9] [1240/2502] eta: 1:00:23 lr: 0.000010 loss_cls: 2.5660 (2.5469) grad_norm: 1.1912 (1.4806) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 13:40:38 root] (utils.py 283): INFO Epoch: [9] [1250/2502] eta: 0:59:55 lr: 0.000010 loss_cls: 2.5924 (2.5478) grad_norm: 1.1912 (1.4783) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 13:41:07 root] (utils.py 283): INFO Epoch: [9] [1260/2502] eta: 0:59:26 lr: 0.000010 loss_cls: 2.6293 (2.5473) grad_norm: 1.1902 (1.4765) time: 2.8785 data: 0.0003 max mem: 28454 +[2024-12-12 13:41:36 root] (utils.py 283): INFO Epoch: [9] [1270/2502] eta: 0:58:57 lr: 0.000010 loss_cls: 2.3243 (2.5453) grad_norm: 1.1691 (1.4765) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 13:42:04 root] (utils.py 283): INFO Epoch: [9] [1280/2502] eta: 0:58:29 lr: 0.000010 loss_cls: 2.3367 (2.5447) grad_norm: 1.1415 (1.4768) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 13:42:33 root] (utils.py 283): INFO Epoch: [9] [1290/2502] eta: 0:58:00 lr: 0.000010 loss_cls: 2.4856 (2.5445) grad_norm: 1.1615 (1.4744) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 13:43:02 root] (utils.py 283): INFO Epoch: [9] [1300/2502] eta: 0:57:31 lr: 0.000010 loss_cls: 2.4856 (2.5442) grad_norm: 1.1913 (1.4739) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 13:43:31 root] (utils.py 283): INFO Epoch: [9] [1310/2502] eta: 0:57:03 lr: 0.000010 loss_cls: 2.7349 (2.5451) grad_norm: 1.2055 (1.4718) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 13:43:59 root] (utils.py 283): INFO Epoch: [9] [1320/2502] eta: 0:56:34 lr: 0.000010 loss_cls: 2.7383 (2.5462) grad_norm: 1.1703 (1.4746) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 13:44:28 root] (utils.py 283): INFO Epoch: [9] [1330/2502] eta: 0:56:05 lr: 0.000010 loss_cls: 2.6297 (2.5444) grad_norm: 1.1598 (1.4717) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 13:44:57 root] (utils.py 283): INFO Epoch: [9] [1340/2502] eta: 0:55:36 lr: 0.000010 loss_cls: 2.5048 (2.5447) grad_norm: 1.1268 (1.4704) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 13:45:26 root] (utils.py 283): INFO Epoch: [9] [1350/2502] eta: 0:55:08 lr: 0.000010 loss_cls: 2.8219 (2.5470) grad_norm: 1.1636 (1.4692) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 13:45:54 root] (utils.py 283): INFO Epoch: [9] [1360/2502] eta: 0:54:39 lr: 0.000010 loss_cls: 2.8597 (2.5463) grad_norm: 1.1387 (1.4669) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 13:46:23 root] (utils.py 283): INFO Epoch: [9] [1370/2502] eta: 0:54:10 lr: 0.000010 loss_cls: 2.7047 (2.5467) grad_norm: 1.1387 (1.4649) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 13:46:52 root] (utils.py 283): INFO Epoch: [9] [1380/2502] eta: 0:53:42 lr: 0.000010 loss_cls: 2.7061 (2.5468) grad_norm: 1.1400 (1.4627) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 13:47:20 root] (utils.py 283): INFO Epoch: [9] [1390/2502] eta: 0:53:13 lr: 0.000010 loss_cls: 2.7061 (2.5472) grad_norm: 1.1498 (1.4602) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 13:47:49 root] (utils.py 283): INFO Epoch: [9] [1400/2502] eta: 0:52:44 lr: 0.000010 loss_cls: 2.7163 (2.5473) grad_norm: 1.1363 (1.4683) time: 2.8731 data: 0.0003 max mem: 28454 +[2024-12-12 13:48:18 root] (utils.py 283): INFO Epoch: [9] [1410/2502] eta: 0:52:16 lr: 0.000010 loss_cls: 2.6615 (2.5470) grad_norm: 1.1914 (1.4672) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 13:48:47 root] (utils.py 283): INFO Epoch: [9] [1420/2502] eta: 0:51:47 lr: 0.000010 loss_cls: 2.5985 (2.5480) grad_norm: 1.2009 (1.4659) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 13:49:15 root] (utils.py 283): INFO Epoch: [9] [1430/2502] eta: 0:51:18 lr: 0.000010 loss_cls: 2.5746 (2.5476) grad_norm: 1.1979 (1.4652) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 13:49:44 root] (utils.py 283): INFO Epoch: [9] [1440/2502] eta: 0:50:49 lr: 0.000010 loss_cls: 2.5742 (2.5481) grad_norm: 1.1831 (1.4756) time: 2.8700 data: 0.0003 max mem: 28454 +[2024-12-12 13:50:13 root] (utils.py 283): INFO Epoch: [9] [1450/2502] eta: 0:50:21 lr: 0.000010 loss_cls: 2.6783 (2.5488) grad_norm: 1.1131 (1.4736) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 13:50:42 root] (utils.py 283): INFO Epoch: [9] [1460/2502] eta: 0:49:52 lr: 0.000010 loss_cls: 2.7159 (2.5478) grad_norm: 1.1575 (1.4732) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 13:51:10 root] (utils.py 283): INFO Epoch: [9] [1470/2502] eta: 0:49:23 lr: 0.000010 loss_cls: 2.5340 (2.5484) grad_norm: 1.1792 (1.4727) time: 2.8698 data: 0.0003 max mem: 28454 +[2024-12-12 13:51:39 root] (utils.py 283): INFO Epoch: [9] [1480/2502] eta: 0:48:54 lr: 0.000010 loss_cls: 2.5340 (2.5477) grad_norm: 1.0972 (1.4700) time: 2.8668 data: 0.0003 max mem: 28454 +[2024-12-12 13:52:08 root] (utils.py 283): INFO Epoch: [9] [1490/2502] eta: 0:48:26 lr: 0.000010 loss_cls: 2.7192 (2.5493) grad_norm: 1.0972 (1.4783) time: 2.8681 data: 0.0003 max mem: 28454 +[2024-12-12 13:52:36 root] (utils.py 283): INFO Epoch: [9] [1500/2502] eta: 0:47:57 lr: 0.000010 loss_cls: 2.7517 (2.5501) grad_norm: 1.1800 (1.4767) time: 2.8702 data: 0.0003 max mem: 28454 +[2024-12-12 13:53:05 root] (utils.py 283): INFO Epoch: [9] [1510/2502] eta: 0:47:28 lr: 0.000010 loss_cls: 2.6799 (2.5491) grad_norm: 1.2068 (1.4752) time: 2.8734 data: 0.0003 max mem: 28454 +[2024-12-12 13:53:34 root] (utils.py 283): INFO Epoch: [9] [1520/2502] eta: 0:47:00 lr: 0.000010 loss_cls: 2.3454 (2.5481) grad_norm: 1.1900 (1.4729) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 13:54:02 root] (utils.py 283): INFO Epoch: [9] [1530/2502] eta: 0:46:31 lr: 0.000010 loss_cls: 2.5351 (2.5487) grad_norm: 1.1067 (1.4713) time: 2.8686 data: 0.0003 max mem: 28454 +[2024-12-12 13:54:31 root] (utils.py 283): INFO Epoch: [9] [1540/2502] eta: 0:46:02 lr: 0.000010 loss_cls: 2.6460 (2.5488) grad_norm: 1.1006 (1.4691) time: 2.8678 data: 0.0003 max mem: 28454 +[2024-12-12 13:55:00 root] (utils.py 283): INFO Epoch: [9] [1550/2502] eta: 0:45:33 lr: 0.000010 loss_cls: 2.6259 (2.5487) grad_norm: 1.1006 (1.4759) time: 2.8688 data: 0.0003 max mem: 28454 +[2024-12-12 13:55:29 root] (utils.py 283): INFO Epoch: [9] [1560/2502] eta: 0:45:05 lr: 0.000010 loss_cls: 2.5212 (2.5487) grad_norm: 1.1030 (1.4737) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 13:55:57 root] (utils.py 283): INFO Epoch: [9] [1570/2502] eta: 0:44:36 lr: 0.000010 loss_cls: 2.6171 (2.5483) grad_norm: 1.1083 (1.4718) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 13:56:26 root] (utils.py 283): INFO Epoch: [9] [1580/2502] eta: 0:44:07 lr: 0.000010 loss_cls: 2.6171 (2.5495) grad_norm: 1.1516 (1.4703) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 13:56:55 root] (utils.py 283): INFO Epoch: [9] [1590/2502] eta: 0:43:38 lr: 0.000010 loss_cls: 2.6750 (2.5492) grad_norm: 1.1542 (1.4695) time: 2.8681 data: 0.0003 max mem: 28454 +[2024-12-12 13:57:23 root] (utils.py 283): INFO Epoch: [9] [1600/2502] eta: 0:43:10 lr: 0.000010 loss_cls: 2.6771 (2.5484) grad_norm: 1.1094 (1.4672) time: 2.8705 data: 0.0003 max mem: 28454 +[2024-12-12 13:57:52 root] (utils.py 283): INFO Epoch: [9] [1610/2502] eta: 0:42:41 lr: 0.000010 loss_cls: 2.5918 (2.5479) grad_norm: 1.1078 (1.5291) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-12 13:58:21 root] (utils.py 283): INFO Epoch: [9] [1620/2502] eta: 0:42:12 lr: 0.000010 loss_cls: 2.6213 (2.5492) grad_norm: 1.3834 (1.5294) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 13:58:49 root] (utils.py 283): INFO Epoch: [9] [1630/2502] eta: 0:41:43 lr: 0.000010 loss_cls: 2.7979 (2.5500) grad_norm: 1.4510 (1.5289) time: 2.8691 data: 0.0003 max mem: 28454 +[2024-12-12 13:59:18 root] (utils.py 283): INFO Epoch: [9] [1640/2502] eta: 0:41:15 lr: 0.000010 loss_cls: 2.7979 (2.5507) grad_norm: 1.3331 (1.5433) time: 2.8669 data: 0.0003 max mem: 28454 +[2024-12-12 13:59:47 root] (utils.py 283): INFO Epoch: [9] [1650/2502] eta: 0:40:46 lr: 0.000010 loss_cls: 2.7108 (2.5511) grad_norm: 1.3055 (1.5478) time: 2.8708 data: 0.0003 max mem: 28454 +[2024-12-12 14:00:15 root] (utils.py 283): INFO Epoch: [9] [1660/2502] eta: 0:40:17 lr: 0.000010 loss_cls: 2.5427 (2.5505) grad_norm: 1.1851 (1.5454) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 14:00:44 root] (utils.py 283): INFO Epoch: [9] [1670/2502] eta: 0:39:49 lr: 0.000010 loss_cls: 2.6222 (2.5510) grad_norm: 1.1335 (1.5431) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 14:01:13 root] (utils.py 283): INFO Epoch: [9] [1680/2502] eta: 0:39:20 lr: 0.000010 loss_cls: 2.7039 (2.5516) grad_norm: 1.1623 (1.5411) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 14:01:42 root] (utils.py 283): INFO Epoch: [9] [1690/2502] eta: 0:38:51 lr: 0.000010 loss_cls: 2.6479 (2.5517) grad_norm: 1.1437 (1.5385) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 14:02:10 root] (utils.py 283): INFO Epoch: [9] [1700/2502] eta: 0:38:22 lr: 0.000010 loss_cls: 2.6457 (2.5515) grad_norm: 1.1230 (1.5394) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 14:02:39 root] (utils.py 283): INFO Epoch: [9] [1710/2502] eta: 0:37:54 lr: 0.000010 loss_cls: 2.6527 (2.5521) grad_norm: 1.1555 (1.5638) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 14:03:08 root] (utils.py 283): INFO Epoch: [9] [1720/2502] eta: 0:37:25 lr: 0.000010 loss_cls: 2.6527 (2.5510) grad_norm: 1.2333 (1.5624) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 14:03:37 root] (utils.py 283): INFO Epoch: [9] [1730/2502] eta: 0:36:56 lr: 0.000010 loss_cls: 2.7010 (2.5516) grad_norm: 1.3660 (1.5622) time: 2.8744 data: 0.0003 max mem: 28454 +[2024-12-12 14:04:05 root] (utils.py 283): INFO Epoch: [9] [1740/2502] eta: 0:36:28 lr: 0.000010 loss_cls: 2.5588 (2.5509) grad_norm: 1.3396 (1.5614) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 14:04:34 root] (utils.py 283): INFO Epoch: [9] [1750/2502] eta: 0:35:59 lr: 0.000010 loss_cls: 2.6606 (2.5516) grad_norm: 1.2573 (1.5601) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 14:05:03 root] (utils.py 283): INFO Epoch: [9] [1760/2502] eta: 0:35:30 lr: 0.000010 loss_cls: 2.7363 (2.5525) grad_norm: 1.2516 (1.5581) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 14:05:32 root] (utils.py 283): INFO Epoch: [9] [1770/2502] eta: 0:35:02 lr: 0.000010 loss_cls: 2.6037 (2.5515) grad_norm: 1.1224 (1.5562) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 14:06:00 root] (utils.py 283): INFO Epoch: [9] [1780/2502] eta: 0:34:33 lr: 0.000010 loss_cls: 2.5468 (2.5516) grad_norm: 1.1521 (1.5543) time: 2.8769 data: 0.0003 max mem: 28454 +[2024-12-12 14:06:29 root] (utils.py 283): INFO Epoch: [9] [1790/2502] eta: 0:34:04 lr: 0.000010 loss_cls: 2.7250 (2.5536) grad_norm: 1.1677 (1.5521) time: 2.8815 data: 0.0003 max mem: 28454 +[2024-12-12 14:06:58 root] (utils.py 283): INFO Epoch: [9] [1800/2502] eta: 0:33:36 lr: 0.000010 loss_cls: 2.8219 (2.5543) grad_norm: 1.1409 (1.5499) time: 2.8818 data: 0.0002 max mem: 28454 +[2024-12-12 14:07:27 root] (utils.py 283): INFO Epoch: [9] [1810/2502] eta: 0:33:07 lr: 0.000010 loss_cls: 2.6246 (2.5540) grad_norm: 1.1293 (1.5480) time: 2.8799 data: 0.0003 max mem: 28454 +[2024-12-12 14:07:56 root] (utils.py 283): INFO Epoch: [9] [1820/2502] eta: 0:32:38 lr: 0.000010 loss_cls: 2.4979 (2.5537) grad_norm: 1.1134 (1.5468) time: 2.8790 data: 0.0003 max mem: 28454 +[2024-12-12 14:08:24 root] (utils.py 283): INFO Epoch: [9] [1830/2502] eta: 0:32:09 lr: 0.000010 loss_cls: 2.5886 (2.5537) grad_norm: 1.1134 (1.5454) time: 2.8774 data: 0.0003 max mem: 28454 +[2024-12-12 14:08:53 root] (utils.py 283): INFO Epoch: [9] [1840/2502] eta: 0:31:41 lr: 0.000010 loss_cls: 2.6600 (2.5533) grad_norm: 1.1150 (1.5431) time: 2.8789 data: 0.0003 max mem: 28454 +[2024-12-12 14:09:22 root] (utils.py 283): INFO Epoch: [9] [1850/2502] eta: 0:31:12 lr: 0.000010 loss_cls: 2.5615 (2.5524) grad_norm: 1.1204 (1.5409) time: 2.8796 data: 0.0003 max mem: 28454 +[2024-12-12 14:09:51 root] (utils.py 283): INFO Epoch: [9] [1860/2502] eta: 0:30:43 lr: 0.000010 loss_cls: 2.3874 (2.5521) grad_norm: 1.1611 (1.5389) time: 2.8796 data: 0.0003 max mem: 28454 +[2024-12-12 14:10:20 root] (utils.py 283): INFO Epoch: [9] [1870/2502] eta: 0:30:15 lr: 0.000010 loss_cls: 2.3874 (2.5509) grad_norm: 1.1295 (1.5371) time: 2.8787 data: 0.0003 max mem: 28454 +[2024-12-12 14:10:48 root] (utils.py 283): INFO Epoch: [9] [1880/2502] eta: 0:29:46 lr: 0.000010 loss_cls: 2.4254 (2.5516) grad_norm: 1.1639 (1.5352) time: 2.8779 data: 0.0003 max mem: 28454 +[2024-12-12 14:11:17 root] (utils.py 283): INFO Epoch: [9] [1890/2502] eta: 0:29:17 lr: 0.000010 loss_cls: 2.7626 (2.5522) grad_norm: 1.1542 (1.5333) time: 2.8791 data: 0.0003 max mem: 28454 +[2024-12-12 14:11:46 root] (utils.py 283): INFO Epoch: [9] [1900/2502] eta: 0:28:49 lr: 0.000010 loss_cls: 2.7189 (2.5521) grad_norm: 1.1398 (1.5315) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 14:12:15 root] (utils.py 283): INFO Epoch: [9] [1910/2502] eta: 0:28:20 lr: 0.000010 loss_cls: 2.5282 (2.5509) grad_norm: 1.1414 (1.5293) time: 2.8793 data: 0.0003 max mem: 28454 +[2024-12-12 14:12:43 root] (utils.py 283): INFO Epoch: [9] [1920/2502] eta: 0:27:51 lr: 0.000010 loss_cls: 2.6510 (2.5513) grad_norm: 1.0932 (1.5283) time: 2.8772 data: 0.0002 max mem: 28454 +[2024-12-12 14:13:12 root] (utils.py 283): INFO Epoch: [9] [1930/2502] eta: 0:27:22 lr: 0.000010 loss_cls: 2.5215 (2.5492) grad_norm: 1.0991 (1.5265) time: 2.8740 data: 0.0003 max mem: 28454 +[2024-12-12 14:13:41 root] (utils.py 283): INFO Epoch: [9] [1940/2502] eta: 0:26:54 lr: 0.000010 loss_cls: 2.2773 (2.5488) grad_norm: 1.1052 (1.5243) time: 2.8743 data: 0.0003 max mem: 28454 +[2024-12-12 14:14:10 root] (utils.py 283): INFO Epoch: [9] [1950/2502] eta: 0:26:25 lr: 0.000010 loss_cls: 2.6174 (2.5491) grad_norm: 1.1328 (1.5232) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 14:14:38 root] (utils.py 283): INFO Epoch: [9] [1960/2502] eta: 0:25:56 lr: 0.000010 loss_cls: 2.7786 (2.5500) grad_norm: 1.2200 (1.5376) time: 2.8677 data: 0.0003 max mem: 28454 +[2024-12-12 14:15:07 root] (utils.py 283): INFO Epoch: [9] [1970/2502] eta: 0:25:28 lr: 0.000010 loss_cls: 2.7819 (2.5506) grad_norm: 1.1191 (1.5358) time: 2.8698 data: 0.0003 max mem: 28454 +[2024-12-12 14:15:36 root] (utils.py 283): INFO Epoch: [9] [1980/2502] eta: 0:24:59 lr: 0.000010 loss_cls: 2.7383 (2.5504) grad_norm: 1.1072 (1.5373) time: 2.8686 data: 0.0003 max mem: 28454 +[2024-12-12 14:16:04 root] (utils.py 283): INFO Epoch: [9] [1990/2502] eta: 0:24:30 lr: 0.000010 loss_cls: 2.6585 (2.5509) grad_norm: 1.1072 (1.5362) time: 2.8681 data: 0.0003 max mem: 28454 +[2024-12-12 14:16:33 root] (utils.py 283): INFO Epoch: [9] [2000/2502] eta: 0:24:01 lr: 0.000010 loss_cls: 2.6364 (2.5511) grad_norm: 1.0534 (1.5382) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 14:17:02 root] (utils.py 283): INFO Epoch: [9] [2010/2502] eta: 0:23:33 lr: 0.000010 loss_cls: 2.6364 (2.5510) grad_norm: 1.0984 (1.5361) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 14:17:31 root] (utils.py 283): INFO Epoch: [9] [2020/2502] eta: 0:23:04 lr: 0.000010 loss_cls: 2.7044 (2.5520) grad_norm: 1.1229 (1.5346) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 14:17:59 root] (utils.py 283): INFO Epoch: [9] [2030/2502] eta: 0:22:35 lr: 0.000010 loss_cls: 2.7866 (2.5530) grad_norm: 1.1130 (1.5620) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 14:18:28 root] (utils.py 283): INFO Epoch: [9] [2040/2502] eta: 0:22:06 lr: 0.000010 loss_cls: 2.6688 (2.5526) grad_norm: 1.1256 (1.5614) time: 2.8703 data: 0.0003 max mem: 28454 +[2024-12-12 14:18:57 root] (utils.py 283): INFO Epoch: [9] [2050/2502] eta: 0:21:38 lr: 0.000010 loss_cls: 2.4936 (2.5525) grad_norm: 1.1953 (1.5599) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 14:19:25 root] (utils.py 283): INFO Epoch: [9] [2060/2502] eta: 0:21:09 lr: 0.000010 loss_cls: 2.7121 (2.5533) grad_norm: 1.1807 (1.5601) time: 2.8703 data: 0.0003 max mem: 28454 +[2024-12-12 14:19:54 root] (utils.py 283): INFO Epoch: [9] [2070/2502] eta: 0:20:40 lr: 0.000010 loss_cls: 2.7080 (2.5534) grad_norm: 1.1905 (1.5611) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 14:20:23 root] (utils.py 283): INFO Epoch: [9] [2080/2502] eta: 0:20:12 lr: 0.000010 loss_cls: 2.6029 (2.5535) grad_norm: 1.1840 (1.5616) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 14:20:52 root] (utils.py 283): INFO Epoch: [9] [2090/2502] eta: 0:19:43 lr: 0.000010 loss_cls: 2.5061 (2.5531) grad_norm: 1.1213 (1.5597) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 14:21:20 root] (utils.py 283): INFO Epoch: [9] [2100/2502] eta: 0:19:14 lr: 0.000010 loss_cls: 2.4699 (2.5526) grad_norm: 1.1207 (1.5577) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 14:21:49 root] (utils.py 283): INFO Epoch: [9] [2110/2502] eta: 0:18:45 lr: 0.000010 loss_cls: 2.5829 (2.5538) grad_norm: 1.1362 (1.5576) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 14:22:18 root] (utils.py 283): INFO Epoch: [9] [2120/2502] eta: 0:18:17 lr: 0.000010 loss_cls: 2.6982 (2.5535) grad_norm: 1.1362 (1.5555) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 14:22:47 root] (utils.py 283): INFO Epoch: [9] [2130/2502] eta: 0:17:48 lr: 0.000010 loss_cls: 2.6342 (2.5538) grad_norm: 1.0798 (1.5535) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 14:23:15 root] (utils.py 283): INFO Epoch: [9] [2140/2502] eta: 0:17:19 lr: 0.000010 loss_cls: 2.7187 (2.5541) grad_norm: 1.1148 (1.5522) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 14:23:44 root] (utils.py 283): INFO Epoch: [9] [2150/2502] eta: 0:16:51 lr: 0.000010 loss_cls: 2.7628 (2.5541) grad_norm: 1.1473 (1.5509) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 14:24:13 root] (utils.py 283): INFO Epoch: [9] [2160/2502] eta: 0:16:22 lr: 0.000010 loss_cls: 2.6038 (2.5535) grad_norm: 1.1414 (1.5494) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 14:24:41 root] (utils.py 283): INFO Epoch: [9] [2170/2502] eta: 0:15:53 lr: 0.000010 loss_cls: 2.5633 (2.5537) grad_norm: 1.1605 (1.5486) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 14:25:10 root] (utils.py 283): INFO Epoch: [9] [2180/2502] eta: 0:15:24 lr: 0.000010 loss_cls: 2.5633 (2.5539) grad_norm: 1.1988 (1.5491) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 14:25:39 root] (utils.py 283): INFO Epoch: [9] [2190/2502] eta: 0:14:56 lr: 0.000010 loss_cls: 2.5370 (2.5542) grad_norm: 1.1373 (1.5472) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 14:26:08 root] (utils.py 283): INFO Epoch: [9] [2200/2502] eta: 0:14:27 lr: 0.000010 loss_cls: 2.7323 (2.5545) grad_norm: 1.0372 (1.5454) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 14:26:36 root] (utils.py 283): INFO Epoch: [9] [2210/2502] eta: 0:13:58 lr: 0.000010 loss_cls: 2.7317 (2.5546) grad_norm: 1.0905 (1.5441) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 14:27:05 root] (utils.py 283): INFO Epoch: [9] [2220/2502] eta: 0:13:29 lr: 0.000010 loss_cls: 2.8287 (2.5562) grad_norm: 1.2540 (1.5443) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 14:27:34 root] (utils.py 283): INFO Epoch: [9] [2230/2502] eta: 0:13:01 lr: 0.000010 loss_cls: 2.8374 (2.5573) grad_norm: 1.2370 (1.5432) time: 2.8791 data: 0.0003 max mem: 28454 +[2024-12-12 14:28:03 root] (utils.py 283): INFO Epoch: [9] [2240/2502] eta: 0:12:32 lr: 0.000010 loss_cls: 2.7386 (2.5575) grad_norm: 1.0959 (1.5411) time: 2.8790 data: 0.0002 max mem: 28454 +[2024-12-12 14:28:32 root] (utils.py 283): INFO Epoch: [9] [2250/2502] eta: 0:12:03 lr: 0.000010 loss_cls: 2.5846 (2.5577) grad_norm: 1.0664 (1.5393) time: 2.8782 data: 0.0002 max mem: 28454 +[2024-12-12 14:29:00 root] (utils.py 283): INFO Epoch: [9] [2260/2502] eta: 0:11:35 lr: 0.000010 loss_cls: 2.7786 (2.5586) grad_norm: 1.1826 (1.5397) time: 2.8797 data: 0.0002 max mem: 28454 +[2024-12-12 14:29:29 root] (utils.py 283): INFO Epoch: [9] [2270/2502] eta: 0:11:06 lr: 0.000010 loss_cls: 2.3528 (2.5562) grad_norm: 1.1321 (1.5377) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-12 14:29:58 root] (utils.py 283): INFO Epoch: [9] [2280/2502] eta: 0:10:37 lr: 0.000010 loss_cls: 2.3218 (2.5563) grad_norm: 1.1125 (1.5365) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-12 14:30:27 root] (utils.py 283): INFO Epoch: [9] [2290/2502] eta: 0:10:08 lr: 0.000010 loss_cls: 2.6336 (2.5557) grad_norm: 1.1436 (1.5346) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 14:30:55 root] (utils.py 283): INFO Epoch: [9] [2300/2502] eta: 0:09:40 lr: 0.000010 loss_cls: 2.6336 (2.5562) grad_norm: 1.0616 (1.5328) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 14:31:24 root] (utils.py 283): INFO Epoch: [9] [2310/2502] eta: 0:09:11 lr: 0.000010 loss_cls: 2.6601 (2.5558) grad_norm: 1.0660 (1.5315) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 14:31:53 root] (utils.py 283): INFO Epoch: [9] [2320/2502] eta: 0:08:42 lr: 0.000010 loss_cls: 2.5210 (2.5557) grad_norm: 1.1515 (1.5297) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 14:32:22 root] (utils.py 283): INFO Epoch: [9] [2330/2502] eta: 0:08:14 lr: 0.000010 loss_cls: 2.5560 (2.5556) grad_norm: 1.0594 (1.5286) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 14:32:50 root] (utils.py 283): INFO Epoch: [9] [2340/2502] eta: 0:07:45 lr: 0.000010 loss_cls: 2.6424 (2.5562) grad_norm: 1.0742 (1.5268) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 14:33:19 root] (utils.py 283): INFO Epoch: [9] [2350/2502] eta: 0:07:16 lr: 0.000010 loss_cls: 2.7517 (2.5566) grad_norm: 1.1599 (1.5274) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 14:33:48 root] (utils.py 283): INFO Epoch: [9] [2360/2502] eta: 0:06:47 lr: 0.000010 loss_cls: 2.6162 (2.5566) grad_norm: 1.2256 (1.5258) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 14:34:16 root] (utils.py 283): INFO Epoch: [9] [2370/2502] eta: 0:06:19 lr: 0.000010 loss_cls: 2.7017 (2.5573) grad_norm: 1.1689 (1.5364) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 14:34:45 root] (utils.py 283): INFO Epoch: [9] [2380/2502] eta: 0:05:50 lr: 0.000010 loss_cls: 2.7608 (2.5567) grad_norm: 1.1689 (1.5348) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 14:35:14 root] (utils.py 283): INFO Epoch: [9] [2390/2502] eta: 0:05:21 lr: 0.000010 loss_cls: 2.5978 (2.5562) grad_norm: 1.1378 (1.5337) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 14:35:43 root] (utils.py 283): INFO Epoch: [9] [2400/2502] eta: 0:04:52 lr: 0.000010 loss_cls: 2.5107 (2.5557) grad_norm: 1.0977 (1.5322) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 14:36:11 root] (utils.py 283): INFO Epoch: [9] [2410/2502] eta: 0:04:24 lr: 0.000010 loss_cls: 2.6497 (2.5560) grad_norm: 1.1573 (1.5366) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 14:36:40 root] (utils.py 283): INFO Epoch: [9] [2420/2502] eta: 0:03:55 lr: 0.000010 loss_cls: 2.7337 (2.5555) grad_norm: 1.2067 (1.5782) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 14:37:09 root] (utils.py 283): INFO Epoch: [9] [2430/2502] eta: 0:03:26 lr: 0.000010 loss_cls: 2.7412 (2.5560) grad_norm: 1.2464 (1.5779) time: 2.8658 data: 0.0002 max mem: 28454 +[2024-12-12 14:37:37 root] (utils.py 283): INFO Epoch: [9] [2440/2502] eta: 0:02:58 lr: 0.000010 loss_cls: 2.8568 (2.5573) grad_norm: 1.2304 (1.5799) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 14:38:06 root] (utils.py 283): INFO Epoch: [9] [2450/2502] eta: 0:02:29 lr: 0.000010 loss_cls: 2.7520 (2.5573) grad_norm: 1.1522 (1.5781) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 14:38:35 root] (utils.py 283): INFO Epoch: [9] [2460/2502] eta: 0:02:00 lr: 0.000010 loss_cls: 2.7520 (2.5585) grad_norm: 1.1721 (1.5770) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-12 14:39:03 root] (utils.py 283): INFO Epoch: [9] [2470/2502] eta: 0:01:31 lr: 0.000010 loss_cls: 2.7794 (2.5580) grad_norm: 1.2645 (1.5752) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 14:39:32 root] (utils.py 283): INFO Epoch: [9] [2480/2502] eta: 0:01:03 lr: 0.000010 loss_cls: 2.5793 (2.5580) grad_norm: 1.1018 (1.5738) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 14:40:01 root] (utils.py 283): INFO Epoch: [9] [2490/2502] eta: 0:00:34 lr: 0.000010 loss_cls: 2.5965 (2.5584) grad_norm: 1.1138 (1.5732) time: 2.8907 data: 0.0212 max mem: 28454 +[2024-12-12 14:40:30 root] (utils.py 283): INFO Epoch: [9] [2500/2502] eta: 0:00:05 lr: 0.000010 loss_cls: 2.6952 (2.5582) grad_norm: 1.2385 (1.5867) time: 2.8878 data: 0.0212 max mem: 28454 +[2024-12-12 14:40:33 root] (utils.py 283): INFO Epoch: [9] [2501/2502] eta: 0:00:02 lr: 0.000010 loss_cls: 2.6952 (2.5583) grad_norm: 1.2385 (1.5866) time: 2.8873 data: 0.0212 max mem: 28454 +[2024-12-12 14:40:33 root] (utils.py 297): INFO Epoch: [9] Total time: 1:59:47 (2.8725 s / it) +[2024-12-12 14:40:33 root] (engine.py 179): INFO Averaged stats:lr: 0.000010 loss_cls: 2.6952 (2.5632) grad_norm: 1.2385 (1.5866) +[2024-12-12 14:40:36 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.4141 (0.4141) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5420 data: 0.0003 max mem: 28454 +[2024-12-12 14:40:41 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6162 (0.5970) acc1: 86.7188 (86.9318) acc3: 97.6562 (96.9460) acc5: 98.4375 (98.0114) time: 0.5455 data: 0.0005 max mem: 28454 +[2024-12-12 14:40:47 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6140 (0.6195) acc1: 87.5000 (86.7932) acc3: 96.8750 (96.6890) acc5: 98.4375 (97.7679) time: 0.5461 data: 0.0005 max mem: 28454 +[2024-12-12 14:40:52 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6140 (0.6526) acc1: 86.7188 (85.6603) acc3: 96.0938 (96.3710) acc5: 97.6562 (97.6562) time: 0.5470 data: 0.0004 max mem: 28454 +[2024-12-12 14:40:58 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6802 (0.6595) acc1: 86.7188 (85.5945) acc3: 96.0938 (96.3224) acc5: 97.6562 (97.5800) time: 0.5478 data: 0.0004 max mem: 28454 +[2024-12-12 14:41:03 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8336 (0.7427) acc1: 79.6875 (83.6857) acc3: 92.9688 (95.1746) acc5: 95.3125 (96.7218) time: 0.5478 data: 0.0004 max mem: 28454 +[2024-12-12 14:41:09 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0044 (0.7739) acc1: 79.6875 (83.3120) acc3: 90.6250 (94.5056) acc5: 92.9688 (96.2090) time: 0.5478 data: 0.0004 max mem: 28454 +[2024-12-12 14:41:14 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9720 (0.7988) acc1: 80.4688 (82.5814) acc3: 92.1875 (94.2121) acc5: 94.5312 (96.0607) time: 0.5476 data: 0.0004 max mem: 28454 +[2024-12-12 14:41:19 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9269 (0.8220) acc1: 78.1250 (82.1566) acc3: 92.1875 (93.8754) acc5: 94.5312 (95.7755) time: 0.5476 data: 0.0006 max mem: 28454 +[2024-12-12 14:41:25 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9321 (0.8418) acc1: 77.3438 (81.4990) acc3: 92.1875 (93.5440) acc5: 94.5312 (95.6216) time: 0.5484 data: 0.0006 max mem: 28454 +[2024-12-12 14:41:29 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9211 (0.8375) acc1: 77.3438 (81.5040) acc3: 92.1875 (93.5920) acc5: 94.5312 (95.6720) time: 0.5387 data: 0.0006 max mem: 28454 +[2024-12-12 14:41:29 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5457 s / it) +[2024-12-12 14:41:29 root] (engine.py 264): INFO * Acc@1 81.710 Acc@3 93.420 Acc@5 95.712 loss 0.836 flops 13.207 layer_flops 13.109 +[2024-12-12 14:41:29 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.7% +[2024-12-12 14:41:29 root] (main.py 576): INFO Max accuracy: 81.77% +[2024-12-12 14:41:31 root] (utils.py 283): INFO Epoch: [10] [ 0/2502] eta: 1:58:29 lr: 0.000008 loss_cls: 2.3675 (2.3675) grad_norm: 1.1526 (1.1526) time: 2.8416 data: 0.0003 max mem: 28454 +[2024-12-12 14:42:00 root] (utils.py 283): INFO Epoch: [10] [ 10/2502] eta: 1:59:06 lr: 0.000008 loss_cls: 2.6989 (2.5843) grad_norm: 1.2590 (1.3590) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 14:42:29 root] (utils.py 283): INFO Epoch: [10] [ 20/2502] eta: 1:58:41 lr: 0.000008 loss_cls: 2.7809 (2.6075) grad_norm: 1.1526 (1.2505) time: 2.8708 data: 0.0003 max mem: 28454 +[2024-12-12 14:42:58 root] (utils.py 283): INFO Epoch: [10] [ 30/2502] eta: 1:58:17 lr: 0.000008 loss_cls: 2.7241 (2.5737) grad_norm: 1.1389 (1.2170) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 14:43:26 root] (utils.py 283): INFO Epoch: [10] [ 40/2502] eta: 1:57:52 lr: 0.000008 loss_cls: 2.5353 (2.5163) grad_norm: 1.1628 (1.2565) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 14:43:55 root] (utils.py 283): INFO Epoch: [10] [ 50/2502] eta: 1:57:23 lr: 0.000008 loss_cls: 2.3576 (2.5064) grad_norm: 1.1590 (1.2250) time: 2.8749 data: 0.0003 max mem: 28454 +[2024-12-12 14:44:24 root] (utils.py 283): INFO Epoch: [10] [ 60/2502] eta: 1:56:52 lr: 0.000008 loss_cls: 2.5618 (2.5194) grad_norm: 1.1258 (1.2186) time: 2.8693 data: 0.0003 max mem: 28454 +[2024-12-12 14:44:52 root] (utils.py 283): INFO Epoch: [10] [ 70/2502] eta: 1:56:23 lr: 0.000008 loss_cls: 2.7101 (2.5299) grad_norm: 1.1439 (1.2042) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 14:45:21 root] (utils.py 283): INFO Epoch: [10] [ 80/2502] eta: 1:55:55 lr: 0.000008 loss_cls: 2.6814 (2.5390) grad_norm: 1.1356 (1.2024) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 14:45:50 root] (utils.py 283): INFO Epoch: [10] [ 90/2502] eta: 1:55:27 lr: 0.000008 loss_cls: 2.6581 (2.5278) grad_norm: 1.0899 (1.1915) time: 2.8749 data: 0.0003 max mem: 28454 +[2024-12-12 14:46:19 root] (utils.py 283): INFO Epoch: [10] [ 100/2502] eta: 1:54:58 lr: 0.000008 loss_cls: 2.1778 (2.5037) grad_norm: 1.0738 (1.1822) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 14:46:47 root] (utils.py 283): INFO Epoch: [10] [ 110/2502] eta: 1:54:30 lr: 0.000008 loss_cls: 2.4173 (2.5040) grad_norm: 1.1065 (1.1836) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-12 14:47:16 root] (utils.py 283): INFO Epoch: [10] [ 120/2502] eta: 1:54:01 lr: 0.000008 loss_cls: 2.5974 (2.5079) grad_norm: 1.1236 (1.2094) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 14:47:45 root] (utils.py 283): INFO Epoch: [10] [ 130/2502] eta: 1:53:33 lr: 0.000008 loss_cls: 2.5128 (2.5074) grad_norm: 1.0752 (1.2020) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 14:48:14 root] (utils.py 283): INFO Epoch: [10] [ 140/2502] eta: 1:53:05 lr: 0.000008 loss_cls: 2.5128 (2.5097) grad_norm: 1.1375 (1.2122) time: 2.8756 data: 0.0003 max mem: 28454 +[2024-12-12 14:48:42 root] (utils.py 283): INFO Epoch: [10] [ 150/2502] eta: 1:52:37 lr: 0.000008 loss_cls: 2.7581 (2.5194) grad_norm: 1.1986 (1.2049) time: 2.8763 data: 0.0003 max mem: 28454 +[2024-12-12 14:49:11 root] (utils.py 283): INFO Epoch: [10] [ 160/2502] eta: 1:52:09 lr: 0.000008 loss_cls: 2.8083 (2.5254) grad_norm: 1.1054 (1.1965) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 14:49:40 root] (utils.py 283): INFO Epoch: [10] [ 170/2502] eta: 1:51:40 lr: 0.000008 loss_cls: 2.6555 (2.5144) grad_norm: 1.1376 (1.2241) time: 2.8760 data: 0.0003 max mem: 28454 +[2024-12-12 14:50:09 root] (utils.py 283): INFO Epoch: [10] [ 180/2502] eta: 1:51:12 lr: 0.000008 loss_cls: 2.6287 (2.5243) grad_norm: 1.1982 (1.2199) time: 2.8761 data: 0.0003 max mem: 28454 +[2024-12-12 14:50:38 root] (utils.py 283): INFO Epoch: [10] [ 190/2502] eta: 1:50:44 lr: 0.000008 loss_cls: 2.7147 (2.5401) grad_norm: 1.1346 (1.2217) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 14:51:06 root] (utils.py 283): INFO Epoch: [10] [ 200/2502] eta: 1:50:15 lr: 0.000008 loss_cls: 2.8003 (2.5535) grad_norm: 1.1346 (1.2193) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 14:51:35 root] (utils.py 283): INFO Epoch: [10] [ 210/2502] eta: 1:49:47 lr: 0.000008 loss_cls: 2.8796 (2.5631) grad_norm: 1.1838 (1.2300) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 14:52:04 root] (utils.py 283): INFO Epoch: [10] [ 220/2502] eta: 1:49:18 lr: 0.000008 loss_cls: 2.6995 (2.5646) grad_norm: 1.1578 (1.2255) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 14:52:33 root] (utils.py 283): INFO Epoch: [10] [ 230/2502] eta: 1:48:49 lr: 0.000008 loss_cls: 2.6251 (2.5647) grad_norm: 1.0636 (1.2283) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 14:53:01 root] (utils.py 283): INFO Epoch: [10] [ 240/2502] eta: 1:48:20 lr: 0.000008 loss_cls: 2.5732 (2.5598) grad_norm: 1.2159 (1.7004) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 14:53:30 root] (utils.py 283): INFO Epoch: [10] [ 250/2502] eta: 1:47:51 lr: 0.000008 loss_cls: 2.5319 (2.5574) grad_norm: 1.2281 (1.6897) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 14:53:59 root] (utils.py 283): INFO Epoch: [10] [ 260/2502] eta: 1:47:23 lr: 0.000008 loss_cls: 2.6714 (2.5584) grad_norm: 1.3814 (1.6813) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 14:54:27 root] (utils.py 283): INFO Epoch: [10] [ 270/2502] eta: 1:46:54 lr: 0.000008 loss_cls: 2.6657 (2.5594) grad_norm: 1.3246 (1.7822) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 14:54:56 root] (utils.py 283): INFO Epoch: [10] [ 280/2502] eta: 1:46:25 lr: 0.000008 loss_cls: 2.7558 (2.5633) grad_norm: 1.1960 (1.7631) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 14:55:25 root] (utils.py 283): INFO Epoch: [10] [ 290/2502] eta: 1:45:56 lr: 0.000008 loss_cls: 2.6822 (2.5613) grad_norm: 1.1711 (1.7434) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 14:55:54 root] (utils.py 283): INFO Epoch: [10] [ 300/2502] eta: 1:45:27 lr: 0.000008 loss_cls: 2.5798 (2.5607) grad_norm: 1.2156 (1.7314) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-12 14:56:22 root] (utils.py 283): INFO Epoch: [10] [ 310/2502] eta: 1:44:59 lr: 0.000008 loss_cls: 2.4801 (2.5590) grad_norm: 1.2227 (1.7585) time: 2.8757 data: 0.0003 max mem: 28454 +[2024-12-12 14:56:51 root] (utils.py 283): INFO Epoch: [10] [ 320/2502] eta: 1:44:31 lr: 0.000008 loss_cls: 2.6574 (2.5642) grad_norm: 1.1455 (1.7660) time: 2.8789 data: 0.0002 max mem: 28454 +[2024-12-12 14:57:20 root] (utils.py 283): INFO Epoch: [10] [ 330/2502] eta: 1:44:03 lr: 0.000008 loss_cls: 2.7237 (2.5639) grad_norm: 1.1157 (1.7494) time: 2.8841 data: 0.0002 max mem: 28454 +[2024-12-12 14:57:49 root] (utils.py 283): INFO Epoch: [10] [ 340/2502] eta: 1:43:34 lr: 0.000008 loss_cls: 2.7124 (2.5689) grad_norm: 1.0926 (1.7300) time: 2.8826 data: 0.0003 max mem: 28454 +[2024-12-12 14:58:18 root] (utils.py 283): INFO Epoch: [10] [ 350/2502] eta: 1:43:06 lr: 0.000008 loss_cls: 2.7124 (2.5680) grad_norm: 1.0837 (1.7167) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 14:58:46 root] (utils.py 283): INFO Epoch: [10] [ 360/2502] eta: 1:42:37 lr: 0.000008 loss_cls: 2.5909 (2.5623) grad_norm: 1.1157 (1.7147) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 14:59:15 root] (utils.py 283): INFO Epoch: [10] [ 370/2502] eta: 1:42:08 lr: 0.000008 loss_cls: 2.5923 (2.5636) grad_norm: 1.1474 (1.7165) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 14:59:44 root] (utils.py 283): INFO Epoch: [10] [ 380/2502] eta: 1:41:39 lr: 0.000008 loss_cls: 2.6356 (2.5624) grad_norm: 1.2331 (1.7027) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 15:00:13 root] (utils.py 283): INFO Epoch: [10] [ 390/2502] eta: 1:41:11 lr: 0.000008 loss_cls: 2.6305 (2.5616) grad_norm: 1.1401 (1.6948) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 15:00:41 root] (utils.py 283): INFO Epoch: [10] [ 400/2502] eta: 1:40:42 lr: 0.000008 loss_cls: 2.6305 (2.5616) grad_norm: 1.1354 (1.6826) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 15:01:10 root] (utils.py 283): INFO Epoch: [10] [ 410/2502] eta: 1:40:13 lr: 0.000008 loss_cls: 2.5551 (2.5592) grad_norm: 1.0923 (1.6700) time: 2.8751 data: 0.0003 max mem: 28454 +[2024-12-12 15:01:39 root] (utils.py 283): INFO Epoch: [10] [ 420/2502] eta: 1:39:44 lr: 0.000008 loss_cls: 2.7760 (2.5660) grad_norm: 1.1404 (1.6631) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 15:02:08 root] (utils.py 283): INFO Epoch: [10] [ 430/2502] eta: 1:39:15 lr: 0.000008 loss_cls: 2.8482 (2.5712) grad_norm: 1.1733 (1.6928) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 15:02:36 root] (utils.py 283): INFO Epoch: [10] [ 440/2502] eta: 1:38:46 lr: 0.000008 loss_cls: 2.6785 (2.5640) grad_norm: 1.2004 (1.6830) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 15:03:05 root] (utils.py 283): INFO Epoch: [10] [ 450/2502] eta: 1:38:18 lr: 0.000008 loss_cls: 2.2292 (2.5596) grad_norm: 1.3601 (1.6795) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 15:03:34 root] (utils.py 283): INFO Epoch: [10] [ 460/2502] eta: 1:37:49 lr: 0.000008 loss_cls: 2.6173 (2.5619) grad_norm: 1.3615 (1.6878) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 15:04:02 root] (utils.py 283): INFO Epoch: [10] [ 470/2502] eta: 1:37:20 lr: 0.000008 loss_cls: 2.8058 (2.5635) grad_norm: 1.1323 (1.6757) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 15:04:31 root] (utils.py 283): INFO Epoch: [10] [ 480/2502] eta: 1:36:51 lr: 0.000008 loss_cls: 2.7066 (2.5598) grad_norm: 1.1031 (1.6653) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 15:05:00 root] (utils.py 283): INFO Epoch: [10] [ 490/2502] eta: 1:36:23 lr: 0.000008 loss_cls: 2.2649 (2.5568) grad_norm: 1.1524 (1.6712) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 15:05:29 root] (utils.py 283): INFO Epoch: [10] [ 500/2502] eta: 1:35:54 lr: 0.000008 loss_cls: 2.4224 (2.5533) grad_norm: 1.1387 (1.6610) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 15:05:57 root] (utils.py 283): INFO Epoch: [10] [ 510/2502] eta: 1:35:25 lr: 0.000008 loss_cls: 2.3833 (2.5475) grad_norm: 1.0815 (1.6533) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 15:06:26 root] (utils.py 283): INFO Epoch: [10] [ 520/2502] eta: 1:34:57 lr: 0.000008 loss_cls: 2.2109 (2.5454) grad_norm: 1.0606 (1.6432) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 15:06:55 root] (utils.py 283): INFO Epoch: [10] [ 530/2502] eta: 1:34:28 lr: 0.000008 loss_cls: 2.6379 (2.5471) grad_norm: 1.0343 (1.6766) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 15:07:24 root] (utils.py 283): INFO Epoch: [10] [ 540/2502] eta: 1:33:59 lr: 0.000008 loss_cls: 2.6379 (2.5433) grad_norm: 1.0689 (1.6673) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 15:07:53 root] (utils.py 283): INFO Epoch: [10] [ 550/2502] eta: 1:33:30 lr: 0.000008 loss_cls: 2.4888 (2.5454) grad_norm: 1.1522 (1.6581) time: 2.8761 data: 0.0003 max mem: 28454 +[2024-12-12 15:08:21 root] (utils.py 283): INFO Epoch: [10] [ 560/2502] eta: 1:33:02 lr: 0.000008 loss_cls: 2.4888 (2.5445) grad_norm: 1.1522 (1.6517) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 15:08:50 root] (utils.py 283): INFO Epoch: [10] [ 570/2502] eta: 1:32:33 lr: 0.000008 loss_cls: 2.5670 (2.5476) grad_norm: 1.1616 (1.6426) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 15:09:19 root] (utils.py 283): INFO Epoch: [10] [ 580/2502] eta: 1:32:04 lr: 0.000008 loss_cls: 2.5670 (2.5469) grad_norm: 1.1237 (1.6343) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-12 15:09:47 root] (utils.py 283): INFO Epoch: [10] [ 590/2502] eta: 1:31:35 lr: 0.000008 loss_cls: 2.6777 (2.5475) grad_norm: 1.1255 (1.6261) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 15:10:16 root] (utils.py 283): INFO Epoch: [10] [ 600/2502] eta: 1:31:06 lr: 0.000008 loss_cls: 2.7224 (2.5495) grad_norm: 1.0834 (1.6195) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 15:10:45 root] (utils.py 283): INFO Epoch: [10] [ 610/2502] eta: 1:30:38 lr: 0.000008 loss_cls: 2.6396 (2.5490) grad_norm: 1.0146 (1.6099) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 15:11:14 root] (utils.py 283): INFO Epoch: [10] [ 620/2502] eta: 1:30:09 lr: 0.000008 loss_cls: 2.5684 (2.5476) grad_norm: 1.0964 (1.6107) time: 2.8710 data: 0.0003 max mem: 28454 +[2024-12-12 15:11:42 root] (utils.py 283): INFO Epoch: [10] [ 630/2502] eta: 1:29:40 lr: 0.000008 loss_cls: 2.5845 (2.5457) grad_norm: 1.1064 (1.6046) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 15:12:11 root] (utils.py 283): INFO Epoch: [10] [ 640/2502] eta: 1:29:11 lr: 0.000008 loss_cls: 2.5845 (2.5458) grad_norm: 1.1332 (1.5983) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 15:12:40 root] (utils.py 283): INFO Epoch: [10] [ 650/2502] eta: 1:28:42 lr: 0.000008 loss_cls: 2.5881 (2.5453) grad_norm: 1.2157 (1.5934) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 15:13:08 root] (utils.py 283): INFO Epoch: [10] [ 660/2502] eta: 1:28:14 lr: 0.000008 loss_cls: 2.6117 (2.5431) grad_norm: 1.1510 (1.5859) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 15:13:37 root] (utils.py 283): INFO Epoch: [10] [ 670/2502] eta: 1:27:44 lr: 0.000008 loss_cls: 2.3424 (2.5408) grad_norm: 1.1498 (1.5817) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 15:14:06 root] (utils.py 283): INFO Epoch: [10] [ 680/2502] eta: 1:27:16 lr: 0.000008 loss_cls: 2.3253 (2.5388) grad_norm: 1.1498 (1.5766) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 15:14:34 root] (utils.py 283): INFO Epoch: [10] [ 690/2502] eta: 1:26:47 lr: 0.000008 loss_cls: 2.6369 (2.5418) grad_norm: 1.1422 (1.5716) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 15:15:03 root] (utils.py 283): INFO Epoch: [10] [ 700/2502] eta: 1:26:18 lr: 0.000008 loss_cls: 2.8012 (2.5451) grad_norm: 1.1593 (1.5666) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 15:15:32 root] (utils.py 283): INFO Epoch: [10] [ 710/2502] eta: 1:25:49 lr: 0.000008 loss_cls: 2.8015 (2.5447) grad_norm: 1.1593 (1.5663) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 15:16:01 root] (utils.py 283): INFO Epoch: [10] [ 720/2502] eta: 1:25:20 lr: 0.000008 loss_cls: 2.6713 (2.5464) grad_norm: 1.0843 (1.5594) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 15:16:29 root] (utils.py 283): INFO Epoch: [10] [ 730/2502] eta: 1:24:52 lr: 0.000008 loss_cls: 2.6585 (2.5448) grad_norm: 1.0778 (1.5532) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 15:16:58 root] (utils.py 283): INFO Epoch: [10] [ 740/2502] eta: 1:24:23 lr: 0.000008 loss_cls: 2.6102 (2.5463) grad_norm: 1.1383 (1.5477) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 15:17:27 root] (utils.py 283): INFO Epoch: [10] [ 750/2502] eta: 1:23:54 lr: 0.000008 loss_cls: 2.4741 (2.5422) grad_norm: 1.1383 (1.5431) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 15:17:55 root] (utils.py 283): INFO Epoch: [10] [ 760/2502] eta: 1:23:25 lr: 0.000008 loss_cls: 2.3762 (2.5403) grad_norm: 1.0764 (1.5376) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 15:18:24 root] (utils.py 283): INFO Epoch: [10] [ 770/2502] eta: 1:22:56 lr: 0.000008 loss_cls: 2.5471 (2.5406) grad_norm: 1.0540 (1.5323) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 15:18:53 root] (utils.py 283): INFO Epoch: [10] [ 780/2502] eta: 1:22:27 lr: 0.000008 loss_cls: 2.5824 (2.5410) grad_norm: 1.1452 (1.5307) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 15:19:21 root] (utils.py 283): INFO Epoch: [10] [ 790/2502] eta: 1:21:58 lr: 0.000008 loss_cls: 2.5498 (2.5421) grad_norm: 1.1683 (1.5310) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 15:19:50 root] (utils.py 283): INFO Epoch: [10] [ 800/2502] eta: 1:21:30 lr: 0.000008 loss_cls: 2.5733 (2.5425) grad_norm: 1.2079 (1.5306) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 15:20:19 root] (utils.py 283): INFO Epoch: [10] [ 810/2502] eta: 1:21:01 lr: 0.000008 loss_cls: 2.5371 (2.5424) grad_norm: 1.1931 (1.5315) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 15:20:48 root] (utils.py 283): INFO Epoch: [10] [ 820/2502] eta: 1:20:32 lr: 0.000008 loss_cls: 2.4904 (2.5427) grad_norm: 1.1821 (1.5278) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 15:21:16 root] (utils.py 283): INFO Epoch: [10] [ 830/2502] eta: 1:20:04 lr: 0.000008 loss_cls: 2.7211 (2.5455) grad_norm: 1.1744 (1.5241) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 15:21:45 root] (utils.py 283): INFO Epoch: [10] [ 840/2502] eta: 1:19:35 lr: 0.000008 loss_cls: 2.8414 (2.5466) grad_norm: 1.1218 (1.5187) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 15:22:14 root] (utils.py 283): INFO Epoch: [10] [ 850/2502] eta: 1:19:06 lr: 0.000008 loss_cls: 2.8179 (2.5481) grad_norm: 1.1043 (1.5285) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 15:22:42 root] (utils.py 283): INFO Epoch: [10] [ 860/2502] eta: 1:18:37 lr: 0.000008 loss_cls: 2.6236 (2.5455) grad_norm: 1.1349 (1.5246) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 15:23:11 root] (utils.py 283): INFO Epoch: [10] [ 870/2502] eta: 1:18:08 lr: 0.000008 loss_cls: 2.2517 (2.5422) grad_norm: 1.0844 (1.5195) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 15:23:40 root] (utils.py 283): INFO Epoch: [10] [ 880/2502] eta: 1:17:40 lr: 0.000008 loss_cls: 2.3825 (2.5422) grad_norm: 1.0844 (1.5166) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 15:24:09 root] (utils.py 283): INFO Epoch: [10] [ 890/2502] eta: 1:17:11 lr: 0.000008 loss_cls: 2.5595 (2.5400) grad_norm: 1.0874 (1.5121) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 15:24:37 root] (utils.py 283): INFO Epoch: [10] [ 900/2502] eta: 1:16:42 lr: 0.000008 loss_cls: 2.6100 (2.5406) grad_norm: 1.0401 (1.5075) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 15:25:06 root] (utils.py 283): INFO Epoch: [10] [ 910/2502] eta: 1:16:13 lr: 0.000008 loss_cls: 2.7257 (2.5418) grad_norm: 1.0297 (1.5038) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 15:25:35 root] (utils.py 283): INFO Epoch: [10] [ 920/2502] eta: 1:15:45 lr: 0.000008 loss_cls: 2.6580 (2.5411) grad_norm: 1.1146 (1.4999) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 15:26:04 root] (utils.py 283): INFO Epoch: [10] [ 930/2502] eta: 1:15:16 lr: 0.000008 loss_cls: 2.3693 (2.5390) grad_norm: 1.1104 (1.4959) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 15:26:32 root] (utils.py 283): INFO Epoch: [10] [ 940/2502] eta: 1:14:47 lr: 0.000008 loss_cls: 2.3752 (2.5395) grad_norm: 1.0831 (1.4938) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 15:27:01 root] (utils.py 283): INFO Epoch: [10] [ 950/2502] eta: 1:14:18 lr: 0.000008 loss_cls: 2.6602 (2.5394) grad_norm: 1.0247 (1.4901) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 15:27:30 root] (utils.py 283): INFO Epoch: [10] [ 960/2502] eta: 1:13:50 lr: 0.000008 loss_cls: 2.5342 (2.5383) grad_norm: 1.1518 (1.4908) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 15:27:58 root] (utils.py 283): INFO Epoch: [10] [ 970/2502] eta: 1:13:21 lr: 0.000008 loss_cls: 2.4657 (2.5369) grad_norm: 1.1328 (1.4908) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-12 15:28:27 root] (utils.py 283): INFO Epoch: [10] [ 980/2502] eta: 1:12:52 lr: 0.000008 loss_cls: 2.4561 (2.5360) grad_norm: 1.0977 (1.4870) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 15:28:56 root] (utils.py 283): INFO Epoch: [10] [ 990/2502] eta: 1:12:23 lr: 0.000008 loss_cls: 2.5592 (2.5365) grad_norm: 1.1230 (1.4838) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 15:29:24 root] (utils.py 283): INFO Epoch: [10] [1000/2502] eta: 1:11:54 lr: 0.000008 loss_cls: 2.5709 (2.5364) grad_norm: 1.1408 (1.4921) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 15:29:53 root] (utils.py 283): INFO Epoch: [10] [1010/2502] eta: 1:11:26 lr: 0.000008 loss_cls: 2.6769 (2.5368) grad_norm: 1.1538 (1.4917) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 15:30:22 root] (utils.py 283): INFO Epoch: [10] [1020/2502] eta: 1:10:57 lr: 0.000008 loss_cls: 2.6769 (2.5368) grad_norm: 1.2228 (1.4895) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 15:30:50 root] (utils.py 283): INFO Epoch: [10] [1030/2502] eta: 1:10:28 lr: 0.000008 loss_cls: 2.4586 (2.5361) grad_norm: 1.1697 (1.4965) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 15:31:19 root] (utils.py 283): INFO Epoch: [10] [1040/2502] eta: 1:09:59 lr: 0.000008 loss_cls: 2.4586 (2.5358) grad_norm: 1.0604 (1.4929) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 15:31:48 root] (utils.py 283): INFO Epoch: [10] [1050/2502] eta: 1:09:31 lr: 0.000008 loss_cls: 2.5238 (2.5349) grad_norm: 1.0643 (1.4898) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 15:32:17 root] (utils.py 283): INFO Epoch: [10] [1060/2502] eta: 1:09:02 lr: 0.000008 loss_cls: 2.4714 (2.5339) grad_norm: 1.1110 (1.4865) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 15:32:45 root] (utils.py 283): INFO Epoch: [10] [1070/2502] eta: 1:08:33 lr: 0.000008 loss_cls: 2.5052 (2.5356) grad_norm: 1.0793 (1.4835) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 15:33:14 root] (utils.py 283): INFO Epoch: [10] [1080/2502] eta: 1:08:04 lr: 0.000008 loss_cls: 2.6200 (2.5354) grad_norm: 1.0764 (1.5048) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 15:33:43 root] (utils.py 283): INFO Epoch: [10] [1090/2502] eta: 1:07:36 lr: 0.000008 loss_cls: 2.6027 (2.5362) grad_norm: 1.1106 (1.5097) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 15:34:12 root] (utils.py 283): INFO Epoch: [10] [1100/2502] eta: 1:07:07 lr: 0.000008 loss_cls: 2.5759 (2.5342) grad_norm: 1.1469 (1.5067) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 15:34:40 root] (utils.py 283): INFO Epoch: [10] [1110/2502] eta: 1:06:38 lr: 0.000008 loss_cls: 2.3404 (2.5342) grad_norm: 1.1440 (1.5067) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 15:35:09 root] (utils.py 283): INFO Epoch: [10] [1120/2502] eta: 1:06:10 lr: 0.000008 loss_cls: 2.6153 (2.5351) grad_norm: 1.1555 (1.5045) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 15:35:38 root] (utils.py 283): INFO Epoch: [10] [1130/2502] eta: 1:05:41 lr: 0.000008 loss_cls: 2.4970 (2.5344) grad_norm: 1.1294 (1.5309) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 15:36:06 root] (utils.py 283): INFO Epoch: [10] [1140/2502] eta: 1:05:12 lr: 0.000008 loss_cls: 2.4720 (2.5333) grad_norm: 1.1240 (1.5302) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 15:36:35 root] (utils.py 283): INFO Epoch: [10] [1150/2502] eta: 1:04:43 lr: 0.000008 loss_cls: 2.6304 (2.5337) grad_norm: 1.2999 (1.5292) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 15:37:04 root] (utils.py 283): INFO Epoch: [10] [1160/2502] eta: 1:04:15 lr: 0.000008 loss_cls: 2.7256 (2.5352) grad_norm: 1.1839 (1.5260) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 15:37:33 root] (utils.py 283): INFO Epoch: [10] [1170/2502] eta: 1:03:46 lr: 0.000008 loss_cls: 2.6458 (2.5346) grad_norm: 1.1441 (1.5233) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 15:38:01 root] (utils.py 283): INFO Epoch: [10] [1180/2502] eta: 1:03:17 lr: 0.000008 loss_cls: 2.6764 (2.5352) grad_norm: 1.0730 (1.5199) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 15:38:30 root] (utils.py 283): INFO Epoch: [10] [1190/2502] eta: 1:02:48 lr: 0.000008 loss_cls: 2.5882 (2.5342) grad_norm: 1.0817 (1.5430) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 15:38:59 root] (utils.py 283): INFO Epoch: [10] [1200/2502] eta: 1:02:20 lr: 0.000008 loss_cls: 2.6187 (2.5355) grad_norm: 1.1833 (1.5419) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 15:39:27 root] (utils.py 283): INFO Epoch: [10] [1210/2502] eta: 1:01:51 lr: 0.000008 loss_cls: 2.7530 (2.5346) grad_norm: 1.2646 (1.5413) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 15:39:56 root] (utils.py 283): INFO Epoch: [10] [1220/2502] eta: 1:01:22 lr: 0.000008 loss_cls: 2.6727 (2.5345) grad_norm: 1.1950 (1.5380) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 15:40:25 root] (utils.py 283): INFO Epoch: [10] [1230/2502] eta: 1:00:53 lr: 0.000008 loss_cls: 2.7406 (2.5358) grad_norm: 1.0686 (1.5371) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 15:40:54 root] (utils.py 283): INFO Epoch: [10] [1240/2502] eta: 1:00:25 lr: 0.000008 loss_cls: 2.8143 (2.5372) grad_norm: 1.1245 (1.5342) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 15:41:22 root] (utils.py 283): INFO Epoch: [10] [1250/2502] eta: 0:59:56 lr: 0.000008 loss_cls: 2.7566 (2.5380) grad_norm: 1.1749 (1.5316) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 15:41:51 root] (utils.py 283): INFO Epoch: [10] [1260/2502] eta: 0:59:27 lr: 0.000008 loss_cls: 2.7053 (2.5392) grad_norm: 1.1833 (1.5291) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 15:42:20 root] (utils.py 283): INFO Epoch: [10] [1270/2502] eta: 0:58:58 lr: 0.000008 loss_cls: 2.6856 (2.5400) grad_norm: 1.0879 (1.5256) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 15:42:48 root] (utils.py 283): INFO Epoch: [10] [1280/2502] eta: 0:58:30 lr: 0.000008 loss_cls: 2.6856 (2.5400) grad_norm: 1.0335 (1.5387) time: 2.8667 data: 0.0003 max mem: 28454 +[2024-12-12 15:43:17 root] (utils.py 283): INFO Epoch: [10] [1290/2502] eta: 0:58:01 lr: 0.000008 loss_cls: 2.5488 (2.5396) grad_norm: 1.1204 (1.5367) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 15:43:46 root] (utils.py 283): INFO Epoch: [10] [1300/2502] eta: 0:57:32 lr: 0.000008 loss_cls: 2.5305 (2.5401) grad_norm: 1.1716 (1.5350) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 15:44:14 root] (utils.py 283): INFO Epoch: [10] [1310/2502] eta: 0:57:03 lr: 0.000008 loss_cls: 2.6377 (2.5400) grad_norm: 1.1716 (1.5346) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 15:44:43 root] (utils.py 283): INFO Epoch: [10] [1320/2502] eta: 0:56:35 lr: 0.000008 loss_cls: 2.6377 (2.5407) grad_norm: 1.1640 (1.5476) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 15:45:12 root] (utils.py 283): INFO Epoch: [10] [1330/2502] eta: 0:56:06 lr: 0.000008 loss_cls: 2.5314 (2.5400) grad_norm: 1.2286 (1.5470) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 15:45:40 root] (utils.py 283): INFO Epoch: [10] [1340/2502] eta: 0:55:37 lr: 0.000008 loss_cls: 2.6146 (2.5411) grad_norm: 1.1046 (1.5445) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 15:46:09 root] (utils.py 283): INFO Epoch: [10] [1350/2502] eta: 0:55:08 lr: 0.000008 loss_cls: 2.7118 (2.5411) grad_norm: 1.1046 (1.5418) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 15:46:38 root] (utils.py 283): INFO Epoch: [10] [1360/2502] eta: 0:54:40 lr: 0.000008 loss_cls: 2.7791 (2.5423) grad_norm: 1.1434 (1.5392) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 15:47:07 root] (utils.py 283): INFO Epoch: [10] [1370/2502] eta: 0:54:11 lr: 0.000008 loss_cls: 2.6941 (2.5433) grad_norm: 1.1161 (1.5363) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 15:47:35 root] (utils.py 283): INFO Epoch: [10] [1380/2502] eta: 0:53:42 lr: 0.000008 loss_cls: 2.6479 (2.5439) grad_norm: 1.1161 (1.5339) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 15:48:04 root] (utils.py 283): INFO Epoch: [10] [1390/2502] eta: 0:53:13 lr: 0.000008 loss_cls: 2.4691 (2.5431) grad_norm: 1.1303 (1.5315) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 15:48:33 root] (utils.py 283): INFO Epoch: [10] [1400/2502] eta: 0:52:45 lr: 0.000008 loss_cls: 2.6829 (2.5441) grad_norm: 1.1631 (1.5289) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 15:49:01 root] (utils.py 283): INFO Epoch: [10] [1410/2502] eta: 0:52:16 lr: 0.000008 loss_cls: 2.7384 (2.5444) grad_norm: 1.1631 (1.5261) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 15:49:30 root] (utils.py 283): INFO Epoch: [10] [1420/2502] eta: 0:51:47 lr: 0.000008 loss_cls: 2.6401 (2.5446) grad_norm: 1.1699 (1.5248) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 15:49:59 root] (utils.py 283): INFO Epoch: [10] [1430/2502] eta: 0:51:19 lr: 0.000008 loss_cls: 2.6289 (2.5448) grad_norm: 1.2093 (1.5435) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 15:50:28 root] (utils.py 283): INFO Epoch: [10] [1440/2502] eta: 0:50:50 lr: 0.000008 loss_cls: 2.6283 (2.5452) grad_norm: 1.1277 (1.5564) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 15:50:56 root] (utils.py 283): INFO Epoch: [10] [1450/2502] eta: 0:50:21 lr: 0.000008 loss_cls: 2.5959 (2.5451) grad_norm: 1.1704 (1.5544) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 15:51:25 root] (utils.py 283): INFO Epoch: [10] [1460/2502] eta: 0:49:52 lr: 0.000008 loss_cls: 2.3436 (2.5432) grad_norm: 1.1708 (1.5522) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 15:51:54 root] (utils.py 283): INFO Epoch: [10] [1470/2502] eta: 0:49:24 lr: 0.000008 loss_cls: 2.3563 (2.5432) grad_norm: 1.1208 (1.5496) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-12 15:52:22 root] (utils.py 283): INFO Epoch: [10] [1480/2502] eta: 0:48:55 lr: 0.000008 loss_cls: 2.5404 (2.5442) grad_norm: 1.1208 (1.5472) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 15:52:51 root] (utils.py 283): INFO Epoch: [10] [1490/2502] eta: 0:48:26 lr: 0.000008 loss_cls: 2.7707 (2.5443) grad_norm: 1.2100 (1.5446) time: 2.8711 data: 0.0003 max mem: 28454 +[2024-12-12 15:53:20 root] (utils.py 283): INFO Epoch: [10] [1500/2502] eta: 0:47:57 lr: 0.000008 loss_cls: 2.7533 (2.5444) grad_norm: 1.1317 (1.5420) time: 2.8741 data: 0.0003 max mem: 28454 +[2024-12-12 15:53:49 root] (utils.py 283): INFO Epoch: [10] [1510/2502] eta: 0:47:29 lr: 0.000008 loss_cls: 2.5888 (2.5449) grad_norm: 1.1515 (1.5404) time: 2.8744 data: 0.0003 max mem: 28454 +[2024-12-12 15:54:17 root] (utils.py 283): INFO Epoch: [10] [1520/2502] eta: 0:47:00 lr: 0.000008 loss_cls: 2.7570 (2.5470) grad_norm: 1.1971 (1.5379) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 15:54:46 root] (utils.py 283): INFO Epoch: [10] [1530/2502] eta: 0:46:31 lr: 0.000008 loss_cls: 2.8380 (2.5475) grad_norm: 1.1078 (1.5352) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 15:55:15 root] (utils.py 283): INFO Epoch: [10] [1540/2502] eta: 0:46:03 lr: 0.000008 loss_cls: 2.6645 (2.5478) grad_norm: 1.1078 (1.5344) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 15:55:44 root] (utils.py 283): INFO Epoch: [10] [1550/2502] eta: 0:45:34 lr: 0.000008 loss_cls: 2.7234 (2.5484) grad_norm: 1.0788 (1.5313) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 15:56:12 root] (utils.py 283): INFO Epoch: [10] [1560/2502] eta: 0:45:05 lr: 0.000008 loss_cls: 2.7234 (2.5487) grad_norm: 1.0885 (1.5290) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 15:56:41 root] (utils.py 283): INFO Epoch: [10] [1570/2502] eta: 0:44:36 lr: 0.000008 loss_cls: 2.8392 (2.5493) grad_norm: 1.1209 (1.5337) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 15:57:10 root] (utils.py 283): INFO Epoch: [10] [1580/2502] eta: 0:44:08 lr: 0.000008 loss_cls: 2.7033 (2.5486) grad_norm: 1.1355 (1.5317) time: 2.8727 data: 0.0003 max mem: 28454 +[2024-12-12 15:57:39 root] (utils.py 283): INFO Epoch: [10] [1590/2502] eta: 0:43:39 lr: 0.000008 loss_cls: 2.6697 (2.5486) grad_norm: 1.1344 (1.5291) time: 2.8742 data: 0.0003 max mem: 28454 +[2024-12-12 15:58:07 root] (utils.py 283): INFO Epoch: [10] [1600/2502] eta: 0:43:10 lr: 0.000008 loss_cls: 2.5773 (2.5481) grad_norm: 1.1074 (1.5286) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-12 15:58:36 root] (utils.py 283): INFO Epoch: [10] [1610/2502] eta: 0:42:42 lr: 0.000008 loss_cls: 2.4881 (2.5476) grad_norm: 1.1074 (1.5264) time: 2.8744 data: 0.0003 max mem: 28454 +[2024-12-12 15:59:05 root] (utils.py 283): INFO Epoch: [10] [1620/2502] eta: 0:42:13 lr: 0.000008 loss_cls: 2.4666 (2.5460) grad_norm: 1.1725 (1.5243) time: 2.8743 data: 0.0003 max mem: 28454 +[2024-12-12 15:59:33 root] (utils.py 283): INFO Epoch: [10] [1630/2502] eta: 0:41:44 lr: 0.000008 loss_cls: 2.6187 (2.5467) grad_norm: 1.1350 (1.5870) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 16:00:02 root] (utils.py 283): INFO Epoch: [10] [1640/2502] eta: 0:41:15 lr: 0.000008 loss_cls: 2.7575 (2.5483) grad_norm: 1.1488 (1.5899) time: 2.8727 data: 0.0003 max mem: 28454 +[2024-12-12 16:00:31 root] (utils.py 283): INFO Epoch: [10] [1650/2502] eta: 0:40:47 lr: 0.000008 loss_cls: 2.8365 (2.5493) grad_norm: 1.3700 (1.5898) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 16:01:00 root] (utils.py 283): INFO Epoch: [10] [1660/2502] eta: 0:40:18 lr: 0.000008 loss_cls: 2.7728 (2.5500) grad_norm: 1.2776 (1.5946) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 16:01:28 root] (utils.py 283): INFO Epoch: [10] [1670/2502] eta: 0:39:49 lr: 0.000008 loss_cls: 2.6435 (2.5501) grad_norm: 1.2776 (1.5939) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 16:01:57 root] (utils.py 283): INFO Epoch: [10] [1680/2502] eta: 0:39:20 lr: 0.000008 loss_cls: 2.6541 (2.5501) grad_norm: 1.2673 (1.5920) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 16:02:26 root] (utils.py 283): INFO Epoch: [10] [1690/2502] eta: 0:38:52 lr: 0.000008 loss_cls: 2.6603 (2.5509) grad_norm: 1.1858 (1.6011) time: 2.8700 data: 0.0003 max mem: 28454 +[2024-12-12 16:02:54 root] (utils.py 283): INFO Epoch: [10] [1700/2502] eta: 0:38:23 lr: 0.000008 loss_cls: 2.7630 (2.5514) grad_norm: 1.0764 (1.6008) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 16:03:23 root] (utils.py 283): INFO Epoch: [10] [1710/2502] eta: 0:37:54 lr: 0.000008 loss_cls: 2.7630 (2.5518) grad_norm: 1.1449 (1.5987) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 16:03:52 root] (utils.py 283): INFO Epoch: [10] [1720/2502] eta: 0:37:26 lr: 0.000008 loss_cls: 2.6545 (2.5515) grad_norm: 1.1425 (1.5964) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 16:04:21 root] (utils.py 283): INFO Epoch: [10] [1730/2502] eta: 0:36:57 lr: 0.000008 loss_cls: 2.4487 (2.5518) grad_norm: 1.1660 (1.5946) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 16:04:49 root] (utils.py 283): INFO Epoch: [10] [1740/2502] eta: 0:36:28 lr: 0.000008 loss_cls: 2.4785 (2.5509) grad_norm: 1.1743 (1.5946) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 16:05:18 root] (utils.py 283): INFO Epoch: [10] [1750/2502] eta: 0:35:59 lr: 0.000008 loss_cls: 2.4785 (2.5513) grad_norm: 1.1728 (1.5926) time: 2.8734 data: 0.0003 max mem: 28454 +[2024-12-12 16:05:47 root] (utils.py 283): INFO Epoch: [10] [1760/2502] eta: 0:35:31 lr: 0.000008 loss_cls: 2.5257 (2.5513) grad_norm: 1.1663 (1.5904) time: 2.8743 data: 0.0003 max mem: 28454 +[2024-12-12 16:06:16 root] (utils.py 283): INFO Epoch: [10] [1770/2502] eta: 0:35:02 lr: 0.000008 loss_cls: 2.7850 (2.5525) grad_norm: 1.1367 (1.6400) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 16:06:44 root] (utils.py 283): INFO Epoch: [10] [1780/2502] eta: 0:34:33 lr: 0.000008 loss_cls: 2.7589 (2.5521) grad_norm: 1.2111 (1.6398) time: 2.8724 data: 0.0003 max mem: 28454 +[2024-12-12 16:07:13 root] (utils.py 283): INFO Epoch: [10] [1790/2502] eta: 0:34:05 lr: 0.000008 loss_cls: 2.7087 (2.5530) grad_norm: 1.5740 (1.6396) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 16:07:42 root] (utils.py 283): INFO Epoch: [10] [1800/2502] eta: 0:33:36 lr: 0.000008 loss_cls: 2.7701 (2.5531) grad_norm: 1.5028 (1.6388) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 16:08:10 root] (utils.py 283): INFO Epoch: [10] [1810/2502] eta: 0:33:07 lr: 0.000008 loss_cls: 2.7511 (2.5542) grad_norm: 1.3758 (1.6376) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 16:08:39 root] (utils.py 283): INFO Epoch: [10] [1820/2502] eta: 0:32:38 lr: 0.000008 loss_cls: 2.7115 (2.5528) grad_norm: 1.3758 (1.6363) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 16:09:08 root] (utils.py 283): INFO Epoch: [10] [1830/2502] eta: 0:32:10 lr: 0.000008 loss_cls: 2.6427 (2.5532) grad_norm: 1.2121 (1.6337) time: 2.8730 data: 0.0003 max mem: 28454 +[2024-12-12 16:09:37 root] (utils.py 283): INFO Epoch: [10] [1840/2502] eta: 0:31:41 lr: 0.000008 loss_cls: 2.6893 (2.5541) grad_norm: 1.2033 (1.6350) time: 2.8779 data: 0.0003 max mem: 28454 +[2024-12-12 16:10:05 root] (utils.py 283): INFO Epoch: [10] [1850/2502] eta: 0:31:12 lr: 0.000008 loss_cls: 2.6701 (2.5541) grad_norm: 1.2053 (1.6330) time: 2.8743 data: 0.0003 max mem: 28454 +[2024-12-12 16:10:34 root] (utils.py 283): INFO Epoch: [10] [1860/2502] eta: 0:30:44 lr: 0.000008 loss_cls: 2.7642 (2.5551) grad_norm: 1.1771 (1.6305) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-12 16:11:03 root] (utils.py 283): INFO Epoch: [10] [1870/2502] eta: 0:30:15 lr: 0.000008 loss_cls: 2.7126 (2.5542) grad_norm: 1.0559 (1.6281) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 16:11:32 root] (utils.py 283): INFO Epoch: [10] [1880/2502] eta: 0:29:46 lr: 0.000008 loss_cls: 2.6795 (2.5546) grad_norm: 1.0653 (1.6253) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 16:12:00 root] (utils.py 283): INFO Epoch: [10] [1890/2502] eta: 0:29:17 lr: 0.000008 loss_cls: 2.6795 (2.5546) grad_norm: 1.1799 (1.6237) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 16:12:29 root] (utils.py 283): INFO Epoch: [10] [1900/2502] eta: 0:28:49 lr: 0.000008 loss_cls: 2.6005 (2.5550) grad_norm: 1.1857 (1.6252) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 16:12:58 root] (utils.py 283): INFO Epoch: [10] [1910/2502] eta: 0:28:20 lr: 0.000008 loss_cls: 2.6861 (2.5546) grad_norm: 1.1577 (1.6234) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 16:13:26 root] (utils.py 283): INFO Epoch: [10] [1920/2502] eta: 0:27:51 lr: 0.000008 loss_cls: 2.7181 (2.5553) grad_norm: 1.1717 (1.6228) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 16:13:55 root] (utils.py 283): INFO Epoch: [10] [1930/2502] eta: 0:27:22 lr: 0.000008 loss_cls: 2.7684 (2.5557) grad_norm: 1.1362 (1.6202) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 16:14:24 root] (utils.py 283): INFO Epoch: [10] [1940/2502] eta: 0:26:54 lr: 0.000008 loss_cls: 2.7233 (2.5564) grad_norm: 1.1388 (1.6180) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 16:14:53 root] (utils.py 283): INFO Epoch: [10] [1950/2502] eta: 0:26:25 lr: 0.000008 loss_cls: 2.6757 (2.5566) grad_norm: 1.1221 (1.6161) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 16:15:21 root] (utils.py 283): INFO Epoch: [10] [1960/2502] eta: 0:25:56 lr: 0.000008 loss_cls: 2.6294 (2.5562) grad_norm: 1.1221 (1.6140) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 16:15:50 root] (utils.py 283): INFO Epoch: [10] [1970/2502] eta: 0:25:28 lr: 0.000008 loss_cls: 2.4241 (2.5556) grad_norm: 1.1496 (1.6140) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 16:16:19 root] (utils.py 283): INFO Epoch: [10] [1980/2502] eta: 0:24:59 lr: 0.000008 loss_cls: 2.6380 (2.5564) grad_norm: 1.1281 (1.6126) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 16:16:47 root] (utils.py 283): INFO Epoch: [10] [1990/2502] eta: 0:24:30 lr: 0.000008 loss_cls: 2.5618 (2.5556) grad_norm: 1.1551 (1.6108) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 16:17:16 root] (utils.py 283): INFO Epoch: [10] [2000/2502] eta: 0:24:01 lr: 0.000008 loss_cls: 2.4416 (2.5556) grad_norm: 1.0948 (1.6084) time: 2.8706 data: 0.0003 max mem: 28454 +[2024-12-12 16:17:45 root] (utils.py 283): INFO Epoch: [10] [2010/2502] eta: 0:23:33 lr: 0.000008 loss_cls: 2.6780 (2.5563) grad_norm: 1.0918 (1.6063) time: 2.8671 data: 0.0003 max mem: 28454 +[2024-12-12 16:18:14 root] (utils.py 283): INFO Epoch: [10] [2020/2502] eta: 0:23:04 lr: 0.000008 loss_cls: 2.6024 (2.5560) grad_norm: 1.0653 (1.6043) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 16:18:42 root] (utils.py 283): INFO Epoch: [10] [2030/2502] eta: 0:22:35 lr: 0.000008 loss_cls: 2.6769 (2.5571) grad_norm: 1.0653 (1.6018) time: 2.8716 data: 0.0003 max mem: 28454 +[2024-12-12 16:19:11 root] (utils.py 283): INFO Epoch: [10] [2040/2502] eta: 0:22:06 lr: 0.000008 loss_cls: 2.6769 (2.5565) grad_norm: 1.0573 (1.5996) time: 2.8712 data: 0.0003 max mem: 28454 +[2024-12-12 16:19:40 root] (utils.py 283): INFO Epoch: [10] [2050/2502] eta: 0:21:38 lr: 0.000008 loss_cls: 2.5385 (2.5566) grad_norm: 1.0573 (1.5972) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-12 16:20:08 root] (utils.py 283): INFO Epoch: [10] [2060/2502] eta: 0:21:09 lr: 0.000008 loss_cls: 2.6337 (2.5559) grad_norm: 1.1125 (1.5956) time: 2.8761 data: 0.0003 max mem: 28454 +[2024-12-12 16:20:37 root] (utils.py 283): INFO Epoch: [10] [2070/2502] eta: 0:20:40 lr: 0.000008 loss_cls: 2.7301 (2.5575) grad_norm: 1.1778 (1.5938) time: 2.8752 data: 0.0003 max mem: 28454 +[2024-12-12 16:21:06 root] (utils.py 283): INFO Epoch: [10] [2080/2502] eta: 0:20:12 lr: 0.000008 loss_cls: 2.8600 (2.5583) grad_norm: 1.1791 (1.5949) time: 2.8710 data: 0.0003 max mem: 28454 +[2024-12-12 16:21:35 root] (utils.py 283): INFO Epoch: [10] [2090/2502] eta: 0:19:43 lr: 0.000008 loss_cls: 2.6530 (2.5591) grad_norm: 1.1392 (1.5928) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 16:22:03 root] (utils.py 283): INFO Epoch: [10] [2100/2502] eta: 0:19:14 lr: 0.000008 loss_cls: 2.6752 (2.5596) grad_norm: 1.1798 (1.5915) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 16:22:32 root] (utils.py 283): INFO Epoch: [10] [2110/2502] eta: 0:18:45 lr: 0.000008 loss_cls: 2.7385 (2.5598) grad_norm: 1.1846 (1.5892) time: 2.8734 data: 0.0003 max mem: 28454 +[2024-12-12 16:23:01 root] (utils.py 283): INFO Epoch: [10] [2120/2502] eta: 0:18:17 lr: 0.000008 loss_cls: 2.7685 (2.5596) grad_norm: 1.1307 (1.5877) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 16:23:30 root] (utils.py 283): INFO Epoch: [10] [2130/2502] eta: 0:17:48 lr: 0.000008 loss_cls: 2.6695 (2.5601) grad_norm: 1.1363 (1.5861) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 16:23:58 root] (utils.py 283): INFO Epoch: [10] [2140/2502] eta: 0:17:19 lr: 0.000008 loss_cls: 2.6582 (2.5608) grad_norm: 1.1276 (1.5868) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 16:24:27 root] (utils.py 283): INFO Epoch: [10] [2150/2502] eta: 0:16:51 lr: 0.000008 loss_cls: 2.6307 (2.5605) grad_norm: 1.0802 (1.5844) time: 2.8721 data: 0.0003 max mem: 28454 +[2024-12-12 16:24:56 root] (utils.py 283): INFO Epoch: [10] [2160/2502] eta: 0:16:22 lr: 0.000008 loss_cls: 2.5795 (2.5607) grad_norm: 1.1089 (1.5833) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 16:25:24 root] (utils.py 283): INFO Epoch: [10] [2170/2502] eta: 0:15:53 lr: 0.000008 loss_cls: 2.5795 (2.5604) grad_norm: 1.0958 (1.5812) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 16:25:53 root] (utils.py 283): INFO Epoch: [10] [2180/2502] eta: 0:15:24 lr: 0.000008 loss_cls: 2.6155 (2.5606) grad_norm: 1.0558 (1.5790) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 16:26:22 root] (utils.py 283): INFO Epoch: [10] [2190/2502] eta: 0:14:56 lr: 0.000008 loss_cls: 2.6155 (2.5602) grad_norm: 1.0811 (1.5772) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 16:26:51 root] (utils.py 283): INFO Epoch: [10] [2200/2502] eta: 0:14:27 lr: 0.000008 loss_cls: 2.6038 (2.5602) grad_norm: 1.1564 (1.5751) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 16:27:19 root] (utils.py 283): INFO Epoch: [10] [2210/2502] eta: 0:13:58 lr: 0.000008 loss_cls: 2.3785 (2.5584) grad_norm: 1.0444 (1.5727) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 16:27:48 root] (utils.py 283): INFO Epoch: [10] [2220/2502] eta: 0:13:29 lr: 0.000008 loss_cls: 2.3785 (2.5586) grad_norm: 1.0566 (1.5743) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 16:28:17 root] (utils.py 283): INFO Epoch: [10] [2230/2502] eta: 0:13:01 lr: 0.000008 loss_cls: 2.6950 (2.5581) grad_norm: 1.1334 (1.5887) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 16:28:46 root] (utils.py 283): INFO Epoch: [10] [2240/2502] eta: 0:12:32 lr: 0.000008 loss_cls: 2.6887 (2.5591) grad_norm: 1.0828 (1.5865) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 16:29:14 root] (utils.py 283): INFO Epoch: [10] [2250/2502] eta: 0:12:03 lr: 0.000008 loss_cls: 2.6887 (2.5597) grad_norm: 1.1265 (1.5876) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 16:29:43 root] (utils.py 283): INFO Epoch: [10] [2260/2502] eta: 0:11:35 lr: 0.000008 loss_cls: 2.7156 (2.5595) grad_norm: 1.2074 (1.5862) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 16:30:12 root] (utils.py 283): INFO Epoch: [10] [2270/2502] eta: 0:11:06 lr: 0.000008 loss_cls: 2.7354 (2.5600) grad_norm: 1.1425 (1.5843) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 16:30:41 root] (utils.py 283): INFO Epoch: [10] [2280/2502] eta: 0:10:37 lr: 0.000008 loss_cls: 2.6063 (2.5592) grad_norm: 1.1349 (1.5886) time: 2.8786 data: 0.0002 max mem: 28454 +[2024-12-12 16:31:09 root] (utils.py 283): INFO Epoch: [10] [2290/2502] eta: 0:10:08 lr: 0.000008 loss_cls: 2.5937 (2.5589) grad_norm: 1.1537 (1.5866) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 16:31:38 root] (utils.py 283): INFO Epoch: [10] [2300/2502] eta: 0:09:40 lr: 0.000008 loss_cls: 2.6466 (2.5590) grad_norm: 1.1537 (1.5861) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 16:32:07 root] (utils.py 283): INFO Epoch: [10] [2310/2502] eta: 0:09:11 lr: 0.000008 loss_cls: 2.7905 (2.5597) grad_norm: 1.2378 (1.5858) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 16:32:36 root] (utils.py 283): INFO Epoch: [10] [2320/2502] eta: 0:08:42 lr: 0.000008 loss_cls: 2.7415 (2.5592) grad_norm: 1.1713 (1.5838) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 16:33:04 root] (utils.py 283): INFO Epoch: [10] [2330/2502] eta: 0:08:14 lr: 0.000008 loss_cls: 2.4971 (2.5593) grad_norm: 1.1053 (1.5836) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 16:33:33 root] (utils.py 283): INFO Epoch: [10] [2340/2502] eta: 0:07:45 lr: 0.000008 loss_cls: 2.7492 (2.5590) grad_norm: 1.0939 (1.5852) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 16:34:02 root] (utils.py 283): INFO Epoch: [10] [2350/2502] eta: 0:07:16 lr: 0.000008 loss_cls: 2.4226 (2.5581) grad_norm: 1.0866 (1.5830) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 16:34:31 root] (utils.py 283): INFO Epoch: [10] [2360/2502] eta: 0:06:47 lr: 0.000008 loss_cls: 2.4400 (2.5586) grad_norm: 1.1031 (1.5820) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 16:34:59 root] (utils.py 283): INFO Epoch: [10] [2370/2502] eta: 0:06:19 lr: 0.000008 loss_cls: 2.7969 (2.5590) grad_norm: 1.1158 (1.5803) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 16:35:28 root] (utils.py 283): INFO Epoch: [10] [2380/2502] eta: 0:05:50 lr: 0.000008 loss_cls: 2.7854 (2.5593) grad_norm: 1.0936 (1.5784) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 16:35:57 root] (utils.py 283): INFO Epoch: [10] [2390/2502] eta: 0:05:21 lr: 0.000008 loss_cls: 2.7280 (2.5590) grad_norm: 1.1111 (1.5792) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 16:36:25 root] (utils.py 283): INFO Epoch: [10] [2400/2502] eta: 0:04:52 lr: 0.000008 loss_cls: 2.6652 (2.5589) grad_norm: 1.1501 (1.5774) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 16:36:54 root] (utils.py 283): INFO Epoch: [10] [2410/2502] eta: 0:04:24 lr: 0.000008 loss_cls: 2.5961 (2.5581) grad_norm: 1.0986 (1.5753) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 16:37:23 root] (utils.py 283): INFO Epoch: [10] [2420/2502] eta: 0:03:55 lr: 0.000008 loss_cls: 2.5325 (2.5584) grad_norm: 1.1010 (1.5735) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 16:37:52 root] (utils.py 283): INFO Epoch: [10] [2430/2502] eta: 0:03:26 lr: 0.000008 loss_cls: 2.5394 (2.5580) grad_norm: 1.1580 (1.5801) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 16:38:20 root] (utils.py 283): INFO Epoch: [10] [2440/2502] eta: 0:02:58 lr: 0.000008 loss_cls: 2.5284 (2.5581) grad_norm: 1.1443 (1.5783) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 16:38:49 root] (utils.py 283): INFO Epoch: [10] [2450/2502] eta: 0:02:29 lr: 0.000008 loss_cls: 2.5170 (2.5575) grad_norm: 1.0733 (1.5763) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 16:39:18 root] (utils.py 283): INFO Epoch: [10] [2460/2502] eta: 0:02:00 lr: 0.000008 loss_cls: 2.6594 (2.5578) grad_norm: 1.0815 (1.5749) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 16:39:47 root] (utils.py 283): INFO Epoch: [10] [2470/2502] eta: 0:01:31 lr: 0.000008 loss_cls: 2.7271 (2.5577) grad_norm: 1.0965 (1.5730) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 16:40:15 root] (utils.py 283): INFO Epoch: [10] [2480/2502] eta: 0:01:03 lr: 0.000008 loss_cls: 2.7068 (2.5579) grad_norm: 1.1757 (1.5765) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 16:40:45 root] (utils.py 283): INFO Epoch: [10] [2490/2502] eta: 0:00:34 lr: 0.000008 loss_cls: 2.6397 (2.5573) grad_norm: 1.2027 (1.5752) time: 2.8952 data: 0.0204 max mem: 28454 +[2024-12-12 16:41:13 root] (utils.py 283): INFO Epoch: [10] [2500/2502] eta: 0:00:05 lr: 0.000008 loss_cls: 2.3803 (2.5568) grad_norm: 1.1615 (1.5760) time: 2.8962 data: 0.0204 max mem: 28454 +[2024-12-12 16:41:16 root] (utils.py 283): INFO Epoch: [10] [2501/2502] eta: 0:00:02 lr: 0.000008 loss_cls: 2.3803 (2.5567) grad_norm: 1.1615 (1.5757) time: 2.8958 data: 0.0204 max mem: 28454 +[2024-12-12 16:41:16 root] (utils.py 297): INFO Epoch: [10] Total time: 1:59:47 (2.8727 s / it) +[2024-12-12 16:41:16 root] (engine.py 179): INFO Averaged stats:lr: 0.000008 loss_cls: 2.3803 (2.5615) grad_norm: 1.1615 (1.5757) +[2024-12-12 16:41:19 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3945 (0.3945) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5424 data: 0.0004 max mem: 28454 +[2024-12-12 16:41:25 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6457 (0.5943) acc1: 85.1562 (86.9318) acc3: 97.6562 (96.8750) acc5: 98.4375 (98.0114) time: 0.5461 data: 0.0004 max mem: 28454 +[2024-12-12 16:41:30 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6105 (0.6197) acc1: 87.5000 (86.8676) acc3: 96.8750 (96.4658) acc5: 97.6562 (97.7679) time: 0.5470 data: 0.0004 max mem: 28454 +[2024-12-12 16:41:36 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6105 (0.6519) acc1: 87.5000 (85.8367) acc3: 96.0938 (96.3206) acc5: 97.6562 (97.7319) time: 0.5480 data: 0.0005 max mem: 28454 +[2024-12-12 16:41:41 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6758 (0.6583) acc1: 85.1562 (85.6898) acc3: 96.8750 (96.2843) acc5: 97.6562 (97.6944) time: 0.5481 data: 0.0004 max mem: 28454 +[2024-12-12 16:41:47 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8162 (0.7385) acc1: 78.9062 (83.8388) acc3: 92.9688 (95.1440) acc5: 95.3125 (96.8444) time: 0.5480 data: 0.0004 max mem: 28454 +[2024-12-12 16:41:52 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9668 (0.7676) acc1: 79.6875 (83.5681) acc3: 90.6250 (94.4416) acc5: 92.9688 (96.3243) time: 0.5483 data: 0.0005 max mem: 28454 +[2024-12-12 16:41:58 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9522 (0.7936) acc1: 80.4688 (82.7905) acc3: 91.4062 (94.1901) acc5: 94.5312 (96.1378) time: 0.5482 data: 0.0005 max mem: 28454 +[2024-12-12 16:42:03 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9902 (0.8178) acc1: 78.9062 (82.3399) acc3: 91.4062 (93.8079) acc5: 94.5312 (95.8719) time: 0.5480 data: 0.0006 max mem: 28454 +[2024-12-12 16:42:09 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9466 (0.8360) acc1: 78.1250 (81.6964) acc3: 90.6250 (93.4753) acc5: 93.7500 (95.7074) time: 0.5481 data: 0.0006 max mem: 28454 +[2024-12-12 16:42:12 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9106 (0.8336) acc1: 78.1250 (81.6640) acc3: 92.1875 (93.4960) acc5: 95.3125 (95.7360) time: 0.5390 data: 0.0005 max mem: 28454 +[2024-12-12 16:42:12 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5462 s / it) +[2024-12-12 16:42:12 root] (engine.py 264): INFO * Acc@1 81.788 Acc@3 93.402 Acc@5 95.724 loss 0.834 flops 13.207 layer_flops 13.109 +[2024-12-12 16:42:12 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.8% +[2024-12-12 16:42:14 root] (main.py 576): INFO Max accuracy: 81.79% +[2024-12-12 16:42:17 root] (utils.py 283): INFO Epoch: [11] [ 0/2502] eta: 1:58:00 lr: 0.000006 loss_cls: 3.0711 (3.0711) grad_norm: 1.2064 (1.2064) time: 2.8301 data: 0.0003 max mem: 28454 +[2024-12-12 16:42:46 root] (utils.py 283): INFO Epoch: [11] [ 10/2502] eta: 1:58:49 lr: 0.000006 loss_cls: 2.8668 (2.7499) grad_norm: 1.1807 (1.3037) time: 2.8609 data: 0.0002 max mem: 28454 +[2024-12-12 16:43:14 root] (utils.py 283): INFO Epoch: [11] [ 20/2502] eta: 1:58:34 lr: 0.000006 loss_cls: 2.8313 (2.7152) grad_norm: 1.1754 (1.2455) time: 2.8682 data: 0.0002 max mem: 28454 +[2024-12-12 16:43:43 root] (utils.py 283): INFO Epoch: [11] [ 30/2502] eta: 1:58:11 lr: 0.000006 loss_cls: 2.8313 (2.7030) grad_norm: 1.1815 (1.2497) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 16:44:12 root] (utils.py 283): INFO Epoch: [11] [ 40/2502] eta: 1:57:43 lr: 0.000006 loss_cls: 2.7124 (2.6611) grad_norm: 1.0659 (1.2084) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 16:44:40 root] (utils.py 283): INFO Epoch: [11] [ 50/2502] eta: 1:57:16 lr: 0.000006 loss_cls: 2.6392 (2.6251) grad_norm: 1.0126 (1.1752) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 16:45:09 root] (utils.py 283): INFO Epoch: [11] [ 60/2502] eta: 1:56:48 lr: 0.000006 loss_cls: 2.5329 (2.6012) grad_norm: 1.1256 (1.1730) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 16:45:38 root] (utils.py 283): INFO Epoch: [11] [ 70/2502] eta: 1:56:20 lr: 0.000006 loss_cls: 2.5440 (2.5951) grad_norm: 1.1542 (1.1772) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 16:46:07 root] (utils.py 283): INFO Epoch: [11] [ 80/2502] eta: 1:55:52 lr: 0.000006 loss_cls: 2.6579 (2.5925) grad_norm: 1.0838 (1.1725) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 16:46:35 root] (utils.py 283): INFO Epoch: [11] [ 90/2502] eta: 1:55:22 lr: 0.000006 loss_cls: 2.6820 (2.5870) grad_norm: 1.1156 (1.1793) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 16:47:04 root] (utils.py 283): INFO Epoch: [11] [ 100/2502] eta: 1:54:54 lr: 0.000006 loss_cls: 2.4325 (2.5656) grad_norm: 1.1245 (1.1776) time: 2.8704 data: 0.0002 max mem: 28454 +[2024-12-12 16:47:33 root] (utils.py 283): INFO Epoch: [11] [ 110/2502] eta: 1:54:27 lr: 0.000006 loss_cls: 2.5471 (2.5732) grad_norm: 1.1174 (1.1777) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 16:48:01 root] (utils.py 283): INFO Epoch: [11] [ 120/2502] eta: 1:53:58 lr: 0.000006 loss_cls: 2.6450 (2.5671) grad_norm: 1.2107 (1.2219) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 16:48:30 root] (utils.py 283): INFO Epoch: [11] [ 130/2502] eta: 1:53:30 lr: 0.000006 loss_cls: 2.4530 (2.5572) grad_norm: 1.1974 (1.2182) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 16:48:59 root] (utils.py 283): INFO Epoch: [11] [ 140/2502] eta: 1:53:01 lr: 0.000006 loss_cls: 2.7258 (2.5750) grad_norm: 1.1144 (1.2195) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 16:49:28 root] (utils.py 283): INFO Epoch: [11] [ 150/2502] eta: 1:52:31 lr: 0.000006 loss_cls: 2.8287 (2.5757) grad_norm: 1.1095 (1.2163) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 16:49:56 root] (utils.py 283): INFO Epoch: [11] [ 160/2502] eta: 1:52:02 lr: 0.000006 loss_cls: 2.6223 (2.5537) grad_norm: 1.1133 (1.2340) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 16:50:25 root] (utils.py 283): INFO Epoch: [11] [ 170/2502] eta: 1:51:33 lr: 0.000006 loss_cls: 2.6158 (2.5537) grad_norm: 1.1244 (1.5884) time: 2.8661 data: 0.0002 max mem: 28454 +[2024-12-12 16:50:54 root] (utils.py 283): INFO Epoch: [11] [ 180/2502] eta: 1:51:04 lr: 0.000006 loss_cls: 2.6189 (2.5555) grad_norm: 1.1612 (1.5715) time: 2.8667 data: 0.0002 max mem: 28454 +[2024-12-12 16:51:22 root] (utils.py 283): INFO Epoch: [11] [ 190/2502] eta: 1:50:35 lr: 0.000006 loss_cls: 2.6189 (2.5468) grad_norm: 1.2543 (1.5841) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 16:51:51 root] (utils.py 283): INFO Epoch: [11] [ 200/2502] eta: 1:50:06 lr: 0.000006 loss_cls: 2.5646 (2.5505) grad_norm: 1.3225 (1.5767) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 16:52:20 root] (utils.py 283): INFO Epoch: [11] [ 210/2502] eta: 1:49:37 lr: 0.000006 loss_cls: 2.6629 (2.5540) grad_norm: 1.2895 (1.5629) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 16:52:48 root] (utils.py 283): INFO Epoch: [11] [ 220/2502] eta: 1:49:08 lr: 0.000006 loss_cls: 2.4979 (2.5464) grad_norm: 1.1563 (1.5476) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 16:53:17 root] (utils.py 283): INFO Epoch: [11] [ 230/2502] eta: 1:48:39 lr: 0.000006 loss_cls: 2.5645 (2.5459) grad_norm: 1.1563 (1.5355) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 16:53:46 root] (utils.py 283): INFO Epoch: [11] [ 240/2502] eta: 1:48:11 lr: 0.000006 loss_cls: 2.6943 (2.5493) grad_norm: 1.1383 (1.5182) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 16:54:14 root] (utils.py 283): INFO Epoch: [11] [ 250/2502] eta: 1:47:42 lr: 0.000006 loss_cls: 2.6235 (2.5516) grad_norm: 1.1383 (1.5234) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 16:54:43 root] (utils.py 283): INFO Epoch: [11] [ 260/2502] eta: 1:47:14 lr: 0.000006 loss_cls: 2.6372 (2.5496) grad_norm: 1.0993 (1.5063) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 16:55:12 root] (utils.py 283): INFO Epoch: [11] [ 270/2502] eta: 1:46:45 lr: 0.000006 loss_cls: 2.5632 (2.5486) grad_norm: 1.0993 (1.5002) time: 2.8669 data: 0.0002 max mem: 28454 +[2024-12-12 16:55:40 root] (utils.py 283): INFO Epoch: [11] [ 280/2502] eta: 1:46:16 lr: 0.000006 loss_cls: 2.6636 (2.5535) grad_norm: 1.1207 (1.5036) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 16:56:09 root] (utils.py 283): INFO Epoch: [11] [ 290/2502] eta: 1:45:47 lr: 0.000006 loss_cls: 2.7128 (2.5545) grad_norm: 1.1703 (1.5164) time: 2.8686 data: 0.0002 max mem: 28454 +[2024-12-12 16:56:38 root] (utils.py 283): INFO Epoch: [11] [ 300/2502] eta: 1:45:18 lr: 0.000006 loss_cls: 2.7943 (2.5565) grad_norm: 1.0951 (1.5033) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 16:57:06 root] (utils.py 283): INFO Epoch: [11] [ 310/2502] eta: 1:44:49 lr: 0.000006 loss_cls: 2.7553 (2.5544) grad_norm: 1.0951 (1.4918) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 16:57:35 root] (utils.py 283): INFO Epoch: [11] [ 320/2502] eta: 1:44:20 lr: 0.000006 loss_cls: 2.6950 (2.5591) grad_norm: 1.1766 (1.4947) time: 2.8676 data: 0.0002 max mem: 28454 +[2024-12-12 16:58:04 root] (utils.py 283): INFO Epoch: [11] [ 330/2502] eta: 1:43:52 lr: 0.000006 loss_cls: 2.7900 (2.5666) grad_norm: 1.1448 (1.4848) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 16:58:33 root] (utils.py 283): INFO Epoch: [11] [ 340/2502] eta: 1:43:23 lr: 0.000006 loss_cls: 2.8754 (2.5684) grad_norm: 1.1087 (1.4781) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 16:59:01 root] (utils.py 283): INFO Epoch: [11] [ 350/2502] eta: 1:42:54 lr: 0.000006 loss_cls: 2.8757 (2.5720) grad_norm: 1.1541 (1.4712) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 16:59:30 root] (utils.py 283): INFO Epoch: [11] [ 360/2502] eta: 1:42:26 lr: 0.000006 loss_cls: 2.7721 (2.5717) grad_norm: 1.1992 (1.4641) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 16:59:59 root] (utils.py 283): INFO Epoch: [11] [ 370/2502] eta: 1:41:58 lr: 0.000006 loss_cls: 2.5701 (2.5700) grad_norm: 1.1116 (1.4551) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 17:00:27 root] (utils.py 283): INFO Epoch: [11] [ 380/2502] eta: 1:41:29 lr: 0.000006 loss_cls: 2.5508 (2.5675) grad_norm: 1.1019 (1.4981) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 17:00:56 root] (utils.py 283): INFO Epoch: [11] [ 390/2502] eta: 1:41:01 lr: 0.000006 loss_cls: 2.4100 (2.5633) grad_norm: 1.0757 (1.4944) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 17:01:25 root] (utils.py 283): INFO Epoch: [11] [ 400/2502] eta: 1:40:32 lr: 0.000006 loss_cls: 2.5141 (2.5640) grad_norm: 1.1100 (1.4860) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 17:01:54 root] (utils.py 283): INFO Epoch: [11] [ 410/2502] eta: 1:40:03 lr: 0.000006 loss_cls: 2.7269 (2.5663) grad_norm: 1.1232 (1.5177) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 17:02:22 root] (utils.py 283): INFO Epoch: [11] [ 420/2502] eta: 1:39:35 lr: 0.000006 loss_cls: 2.5190 (2.5614) grad_norm: 1.1710 (1.5103) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 17:02:51 root] (utils.py 283): INFO Epoch: [11] [ 430/2502] eta: 1:39:06 lr: 0.000006 loss_cls: 2.5190 (2.5631) grad_norm: 1.2186 (1.5200) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 17:03:20 root] (utils.py 283): INFO Epoch: [11] [ 440/2502] eta: 1:38:37 lr: 0.000006 loss_cls: 2.6348 (2.5626) grad_norm: 1.1804 (1.5305) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 17:03:48 root] (utils.py 283): INFO Epoch: [11] [ 450/2502] eta: 1:38:09 lr: 0.000006 loss_cls: 2.7014 (2.5670) grad_norm: 1.2298 (1.5699) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 17:04:17 root] (utils.py 283): INFO Epoch: [11] [ 460/2502] eta: 1:37:40 lr: 0.000006 loss_cls: 2.7205 (2.5640) grad_norm: 1.2013 (1.5785) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 17:04:46 root] (utils.py 283): INFO Epoch: [11] [ 470/2502] eta: 1:37:11 lr: 0.000006 loss_cls: 2.5042 (2.5637) grad_norm: 1.1302 (1.5732) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 17:05:15 root] (utils.py 283): INFO Epoch: [11] [ 480/2502] eta: 1:36:43 lr: 0.000006 loss_cls: 2.6414 (2.5645) grad_norm: 1.1946 (1.5939) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 17:05:43 root] (utils.py 283): INFO Epoch: [11] [ 490/2502] eta: 1:36:14 lr: 0.000006 loss_cls: 2.6010 (2.5624) grad_norm: 1.2257 (1.5867) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 17:06:12 root] (utils.py 283): INFO Epoch: [11] [ 500/2502] eta: 1:35:45 lr: 0.000006 loss_cls: 2.6010 (2.5622) grad_norm: 1.2129 (1.5798) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 17:06:41 root] (utils.py 283): INFO Epoch: [11] [ 510/2502] eta: 1:35:17 lr: 0.000006 loss_cls: 2.5893 (2.5598) grad_norm: 1.1832 (1.5793) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 17:07:09 root] (utils.py 283): INFO Epoch: [11] [ 520/2502] eta: 1:34:48 lr: 0.000006 loss_cls: 2.5936 (2.5608) grad_norm: 1.2087 (1.5726) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 17:07:38 root] (utils.py 283): INFO Epoch: [11] [ 530/2502] eta: 1:34:20 lr: 0.000006 loss_cls: 2.6614 (2.5603) grad_norm: 1.2187 (1.5663) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 17:08:07 root] (utils.py 283): INFO Epoch: [11] [ 540/2502] eta: 1:33:51 lr: 0.000006 loss_cls: 2.5586 (2.5585) grad_norm: 1.1398 (1.5590) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 17:08:36 root] (utils.py 283): INFO Epoch: [11] [ 550/2502] eta: 1:33:22 lr: 0.000006 loss_cls: 2.5493 (2.5559) grad_norm: 1.1396 (1.5517) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 17:09:04 root] (utils.py 283): INFO Epoch: [11] [ 560/2502] eta: 1:32:54 lr: 0.000006 loss_cls: 2.7610 (2.5593) grad_norm: 1.1324 (1.5456) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 17:09:33 root] (utils.py 283): INFO Epoch: [11] [ 570/2502] eta: 1:32:25 lr: 0.000006 loss_cls: 2.7669 (2.5636) grad_norm: 1.1150 (1.5427) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 17:10:02 root] (utils.py 283): INFO Epoch: [11] [ 580/2502] eta: 1:31:56 lr: 0.000006 loss_cls: 2.6872 (2.5574) grad_norm: 1.0823 (1.5464) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 17:10:31 root] (utils.py 283): INFO Epoch: [11] [ 590/2502] eta: 1:31:28 lr: 0.000006 loss_cls: 2.5286 (2.5611) grad_norm: 1.1230 (1.5401) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 17:10:59 root] (utils.py 283): INFO Epoch: [11] [ 600/2502] eta: 1:30:59 lr: 0.000006 loss_cls: 2.7042 (2.5608) grad_norm: 1.1714 (1.5341) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 17:11:28 root] (utils.py 283): INFO Epoch: [11] [ 610/2502] eta: 1:30:31 lr: 0.000006 loss_cls: 2.3834 (2.5569) grad_norm: 1.1605 (1.5276) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 17:11:57 root] (utils.py 283): INFO Epoch: [11] [ 620/2502] eta: 1:30:02 lr: 0.000006 loss_cls: 2.3834 (2.5544) grad_norm: 1.1564 (1.5205) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-12 17:12:26 root] (utils.py 283): INFO Epoch: [11] [ 630/2502] eta: 1:29:33 lr: 0.000006 loss_cls: 2.6892 (2.5585) grad_norm: 1.1193 (1.5142) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 17:12:54 root] (utils.py 283): INFO Epoch: [11] [ 640/2502] eta: 1:29:05 lr: 0.000006 loss_cls: 2.7222 (2.5562) grad_norm: 1.1223 (1.5105) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 17:13:23 root] (utils.py 283): INFO Epoch: [11] [ 650/2502] eta: 1:28:36 lr: 0.000006 loss_cls: 2.6062 (2.5583) grad_norm: 1.1223 (1.5047) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 17:13:52 root] (utils.py 283): INFO Epoch: [11] [ 660/2502] eta: 1:28:08 lr: 0.000006 loss_cls: 2.5736 (2.5577) grad_norm: 1.0801 (1.4989) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 17:14:20 root] (utils.py 283): INFO Epoch: [11] [ 670/2502] eta: 1:27:39 lr: 0.000006 loss_cls: 2.4364 (2.5550) grad_norm: 1.0717 (1.4928) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 17:14:49 root] (utils.py 283): INFO Epoch: [11] [ 680/2502] eta: 1:27:10 lr: 0.000006 loss_cls: 2.6012 (2.5559) grad_norm: 1.1660 (1.5703) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 17:15:18 root] (utils.py 283): INFO Epoch: [11] [ 690/2502] eta: 1:26:42 lr: 0.000006 loss_cls: 2.5726 (2.5539) grad_norm: 1.2965 (1.5802) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 17:15:47 root] (utils.py 283): INFO Epoch: [11] [ 700/2502] eta: 1:26:13 lr: 0.000006 loss_cls: 2.5150 (2.5547) grad_norm: 1.2515 (1.5761) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 17:16:15 root] (utils.py 283): INFO Epoch: [11] [ 710/2502] eta: 1:25:45 lr: 0.000006 loss_cls: 2.7117 (2.5566) grad_norm: 1.1766 (1.5718) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 17:16:44 root] (utils.py 283): INFO Epoch: [11] [ 720/2502] eta: 1:25:16 lr: 0.000006 loss_cls: 2.6620 (2.5519) grad_norm: 1.1572 (1.5855) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 17:17:13 root] (utils.py 283): INFO Epoch: [11] [ 730/2502] eta: 1:24:47 lr: 0.000006 loss_cls: 2.6230 (2.5531) grad_norm: 1.1045 (1.5792) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 17:17:42 root] (utils.py 283): INFO Epoch: [11] [ 740/2502] eta: 1:24:18 lr: 0.000006 loss_cls: 2.6918 (2.5558) grad_norm: 1.1348 (1.5788) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 17:18:10 root] (utils.py 283): INFO Epoch: [11] [ 750/2502] eta: 1:23:50 lr: 0.000006 loss_cls: 2.6855 (2.5557) grad_norm: 1.1284 (1.5723) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 17:18:39 root] (utils.py 283): INFO Epoch: [11] [ 760/2502] eta: 1:23:21 lr: 0.000006 loss_cls: 2.5985 (2.5552) grad_norm: 1.1184 (1.5679) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 17:19:08 root] (utils.py 283): INFO Epoch: [11] [ 770/2502] eta: 1:22:52 lr: 0.000006 loss_cls: 2.5937 (2.5547) grad_norm: 1.1387 (1.5624) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 17:19:36 root] (utils.py 283): INFO Epoch: [11] [ 780/2502] eta: 1:22:23 lr: 0.000006 loss_cls: 2.5333 (2.5534) grad_norm: 1.0932 (1.5565) time: 2.8680 data: 0.0002 max mem: 28454 +[2024-12-12 17:20:05 root] (utils.py 283): INFO Epoch: [11] [ 790/2502] eta: 1:21:55 lr: 0.000006 loss_cls: 2.5932 (2.5550) grad_norm: 1.1023 (1.5538) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 17:20:34 root] (utils.py 283): INFO Epoch: [11] [ 800/2502] eta: 1:21:26 lr: 0.000006 loss_cls: 2.6493 (2.5537) grad_norm: 1.0730 (1.5488) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 17:21:02 root] (utils.py 283): INFO Epoch: [11] [ 810/2502] eta: 1:20:57 lr: 0.000006 loss_cls: 2.5071 (2.5522) grad_norm: 1.0751 (1.5515) time: 2.8672 data: 0.0002 max mem: 28454 +[2024-12-12 17:21:31 root] (utils.py 283): INFO Epoch: [11] [ 820/2502] eta: 1:20:28 lr: 0.000006 loss_cls: 2.5463 (2.5516) grad_norm: 1.1497 (1.5475) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 17:22:00 root] (utils.py 283): INFO Epoch: [11] [ 830/2502] eta: 1:20:00 lr: 0.000006 loss_cls: 2.4217 (2.5490) grad_norm: 1.0869 (1.5415) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 17:22:29 root] (utils.py 283): INFO Epoch: [11] [ 840/2502] eta: 1:19:31 lr: 0.000006 loss_cls: 2.6074 (2.5510) grad_norm: 1.0895 (1.5378) time: 2.8772 data: 0.0002 max mem: 28454 +[2024-12-12 17:22:57 root] (utils.py 283): INFO Epoch: [11] [ 850/2502] eta: 1:19:02 lr: 0.000006 loss_cls: 2.7227 (2.5515) grad_norm: 1.1337 (1.5326) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 17:23:26 root] (utils.py 283): INFO Epoch: [11] [ 860/2502] eta: 1:18:34 lr: 0.000006 loss_cls: 2.6176 (2.5512) grad_norm: 1.0810 (1.5270) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 17:23:55 root] (utils.py 283): INFO Epoch: [11] [ 870/2502] eta: 1:18:05 lr: 0.000006 loss_cls: 2.4428 (2.5501) grad_norm: 1.0865 (1.5259) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 17:24:24 root] (utils.py 283): INFO Epoch: [11] [ 880/2502] eta: 1:17:36 lr: 0.000006 loss_cls: 2.6242 (2.5501) grad_norm: 1.0865 (1.5214) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 17:24:52 root] (utils.py 283): INFO Epoch: [11] [ 890/2502] eta: 1:17:08 lr: 0.000006 loss_cls: 2.6242 (2.5496) grad_norm: 1.1144 (1.5181) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 17:25:21 root] (utils.py 283): INFO Epoch: [11] [ 900/2502] eta: 1:16:39 lr: 0.000006 loss_cls: 2.6098 (2.5490) grad_norm: 1.1144 (1.5142) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 17:25:50 root] (utils.py 283): INFO Epoch: [11] [ 910/2502] eta: 1:16:10 lr: 0.000006 loss_cls: 2.5880 (2.5485) grad_norm: 1.1256 (1.5125) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 17:26:18 root] (utils.py 283): INFO Epoch: [11] [ 920/2502] eta: 1:15:42 lr: 0.000006 loss_cls: 2.4883 (2.5484) grad_norm: 1.1145 (1.5079) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 17:26:47 root] (utils.py 283): INFO Epoch: [11] [ 930/2502] eta: 1:15:13 lr: 0.000006 loss_cls: 2.4770 (2.5485) grad_norm: 1.0946 (1.5060) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 17:27:16 root] (utils.py 283): INFO Epoch: [11] [ 940/2502] eta: 1:14:44 lr: 0.000006 loss_cls: 2.6472 (2.5493) grad_norm: 1.0937 (1.5039) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 17:27:45 root] (utils.py 283): INFO Epoch: [11] [ 950/2502] eta: 1:14:15 lr: 0.000006 loss_cls: 2.7232 (2.5508) grad_norm: 1.0703 (1.4993) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 17:28:13 root] (utils.py 283): INFO Epoch: [11] [ 960/2502] eta: 1:13:47 lr: 0.000006 loss_cls: 2.6942 (2.5498) grad_norm: 1.0341 (1.5021) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 17:28:42 root] (utils.py 283): INFO Epoch: [11] [ 970/2502] eta: 1:13:18 lr: 0.000006 loss_cls: 2.6475 (2.5503) grad_norm: 1.1501 (1.4993) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 17:29:11 root] (utils.py 283): INFO Epoch: [11] [ 980/2502] eta: 1:12:49 lr: 0.000006 loss_cls: 2.7551 (2.5503) grad_norm: 1.1501 (1.4960) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 17:29:39 root] (utils.py 283): INFO Epoch: [11] [ 990/2502] eta: 1:12:21 lr: 0.000006 loss_cls: 2.7551 (2.5521) grad_norm: 1.1441 (1.4998) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 17:30:08 root] (utils.py 283): INFO Epoch: [11] [1000/2502] eta: 1:11:52 lr: 0.000006 loss_cls: 2.6193 (2.5512) grad_norm: 1.1185 (1.4960) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 17:30:37 root] (utils.py 283): INFO Epoch: [11] [1010/2502] eta: 1:11:23 lr: 0.000006 loss_cls: 2.5500 (2.5510) grad_norm: 1.1326 (1.4921) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 17:31:06 root] (utils.py 283): INFO Epoch: [11] [1020/2502] eta: 1:10:55 lr: 0.000006 loss_cls: 2.6333 (2.5524) grad_norm: 1.1326 (1.4885) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 17:31:34 root] (utils.py 283): INFO Epoch: [11] [1030/2502] eta: 1:10:26 lr: 0.000006 loss_cls: 2.7045 (2.5522) grad_norm: 1.1327 (1.4868) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 17:32:03 root] (utils.py 283): INFO Epoch: [11] [1040/2502] eta: 1:09:57 lr: 0.000006 loss_cls: 2.7058 (2.5534) grad_norm: 1.1582 (1.4838) time: 2.8764 data: 0.0002 max mem: 28454 +[2024-12-12 17:32:32 root] (utils.py 283): INFO Epoch: [11] [1050/2502] eta: 1:09:28 lr: 0.000006 loss_cls: 2.7006 (2.5510) grad_norm: 1.1813 (1.4872) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 17:33:00 root] (utils.py 283): INFO Epoch: [11] [1060/2502] eta: 1:09:00 lr: 0.000006 loss_cls: 2.5858 (2.5520) grad_norm: 1.1032 (1.4838) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 17:33:29 root] (utils.py 283): INFO Epoch: [11] [1070/2502] eta: 1:08:31 lr: 0.000006 loss_cls: 2.6102 (2.5524) grad_norm: 1.1027 (1.4808) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-12 17:33:58 root] (utils.py 283): INFO Epoch: [11] [1080/2502] eta: 1:08:02 lr: 0.000006 loss_cls: 2.6347 (2.5526) grad_norm: 1.1305 (1.4806) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 17:34:27 root] (utils.py 283): INFO Epoch: [11] [1090/2502] eta: 1:07:34 lr: 0.000006 loss_cls: 2.6254 (2.5535) grad_norm: 1.2786 (1.4786) time: 2.8752 data: 0.0003 max mem: 28454 +[2024-12-12 17:34:55 root] (utils.py 283): INFO Epoch: [11] [1100/2502] eta: 1:07:05 lr: 0.000006 loss_cls: 2.7480 (2.5557) grad_norm: 1.1543 (1.4758) time: 2.8749 data: 0.0003 max mem: 28454 +[2024-12-12 17:35:24 root] (utils.py 283): INFO Epoch: [11] [1110/2502] eta: 1:06:36 lr: 0.000006 loss_cls: 2.7480 (2.5551) grad_norm: 1.0815 (1.4721) time: 2.8746 data: 0.0002 max mem: 28454 +[2024-12-12 17:35:53 root] (utils.py 283): INFO Epoch: [11] [1120/2502] eta: 1:06:08 lr: 0.000006 loss_cls: 2.6293 (2.5539) grad_norm: 1.0933 (1.4699) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 17:36:22 root] (utils.py 283): INFO Epoch: [11] [1130/2502] eta: 1:05:39 lr: 0.000006 loss_cls: 2.6293 (2.5544) grad_norm: 1.1347 (1.4671) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 17:36:50 root] (utils.py 283): INFO Epoch: [11] [1140/2502] eta: 1:05:10 lr: 0.000006 loss_cls: 2.6332 (2.5552) grad_norm: 1.1141 (1.4643) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 17:37:19 root] (utils.py 283): INFO Epoch: [11] [1150/2502] eta: 1:04:42 lr: 0.000006 loss_cls: 2.5407 (2.5536) grad_norm: 1.1749 (1.4626) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 17:37:48 root] (utils.py 283): INFO Epoch: [11] [1160/2502] eta: 1:04:13 lr: 0.000006 loss_cls: 2.5461 (2.5545) grad_norm: 1.1768 (1.4672) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 17:38:17 root] (utils.py 283): INFO Epoch: [11] [1170/2502] eta: 1:03:44 lr: 0.000006 loss_cls: 2.6894 (2.5553) grad_norm: 1.1366 (1.4642) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-12 17:38:45 root] (utils.py 283): INFO Epoch: [11] [1180/2502] eta: 1:03:16 lr: 0.000006 loss_cls: 2.8057 (2.5564) grad_norm: 1.1009 (1.4616) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 17:39:14 root] (utils.py 283): INFO Epoch: [11] [1190/2502] eta: 1:02:47 lr: 0.000006 loss_cls: 2.7862 (2.5566) grad_norm: 1.1290 (1.4598) time: 2.8804 data: 0.0003 max mem: 28454 +[2024-12-12 17:39:43 root] (utils.py 283): INFO Epoch: [11] [1200/2502] eta: 1:02:18 lr: 0.000006 loss_cls: 2.6137 (2.5566) grad_norm: 1.2024 (1.4590) time: 2.8798 data: 0.0002 max mem: 28454 +[2024-12-12 17:40:12 root] (utils.py 283): INFO Epoch: [11] [1210/2502] eta: 1:01:50 lr: 0.000006 loss_cls: 2.7789 (2.5580) grad_norm: 1.2024 (1.4570) time: 2.8709 data: 0.0002 max mem: 28454 +[2024-12-12 17:40:40 root] (utils.py 283): INFO Epoch: [11] [1220/2502] eta: 1:01:21 lr: 0.000006 loss_cls: 2.5947 (2.5562) grad_norm: 1.1805 (1.4542) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 17:41:09 root] (utils.py 283): INFO Epoch: [11] [1230/2502] eta: 1:00:52 lr: 0.000006 loss_cls: 2.5947 (2.5570) grad_norm: 1.1350 (1.4519) time: 2.8772 data: 0.0002 max mem: 28454 +[2024-12-12 17:41:38 root] (utils.py 283): INFO Epoch: [11] [1240/2502] eta: 1:00:24 lr: 0.000006 loss_cls: 2.6825 (2.5567) grad_norm: 1.0873 (1.4494) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 17:42:07 root] (utils.py 283): INFO Epoch: [11] [1250/2502] eta: 0:59:55 lr: 0.000006 loss_cls: 2.6183 (2.5549) grad_norm: 1.0637 (1.4464) time: 2.8749 data: 0.0003 max mem: 28454 +[2024-12-12 17:42:35 root] (utils.py 283): INFO Epoch: [11] [1260/2502] eta: 0:59:26 lr: 0.000006 loss_cls: 2.4654 (2.5541) grad_norm: 1.1211 (1.4488) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-12 17:43:04 root] (utils.py 283): INFO Epoch: [11] [1270/2502] eta: 0:58:58 lr: 0.000006 loss_cls: 2.5755 (2.5551) grad_norm: 1.1503 (1.4479) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 17:43:33 root] (utils.py 283): INFO Epoch: [11] [1280/2502] eta: 0:58:29 lr: 0.000006 loss_cls: 2.6885 (2.5547) grad_norm: 1.1361 (1.4456) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 17:44:02 root] (utils.py 283): INFO Epoch: [11] [1290/2502] eta: 0:58:00 lr: 0.000006 loss_cls: 2.6467 (2.5541) grad_norm: 1.1654 (1.4437) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 17:44:30 root] (utils.py 283): INFO Epoch: [11] [1300/2502] eta: 0:57:31 lr: 0.000006 loss_cls: 2.6503 (2.5539) grad_norm: 1.1374 (1.4407) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 17:44:59 root] (utils.py 283): INFO Epoch: [11] [1310/2502] eta: 0:57:03 lr: 0.000006 loss_cls: 2.6503 (2.5545) grad_norm: 1.0435 (1.4408) time: 2.8716 data: 0.0003 max mem: 28454 +[2024-12-12 17:45:28 root] (utils.py 283): INFO Epoch: [11] [1320/2502] eta: 0:56:34 lr: 0.000006 loss_cls: 2.6908 (2.5533) grad_norm: 1.1279 (1.4385) time: 2.8679 data: 0.0003 max mem: 28454 +[2024-12-12 17:45:57 root] (utils.py 283): INFO Epoch: [11] [1330/2502] eta: 0:56:05 lr: 0.000006 loss_cls: 2.5165 (2.5538) grad_norm: 1.0960 (1.4366) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 17:46:25 root] (utils.py 283): INFO Epoch: [11] [1340/2502] eta: 0:55:37 lr: 0.000006 loss_cls: 2.7596 (2.5551) grad_norm: 1.1513 (1.4347) time: 2.8742 data: 0.0003 max mem: 28454 +[2024-12-12 17:46:54 root] (utils.py 283): INFO Epoch: [11] [1350/2502] eta: 0:55:08 lr: 0.000006 loss_cls: 2.7635 (2.5561) grad_norm: 1.1513 (1.4326) time: 2.8702 data: 0.0003 max mem: 28454 +[2024-12-12 17:47:23 root] (utils.py 283): INFO Epoch: [11] [1360/2502] eta: 0:54:39 lr: 0.000006 loss_cls: 2.7056 (2.5560) grad_norm: 1.1294 (1.4305) time: 2.8697 data: 0.0003 max mem: 28454 +[2024-12-12 17:47:51 root] (utils.py 283): INFO Epoch: [11] [1370/2502] eta: 0:54:10 lr: 0.000006 loss_cls: 2.4793 (2.5547) grad_norm: 1.1151 (1.4286) time: 2.8697 data: 0.0003 max mem: 28454 +[2024-12-12 17:48:20 root] (utils.py 283): INFO Epoch: [11] [1380/2502] eta: 0:53:42 lr: 0.000006 loss_cls: 2.4859 (2.5543) grad_norm: 1.1151 (1.4273) time: 2.8693 data: 0.0003 max mem: 28454 +[2024-12-12 17:48:49 root] (utils.py 283): INFO Epoch: [11] [1390/2502] eta: 0:53:13 lr: 0.000006 loss_cls: 2.5471 (2.5533) grad_norm: 1.1050 (1.4256) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 17:49:17 root] (utils.py 283): INFO Epoch: [11] [1400/2502] eta: 0:52:44 lr: 0.000006 loss_cls: 2.5891 (2.5529) grad_norm: 1.0731 (1.4230) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-12 17:49:46 root] (utils.py 283): INFO Epoch: [11] [1410/2502] eta: 0:52:15 lr: 0.000006 loss_cls: 2.5090 (2.5518) grad_norm: 1.0973 (1.4215) time: 2.8699 data: 0.0003 max mem: 28454 +[2024-12-12 17:50:15 root] (utils.py 283): INFO Epoch: [11] [1420/2502] eta: 0:51:47 lr: 0.000006 loss_cls: 2.3913 (2.5512) grad_norm: 1.1107 (1.4265) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 17:50:44 root] (utils.py 283): INFO Epoch: [11] [1430/2502] eta: 0:51:18 lr: 0.000006 loss_cls: 2.3913 (2.5500) grad_norm: 1.1107 (1.4243) time: 2.8677 data: 0.0003 max mem: 28454 +[2024-12-12 17:51:12 root] (utils.py 283): INFO Epoch: [11] [1440/2502] eta: 0:50:49 lr: 0.000006 loss_cls: 2.5419 (2.5510) grad_norm: 1.0978 (1.4223) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 17:51:41 root] (utils.py 283): INFO Epoch: [11] [1450/2502] eta: 0:50:21 lr: 0.000006 loss_cls: 2.6808 (2.5510) grad_norm: 1.0537 (1.4201) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 17:52:10 root] (utils.py 283): INFO Epoch: [11] [1460/2502] eta: 0:49:52 lr: 0.000006 loss_cls: 2.6190 (2.5516) grad_norm: 1.0317 (1.4180) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 17:52:38 root] (utils.py 283): INFO Epoch: [11] [1470/2502] eta: 0:49:23 lr: 0.000006 loss_cls: 2.6362 (2.5511) grad_norm: 1.0919 (1.4160) time: 2.8745 data: 0.0003 max mem: 28454 +[2024-12-12 17:53:07 root] (utils.py 283): INFO Epoch: [11] [1480/2502] eta: 0:48:54 lr: 0.000006 loss_cls: 2.5848 (2.5509) grad_norm: 1.1275 (1.4159) time: 2.8767 data: 0.0003 max mem: 28454 +[2024-12-12 17:53:36 root] (utils.py 283): INFO Epoch: [11] [1490/2502] eta: 0:48:26 lr: 0.000006 loss_cls: 2.7264 (2.5511) grad_norm: 1.1844 (1.4161) time: 2.8761 data: 0.0003 max mem: 28454 +[2024-12-12 17:54:05 root] (utils.py 283): INFO Epoch: [11] [1500/2502] eta: 0:47:57 lr: 0.000006 loss_cls: 2.6173 (2.5511) grad_norm: 1.1174 (1.4141) time: 2.8769 data: 0.0003 max mem: 28454 +[2024-12-12 17:54:34 root] (utils.py 283): INFO Epoch: [11] [1510/2502] eta: 0:47:28 lr: 0.000006 loss_cls: 2.5699 (2.5522) grad_norm: 1.0809 (1.4118) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 17:55:02 root] (utils.py 283): INFO Epoch: [11] [1520/2502] eta: 0:47:00 lr: 0.000006 loss_cls: 2.5699 (2.5514) grad_norm: 1.0880 (1.4101) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 17:55:31 root] (utils.py 283): INFO Epoch: [11] [1530/2502] eta: 0:46:31 lr: 0.000006 loss_cls: 2.4957 (2.5512) grad_norm: 1.0880 (1.4087) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 17:56:00 root] (utils.py 283): INFO Epoch: [11] [1540/2502] eta: 0:46:02 lr: 0.000006 loss_cls: 2.6044 (2.5517) grad_norm: 1.1125 (1.4069) time: 2.8721 data: 0.0003 max mem: 28454 +[2024-12-12 17:56:28 root] (utils.py 283): INFO Epoch: [11] [1550/2502] eta: 0:45:33 lr: 0.000006 loss_cls: 2.6044 (2.5508) grad_norm: 1.1426 (1.4073) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 17:56:57 root] (utils.py 283): INFO Epoch: [11] [1560/2502] eta: 0:45:05 lr: 0.000006 loss_cls: 2.5668 (2.5522) grad_norm: 1.1245 (1.4123) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 17:57:26 root] (utils.py 283): INFO Epoch: [11] [1570/2502] eta: 0:44:36 lr: 0.000006 loss_cls: 2.6252 (2.5517) grad_norm: 1.1616 (1.4114) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 17:57:55 root] (utils.py 283): INFO Epoch: [11] [1580/2502] eta: 0:44:07 lr: 0.000006 loss_cls: 2.6728 (2.5526) grad_norm: 1.1446 (1.4112) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 17:58:23 root] (utils.py 283): INFO Epoch: [11] [1590/2502] eta: 0:43:39 lr: 0.000006 loss_cls: 2.6705 (2.5525) grad_norm: 1.0844 (1.4105) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 17:58:52 root] (utils.py 283): INFO Epoch: [11] [1600/2502] eta: 0:43:10 lr: 0.000006 loss_cls: 2.6537 (2.5522) grad_norm: 1.1309 (1.4087) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 17:59:21 root] (utils.py 283): INFO Epoch: [11] [1610/2502] eta: 0:42:41 lr: 0.000006 loss_cls: 2.5363 (2.5504) grad_norm: 1.0632 (1.4064) time: 2.8679 data: 0.0003 max mem: 28454 +[2024-12-12 17:59:49 root] (utils.py 283): INFO Epoch: [11] [1620/2502] eta: 0:42:12 lr: 0.000006 loss_cls: 2.5407 (2.5515) grad_norm: 1.1317 (1.4052) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 18:00:18 root] (utils.py 283): INFO Epoch: [11] [1630/2502] eta: 0:41:44 lr: 0.000006 loss_cls: 2.6744 (2.5516) grad_norm: 1.1580 (1.4036) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 18:00:47 root] (utils.py 283): INFO Epoch: [11] [1640/2502] eta: 0:41:15 lr: 0.000006 loss_cls: 2.6266 (2.5517) grad_norm: 1.0880 (1.4037) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 18:01:16 root] (utils.py 283): INFO Epoch: [11] [1650/2502] eta: 0:40:46 lr: 0.000006 loss_cls: 2.5839 (2.5513) grad_norm: 1.0760 (1.4018) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 18:01:44 root] (utils.py 283): INFO Epoch: [11] [1660/2502] eta: 0:40:18 lr: 0.000006 loss_cls: 2.4395 (2.5506) grad_norm: 1.0732 (1.4004) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 18:02:13 root] (utils.py 283): INFO Epoch: [11] [1670/2502] eta: 0:39:49 lr: 0.000006 loss_cls: 2.4395 (2.5501) grad_norm: 1.1142 (1.3999) time: 2.8741 data: 0.0002 max mem: 28454 +[2024-12-12 18:02:42 root] (utils.py 283): INFO Epoch: [11] [1680/2502] eta: 0:39:20 lr: 0.000006 loss_cls: 2.6499 (2.5500) grad_norm: 1.1818 (1.3989) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 18:03:10 root] (utils.py 283): INFO Epoch: [11] [1690/2502] eta: 0:38:51 lr: 0.000006 loss_cls: 2.7102 (2.5502) grad_norm: 1.2118 (1.4472) time: 2.8679 data: 0.0003 max mem: 28454 +[2024-12-12 18:03:39 root] (utils.py 283): INFO Epoch: [11] [1700/2502] eta: 0:38:23 lr: 0.000006 loss_cls: 2.7640 (2.5503) grad_norm: 1.2907 (1.4468) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 18:04:08 root] (utils.py 283): INFO Epoch: [11] [1710/2502] eta: 0:37:54 lr: 0.000006 loss_cls: 2.5224 (2.5494) grad_norm: 1.3079 (1.4464) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 18:04:37 root] (utils.py 283): INFO Epoch: [11] [1720/2502] eta: 0:37:25 lr: 0.000006 loss_cls: 2.5973 (2.5489) grad_norm: 1.2044 (1.4769) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 18:05:05 root] (utils.py 283): INFO Epoch: [11] [1730/2502] eta: 0:36:57 lr: 0.000006 loss_cls: 2.6869 (2.5492) grad_norm: 1.3716 (1.4781) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 18:05:34 root] (utils.py 283): INFO Epoch: [11] [1740/2502] eta: 0:36:28 lr: 0.000006 loss_cls: 2.7418 (2.5496) grad_norm: 1.4870 (1.4779) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 18:06:03 root] (utils.py 283): INFO Epoch: [11] [1750/2502] eta: 0:35:59 lr: 0.000006 loss_cls: 2.6147 (2.5492) grad_norm: 1.3883 (1.4794) time: 2.8781 data: 0.0003 max mem: 28454 +[2024-12-12 18:06:32 root] (utils.py 283): INFO Epoch: [11] [1760/2502] eta: 0:35:30 lr: 0.000006 loss_cls: 2.6810 (2.5508) grad_norm: 1.2330 (1.4787) time: 2.8776 data: 0.0003 max mem: 28454 +[2024-12-12 18:07:00 root] (utils.py 283): INFO Epoch: [11] [1770/2502] eta: 0:35:02 lr: 0.000006 loss_cls: 2.7261 (2.5522) grad_norm: 1.1804 (1.4778) time: 2.8733 data: 0.0003 max mem: 28454 +[2024-12-12 18:07:29 root] (utils.py 283): INFO Epoch: [11] [1780/2502] eta: 0:34:33 lr: 0.000006 loss_cls: 2.7187 (2.5523) grad_norm: 1.2295 (1.4765) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 18:07:58 root] (utils.py 283): INFO Epoch: [11] [1790/2502] eta: 0:34:04 lr: 0.000006 loss_cls: 2.6268 (2.5526) grad_norm: 1.1779 (1.4747) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 18:08:27 root] (utils.py 283): INFO Epoch: [11] [1800/2502] eta: 0:33:36 lr: 0.000006 loss_cls: 2.5734 (2.5524) grad_norm: 1.1219 (1.4729) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 18:08:55 root] (utils.py 283): INFO Epoch: [11] [1810/2502] eta: 0:33:07 lr: 0.000006 loss_cls: 2.7117 (2.5530) grad_norm: 1.1965 (1.4721) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 18:09:24 root] (utils.py 283): INFO Epoch: [11] [1820/2502] eta: 0:32:38 lr: 0.000006 loss_cls: 2.7117 (2.5535) grad_norm: 1.1965 (1.4703) time: 2.8751 data: 0.0003 max mem: 28454 +[2024-12-12 18:09:53 root] (utils.py 283): INFO Epoch: [11] [1830/2502] eta: 0:32:09 lr: 0.000006 loss_cls: 2.8008 (2.5548) grad_norm: 1.1273 (1.4687) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 18:10:21 root] (utils.py 283): INFO Epoch: [11] [1840/2502] eta: 0:31:41 lr: 0.000006 loss_cls: 2.5827 (2.5534) grad_norm: 1.1694 (1.4714) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 18:10:50 root] (utils.py 283): INFO Epoch: [11] [1850/2502] eta: 0:31:12 lr: 0.000006 loss_cls: 2.4372 (2.5534) grad_norm: 1.1706 (1.4698) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 18:11:19 root] (utils.py 283): INFO Epoch: [11] [1860/2502] eta: 0:30:43 lr: 0.000006 loss_cls: 2.4372 (2.5528) grad_norm: 1.1667 (1.4717) time: 2.8656 data: 0.0002 max mem: 28454 +[2024-12-12 18:11:47 root] (utils.py 283): INFO Epoch: [11] [1870/2502] eta: 0:30:14 lr: 0.000006 loss_cls: 2.4650 (2.5526) grad_norm: 1.0702 (1.4692) time: 2.8657 data: 0.0002 max mem: 28454 +[2024-12-12 18:12:16 root] (utils.py 283): INFO Epoch: [11] [1880/2502] eta: 0:29:46 lr: 0.000006 loss_cls: 2.7054 (2.5527) grad_norm: 1.0505 (1.4678) time: 2.8645 data: 0.0002 max mem: 28454 +[2024-12-12 18:12:45 root] (utils.py 283): INFO Epoch: [11] [1890/2502] eta: 0:29:17 lr: 0.000006 loss_cls: 2.6306 (2.5528) grad_norm: 1.1273 (1.4660) time: 2.8637 data: 0.0003 max mem: 28454 +[2024-12-12 18:13:13 root] (utils.py 283): INFO Epoch: [11] [1900/2502] eta: 0:28:48 lr: 0.000006 loss_cls: 2.6275 (2.5526) grad_norm: 1.1482 (1.4649) time: 2.8660 data: 0.0003 max mem: 28454 +[2024-12-12 18:13:42 root] (utils.py 283): INFO Epoch: [11] [1910/2502] eta: 0:28:20 lr: 0.000006 loss_cls: 2.7213 (2.5524) grad_norm: 1.1535 (1.4637) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 18:14:11 root] (utils.py 283): INFO Epoch: [11] [1920/2502] eta: 0:27:51 lr: 0.000006 loss_cls: 2.6661 (2.5524) grad_norm: 1.1267 (1.4631) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 18:14:39 root] (utils.py 283): INFO Epoch: [11] [1930/2502] eta: 0:27:22 lr: 0.000006 loss_cls: 2.4977 (2.5514) grad_norm: 1.0867 (1.4619) time: 2.8683 data: 0.0003 max mem: 28454 +[2024-12-12 18:15:08 root] (utils.py 283): INFO Epoch: [11] [1940/2502] eta: 0:26:53 lr: 0.000006 loss_cls: 2.4977 (2.5516) grad_norm: 1.0776 (1.4602) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 18:15:37 root] (utils.py 283): INFO Epoch: [11] [1950/2502] eta: 0:26:25 lr: 0.000006 loss_cls: 2.7067 (2.5515) grad_norm: 1.0711 (1.4590) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 18:16:06 root] (utils.py 283): INFO Epoch: [11] [1960/2502] eta: 0:25:56 lr: 0.000006 loss_cls: 2.7067 (2.5524) grad_norm: 1.1463 (1.4597) time: 2.8685 data: 0.0003 max mem: 28454 +[2024-12-12 18:16:34 root] (utils.py 283): INFO Epoch: [11] [1970/2502] eta: 0:25:27 lr: 0.000006 loss_cls: 2.7414 (2.5521) grad_norm: 1.1517 (1.4581) time: 2.8696 data: 0.0003 max mem: 28454 +[2024-12-12 18:17:03 root] (utils.py 283): INFO Epoch: [11] [1980/2502] eta: 0:24:59 lr: 0.000006 loss_cls: 2.7573 (2.5528) grad_norm: 1.1476 (1.4572) time: 2.8712 data: 0.0003 max mem: 28454 +[2024-12-12 18:17:32 root] (utils.py 283): INFO Epoch: [11] [1990/2502] eta: 0:24:30 lr: 0.000006 loss_cls: 2.7374 (2.5535) grad_norm: 1.1476 (1.4558) time: 2.8746 data: 0.0003 max mem: 28454 +[2024-12-12 18:18:00 root] (utils.py 283): INFO Epoch: [11] [2000/2502] eta: 0:24:01 lr: 0.000006 loss_cls: 2.6089 (2.5525) grad_norm: 1.0685 (1.4538) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 18:18:29 root] (utils.py 283): INFO Epoch: [11] [2010/2502] eta: 0:23:32 lr: 0.000006 loss_cls: 2.3471 (2.5517) grad_norm: 1.0736 (1.4607) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 18:18:58 root] (utils.py 283): INFO Epoch: [11] [2020/2502] eta: 0:23:04 lr: 0.000006 loss_cls: 2.6031 (2.5525) grad_norm: 1.1135 (1.4594) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 18:19:27 root] (utils.py 283): INFO Epoch: [11] [2030/2502] eta: 0:22:35 lr: 0.000006 loss_cls: 2.6364 (2.5520) grad_norm: 1.1031 (1.4577) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 18:19:56 root] (utils.py 283): INFO Epoch: [11] [2040/2502] eta: 0:22:06 lr: 0.000006 loss_cls: 2.5823 (2.5527) grad_norm: 1.1354 (1.4579) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-12 18:20:24 root] (utils.py 283): INFO Epoch: [11] [2050/2502] eta: 0:21:38 lr: 0.000006 loss_cls: 2.6304 (2.5531) grad_norm: 1.0871 (1.4685) time: 2.8763 data: 0.0003 max mem: 28454 +[2024-12-12 18:20:53 root] (utils.py 283): INFO Epoch: [11] [2060/2502] eta: 0:21:09 lr: 0.000006 loss_cls: 2.7067 (2.5536) grad_norm: 1.0693 (1.4672) time: 2.8765 data: 0.0003 max mem: 28454 +[2024-12-12 18:21:22 root] (utils.py 283): INFO Epoch: [11] [2070/2502] eta: 0:20:40 lr: 0.000006 loss_cls: 2.7011 (2.5526) grad_norm: 1.0905 (1.4658) time: 2.8773 data: 0.0002 max mem: 28454 +[2024-12-12 18:21:51 root] (utils.py 283): INFO Epoch: [11] [2080/2502] eta: 0:20:11 lr: 0.000006 loss_cls: 2.4386 (2.5521) grad_norm: 1.0922 (1.4641) time: 2.8775 data: 0.0003 max mem: 28454 +[2024-12-12 18:22:19 root] (utils.py 283): INFO Epoch: [11] [2090/2502] eta: 0:19:43 lr: 0.000006 loss_cls: 2.4386 (2.5516) grad_norm: 1.0697 (1.4624) time: 2.8760 data: 0.0003 max mem: 28454 +[2024-12-12 18:22:48 root] (utils.py 283): INFO Epoch: [11] [2100/2502] eta: 0:19:14 lr: 0.000006 loss_cls: 2.4643 (2.5514) grad_norm: 1.0991 (1.4611) time: 2.8764 data: 0.0003 max mem: 28454 +[2024-12-12 18:23:17 root] (utils.py 283): INFO Epoch: [11] [2110/2502] eta: 0:18:45 lr: 0.000006 loss_cls: 2.4976 (2.5510) grad_norm: 1.1256 (1.4603) time: 2.8773 data: 0.0003 max mem: 28454 +[2024-12-12 18:23:46 root] (utils.py 283): INFO Epoch: [11] [2120/2502] eta: 0:18:17 lr: 0.000006 loss_cls: 2.6974 (2.5516) grad_norm: 1.1592 (1.4588) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-12 18:24:14 root] (utils.py 283): INFO Epoch: [11] [2130/2502] eta: 0:17:48 lr: 0.000006 loss_cls: 2.6763 (2.5521) grad_norm: 1.1606 (1.4578) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 18:24:43 root] (utils.py 283): INFO Epoch: [11] [2140/2502] eta: 0:17:19 lr: 0.000006 loss_cls: 2.6613 (2.5521) grad_norm: 1.1367 (1.4559) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 18:25:12 root] (utils.py 283): INFO Epoch: [11] [2150/2502] eta: 0:16:50 lr: 0.000006 loss_cls: 2.6613 (2.5525) grad_norm: 1.0890 (1.4546) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-12 18:25:41 root] (utils.py 283): INFO Epoch: [11] [2160/2502] eta: 0:16:22 lr: 0.000006 loss_cls: 2.6805 (2.5531) grad_norm: 1.1310 (1.4538) time: 2.8796 data: 0.0002 max mem: 28454 +[2024-12-12 18:26:10 root] (utils.py 283): INFO Epoch: [11] [2170/2502] eta: 0:15:53 lr: 0.000006 loss_cls: 2.6990 (2.5534) grad_norm: 1.1063 (1.4519) time: 2.8804 data: 0.0002 max mem: 28454 +[2024-12-12 18:26:38 root] (utils.py 283): INFO Epoch: [11] [2180/2502] eta: 0:15:24 lr: 0.000006 loss_cls: 2.6990 (2.5536) grad_norm: 1.0490 (1.4502) time: 2.8781 data: 0.0003 max mem: 28454 +[2024-12-12 18:27:07 root] (utils.py 283): INFO Epoch: [11] [2190/2502] eta: 0:14:56 lr: 0.000006 loss_cls: 2.6169 (2.5536) grad_norm: 1.0684 (1.4486) time: 2.8793 data: 0.0002 max mem: 28454 +[2024-12-12 18:27:36 root] (utils.py 283): INFO Epoch: [11] [2200/2502] eta: 0:14:27 lr: 0.000006 loss_cls: 2.6700 (2.5538) grad_norm: 1.1187 (1.4531) time: 2.8807 data: 0.0002 max mem: 28454 +[2024-12-12 18:28:05 root] (utils.py 283): INFO Epoch: [11] [2210/2502] eta: 0:13:58 lr: 0.000006 loss_cls: 2.6157 (2.5535) grad_norm: 1.2439 (1.4520) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 18:28:33 root] (utils.py 283): INFO Epoch: [11] [2220/2502] eta: 0:13:29 lr: 0.000006 loss_cls: 2.4537 (2.5526) grad_norm: 1.1670 (1.4507) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 18:29:02 root] (utils.py 283): INFO Epoch: [11] [2230/2502] eta: 0:13:01 lr: 0.000006 loss_cls: 2.5371 (2.5529) grad_norm: 1.1121 (1.4497) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 18:29:31 root] (utils.py 283): INFO Epoch: [11] [2240/2502] eta: 0:12:32 lr: 0.000006 loss_cls: 2.5128 (2.5524) grad_norm: 1.0794 (1.4484) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-12 18:30:00 root] (utils.py 283): INFO Epoch: [11] [2250/2502] eta: 0:12:03 lr: 0.000006 loss_cls: 2.5473 (2.5525) grad_norm: 1.0526 (1.4468) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 18:30:28 root] (utils.py 283): INFO Epoch: [11] [2260/2502] eta: 0:11:35 lr: 0.000006 loss_cls: 2.5572 (2.5516) grad_norm: 1.0791 (1.4452) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 18:30:57 root] (utils.py 283): INFO Epoch: [11] [2270/2502] eta: 0:11:06 lr: 0.000006 loss_cls: 2.5572 (2.5518) grad_norm: 1.0335 (1.4538) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 18:31:26 root] (utils.py 283): INFO Epoch: [11] [2280/2502] eta: 0:10:37 lr: 0.000006 loss_cls: 2.6572 (2.5524) grad_norm: 1.0297 (1.4520) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 18:31:55 root] (utils.py 283): INFO Epoch: [11] [2290/2502] eta: 0:10:08 lr: 0.000006 loss_cls: 2.7435 (2.5527) grad_norm: 1.0839 (1.4508) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 18:32:23 root] (utils.py 283): INFO Epoch: [11] [2300/2502] eta: 0:09:40 lr: 0.000006 loss_cls: 2.7435 (2.5528) grad_norm: 1.1201 (1.4501) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-12 18:32:52 root] (utils.py 283): INFO Epoch: [11] [2310/2502] eta: 0:09:11 lr: 0.000006 loss_cls: 2.6135 (2.5530) grad_norm: 1.0262 (1.4494) time: 2.8745 data: 0.0003 max mem: 28454 +[2024-12-12 18:33:21 root] (utils.py 283): INFO Epoch: [11] [2320/2502] eta: 0:08:42 lr: 0.000006 loss_cls: 2.6706 (2.5528) grad_norm: 1.0348 (1.4483) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 18:33:50 root] (utils.py 283): INFO Epoch: [11] [2330/2502] eta: 0:08:14 lr: 0.000006 loss_cls: 2.6706 (2.5529) grad_norm: 1.1761 (1.4475) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 18:34:18 root] (utils.py 283): INFO Epoch: [11] [2340/2502] eta: 0:07:45 lr: 0.000006 loss_cls: 2.5455 (2.5528) grad_norm: 1.2651 (1.4476) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 18:34:47 root] (utils.py 283): INFO Epoch: [11] [2350/2502] eta: 0:07:16 lr: 0.000006 loss_cls: 2.6767 (2.5529) grad_norm: 1.1592 (1.4472) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 18:35:16 root] (utils.py 283): INFO Epoch: [11] [2360/2502] eta: 0:06:47 lr: 0.000006 loss_cls: 2.6502 (2.5526) grad_norm: 1.1195 (1.4458) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-12 18:35:45 root] (utils.py 283): INFO Epoch: [11] [2370/2502] eta: 0:06:19 lr: 0.000006 loss_cls: 2.5639 (2.5520) grad_norm: 1.1043 (1.4462) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-12 18:36:13 root] (utils.py 283): INFO Epoch: [11] [2380/2502] eta: 0:05:50 lr: 0.000006 loss_cls: 2.5988 (2.5516) grad_norm: 1.1043 (1.4466) time: 2.8744 data: 0.0002 max mem: 28454 +[2024-12-12 18:36:42 root] (utils.py 283): INFO Epoch: [11] [2390/2502] eta: 0:05:21 lr: 0.000006 loss_cls: 2.5914 (2.5514) grad_norm: 1.1641 (1.4467) time: 2.8752 data: 0.0002 max mem: 28454 +[2024-12-12 18:37:11 root] (utils.py 283): INFO Epoch: [11] [2400/2502] eta: 0:04:52 lr: 0.000006 loss_cls: 2.5914 (2.5508) grad_norm: 1.1437 (1.4466) time: 2.8745 data: 0.0003 max mem: 28454 +[2024-12-12 18:37:40 root] (utils.py 283): INFO Epoch: [11] [2410/2502] eta: 0:04:24 lr: 0.000006 loss_cls: 2.4465 (2.5505) grad_norm: 1.0590 (1.4451) time: 2.8739 data: 0.0003 max mem: 28454 +[2024-12-12 18:38:08 root] (utils.py 283): INFO Epoch: [11] [2420/2502] eta: 0:03:55 lr: 0.000006 loss_cls: 2.5419 (2.5512) grad_norm: 1.1133 (1.4446) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 18:38:37 root] (utils.py 283): INFO Epoch: [11] [2430/2502] eta: 0:03:26 lr: 0.000006 loss_cls: 2.8283 (2.5519) grad_norm: 1.1465 (1.4432) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 18:39:06 root] (utils.py 283): INFO Epoch: [11] [2440/2502] eta: 0:02:58 lr: 0.000006 loss_cls: 2.7822 (2.5521) grad_norm: 1.0749 (1.4427) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 18:39:34 root] (utils.py 283): INFO Epoch: [11] [2450/2502] eta: 0:02:29 lr: 0.000006 loss_cls: 2.7854 (2.5525) grad_norm: 1.1612 (1.4422) time: 2.8708 data: 0.0003 max mem: 28454 +[2024-12-12 18:40:03 root] (utils.py 283): INFO Epoch: [11] [2460/2502] eta: 0:02:00 lr: 0.000006 loss_cls: 2.7639 (2.5518) grad_norm: 1.1658 (1.4409) time: 2.8729 data: 0.0003 max mem: 28454 +[2024-12-12 18:40:32 root] (utils.py 283): INFO Epoch: [11] [2470/2502] eta: 0:01:31 lr: 0.000006 loss_cls: 2.7471 (2.5523) grad_norm: 1.1163 (1.4400) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 18:41:01 root] (utils.py 283): INFO Epoch: [11] [2480/2502] eta: 0:01:03 lr: 0.000006 loss_cls: 2.7332 (2.5523) grad_norm: 1.1104 (1.4394) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 18:41:30 root] (utils.py 283): INFO Epoch: [11] [2490/2502] eta: 0:00:34 lr: 0.000006 loss_cls: 2.4863 (2.5524) grad_norm: 1.1104 (1.4395) time: 2.9003 data: 0.0213 max mem: 28454 +[2024-12-12 18:41:59 root] (utils.py 283): INFO Epoch: [11] [2500/2502] eta: 0:00:05 lr: 0.000006 loss_cls: 2.6784 (2.5524) grad_norm: 1.2300 (1.4416) time: 2.9014 data: 0.0213 max mem: 28454 +[2024-12-12 18:42:02 root] (utils.py 283): INFO Epoch: [11] [2501/2502] eta: 0:00:02 lr: 0.000006 loss_cls: 2.5535 (2.5522) grad_norm: 1.2300 (1.4414) time: 2.9010 data: 0.0213 max mem: 28454 +[2024-12-12 18:42:02 root] (utils.py 297): INFO Epoch: [11] Total time: 1:59:47 (2.8727 s / it) +[2024-12-12 18:42:02 root] (engine.py 179): INFO Averaged stats:lr: 0.000006 loss_cls: 2.5535 (2.5521) grad_norm: 1.2300 (1.4414) +[2024-12-12 18:42:05 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3973 (0.3973) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5432 data: 0.0005 max mem: 28454 +[2024-12-12 18:42:10 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6393 (0.5923) acc1: 86.7188 (87.4290) acc3: 97.6562 (96.7330) acc5: 98.4375 (98.0824) time: 0.5473 data: 0.0004 max mem: 28454 +[2024-12-12 18:42:16 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6273 (0.6202) acc1: 86.7188 (87.2396) acc3: 96.0938 (96.3170) acc5: 97.6562 (97.6562) time: 0.5476 data: 0.0004 max mem: 28454 +[2024-12-12 18:42:21 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6273 (0.6507) acc1: 86.7188 (86.0635) acc3: 95.3125 (96.1694) acc5: 97.6562 (97.6815) time: 0.5484 data: 0.0004 max mem: 28454 +[2024-12-12 18:42:27 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6893 (0.6568) acc1: 86.7188 (85.8613) acc3: 96.0938 (96.1700) acc5: 97.6562 (97.6372) time: 0.5485 data: 0.0004 max mem: 28454 +[2024-12-12 18:42:32 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8135 (0.7378) acc1: 79.6875 (83.9001) acc3: 93.7500 (95.1593) acc5: 95.3125 (96.7678) time: 0.5482 data: 0.0004 max mem: 28454 +[2024-12-12 18:42:38 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9588 (0.7652) acc1: 79.6875 (83.6322) acc3: 91.4062 (94.5441) acc5: 93.7500 (96.2731) time: 0.5486 data: 0.0005 max mem: 28454 +[2024-12-12 18:42:43 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9371 (0.7934) acc1: 80.4688 (82.8455) acc3: 91.4062 (94.2232) acc5: 95.3125 (96.1598) time: 0.5483 data: 0.0005 max mem: 28454 +[2024-12-12 18:42:48 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0108 (0.8178) acc1: 78.1250 (82.3785) acc3: 92.1875 (93.9236) acc5: 94.5312 (95.8140) time: 0.5486 data: 0.0007 max mem: 28454 +[2024-12-12 18:42:54 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9463 (0.8364) acc1: 78.1250 (81.7565) acc3: 90.6250 (93.5611) acc5: 94.5312 (95.6645) time: 0.5492 data: 0.0006 max mem: 28454 +[2024-12-12 18:42:58 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8818 (0.8334) acc1: 78.1250 (81.7440) acc3: 92.1875 (93.5760) acc5: 94.5312 (95.7200) time: 0.5398 data: 0.0006 max mem: 28454 +[2024-12-12 18:42:58 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5467 s / it) +[2024-12-12 18:42:58 root] (engine.py 264): INFO * Acc@1 81.874 Acc@3 93.384 Acc@5 95.736 loss 0.835 flops 13.207 layer_flops 13.109 +[2024-12-12 18:42:58 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.9% +[2024-12-12 18:43:00 root] (main.py 576): INFO Max accuracy: 81.87% +[2024-12-12 18:43:02 root] (utils.py 283): INFO Epoch: [12] [ 0/2502] eta: 1:58:58 lr: 0.000004 loss_cls: 2.8592 (2.8592) grad_norm: 1.2360 (1.2360) time: 2.8532 data: 0.0003 max mem: 28454 +[2024-12-12 18:43:31 root] (utils.py 283): INFO Epoch: [12] [ 10/2502] eta: 1:59:05 lr: 0.000004 loss_cls: 2.6966 (2.5084) grad_norm: 1.1816 (1.1929) time: 2.8676 data: 0.0003 max mem: 28454 +[2024-12-12 18:44:00 root] (utils.py 283): INFO Epoch: [12] [ 20/2502] eta: 1:58:48 lr: 0.000004 loss_cls: 2.5847 (2.4971) grad_norm: 1.1584 (1.1912) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 18:44:29 root] (utils.py 283): INFO Epoch: [12] [ 30/2502] eta: 1:58:23 lr: 0.000004 loss_cls: 2.5614 (2.5410) grad_norm: 1.1308 (1.1581) time: 2.8772 data: 0.0003 max mem: 28454 +[2024-12-12 18:44:57 root] (utils.py 283): INFO Epoch: [12] [ 40/2502] eta: 1:57:57 lr: 0.000004 loss_cls: 2.6081 (2.5437) grad_norm: 1.0763 (1.1653) time: 2.8775 data: 0.0003 max mem: 28454 +[2024-12-12 18:45:26 root] (utils.py 283): INFO Epoch: [12] [ 50/2502] eta: 1:57:29 lr: 0.000004 loss_cls: 2.6910 (2.5589) grad_norm: 1.0814 (1.1575) time: 2.8773 data: 0.0003 max mem: 28454 +[2024-12-12 18:45:55 root] (utils.py 283): INFO Epoch: [12] [ 60/2502] eta: 1:57:03 lr: 0.000004 loss_cls: 2.4813 (2.5157) grad_norm: 1.1358 (1.1534) time: 2.8798 data: 0.0002 max mem: 28454 +[2024-12-12 18:46:24 root] (utils.py 283): INFO Epoch: [12] [ 70/2502] eta: 1:56:36 lr: 0.000004 loss_cls: 2.4813 (2.5140) grad_norm: 1.1548 (1.1675) time: 2.8809 data: 0.0002 max mem: 28454 +[2024-12-12 18:46:53 root] (utils.py 283): INFO Epoch: [12] [ 80/2502] eta: 1:56:08 lr: 0.000004 loss_cls: 2.7270 (2.5473) grad_norm: 1.1795 (1.1887) time: 2.8803 data: 0.0003 max mem: 28454 +[2024-12-12 18:47:21 root] (utils.py 283): INFO Epoch: [12] [ 90/2502] eta: 1:55:40 lr: 0.000004 loss_cls: 2.8652 (2.5602) grad_norm: 1.1617 (1.1847) time: 2.8799 data: 0.0003 max mem: 28454 +[2024-12-12 18:47:50 root] (utils.py 283): INFO Epoch: [12] [ 100/2502] eta: 1:55:12 lr: 0.000004 loss_cls: 2.7739 (2.5678) grad_norm: 1.1225 (1.1830) time: 2.8797 data: 0.0002 max mem: 28454 +[2024-12-12 18:48:19 root] (utils.py 283): INFO Epoch: [12] [ 110/2502] eta: 1:54:43 lr: 0.000004 loss_cls: 2.7845 (2.5835) grad_norm: 1.0605 (1.1845) time: 2.8783 data: 0.0003 max mem: 28454 +[2024-12-12 18:48:48 root] (utils.py 283): INFO Epoch: [12] [ 120/2502] eta: 1:54:14 lr: 0.000004 loss_cls: 2.7172 (2.5785) grad_norm: 1.0782 (1.2610) time: 2.8757 data: 0.0003 max mem: 28454 +[2024-12-12 18:49:16 root] (utils.py 283): INFO Epoch: [12] [ 130/2502] eta: 1:53:45 lr: 0.000004 loss_cls: 2.6995 (2.5879) grad_norm: 1.1053 (1.2662) time: 2.8765 data: 0.0003 max mem: 28454 +[2024-12-12 18:49:45 root] (utils.py 283): INFO Epoch: [12] [ 140/2502] eta: 1:53:16 lr: 0.000004 loss_cls: 2.5688 (2.5749) grad_norm: 1.1053 (1.2668) time: 2.8764 data: 0.0003 max mem: 28454 +[2024-12-12 18:50:14 root] (utils.py 283): INFO Epoch: [12] [ 150/2502] eta: 1:52:47 lr: 0.000004 loss_cls: 2.5441 (2.5793) grad_norm: 1.0572 (1.2643) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 18:50:43 root] (utils.py 283): INFO Epoch: [12] [ 160/2502] eta: 1:52:17 lr: 0.000004 loss_cls: 2.5193 (2.5705) grad_norm: 1.1683 (1.2594) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 18:51:12 root] (utils.py 283): INFO Epoch: [12] [ 170/2502] eta: 1:51:49 lr: 0.000004 loss_cls: 2.6083 (2.5800) grad_norm: 1.2439 (1.2608) time: 2.8774 data: 0.0002 max mem: 28454 +[2024-12-12 18:51:40 root] (utils.py 283): INFO Epoch: [12] [ 180/2502] eta: 1:51:20 lr: 0.000004 loss_cls: 2.6653 (2.5846) grad_norm: 1.2323 (1.2677) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 18:52:09 root] (utils.py 283): INFO Epoch: [12] [ 190/2502] eta: 1:50:51 lr: 0.000004 loss_cls: 2.4713 (2.5815) grad_norm: 1.2024 (1.2681) time: 2.8753 data: 0.0002 max mem: 28454 +[2024-12-12 18:52:38 root] (utils.py 283): INFO Epoch: [12] [ 200/2502] eta: 1:50:22 lr: 0.000004 loss_cls: 2.3989 (2.5686) grad_norm: 1.0497 (1.2558) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 18:53:07 root] (utils.py 283): INFO Epoch: [12] [ 210/2502] eta: 1:49:53 lr: 0.000004 loss_cls: 2.3989 (2.5597) grad_norm: 1.0323 (1.2492) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 18:53:35 root] (utils.py 283): INFO Epoch: [12] [ 220/2502] eta: 1:49:24 lr: 0.000004 loss_cls: 2.5244 (2.5616) grad_norm: 1.1478 (1.2446) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 18:54:04 root] (utils.py 283): INFO Epoch: [12] [ 230/2502] eta: 1:48:55 lr: 0.000004 loss_cls: 2.7847 (2.5693) grad_norm: 1.1718 (1.2439) time: 2.8769 data: 0.0003 max mem: 28454 +[2024-12-12 18:54:33 root] (utils.py 283): INFO Epoch: [12] [ 240/2502] eta: 1:48:26 lr: 0.000004 loss_cls: 2.7847 (2.5737) grad_norm: 1.1551 (1.2546) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-12 18:55:02 root] (utils.py 283): INFO Epoch: [12] [ 250/2502] eta: 1:47:58 lr: 0.000004 loss_cls: 2.6267 (2.5699) grad_norm: 1.0932 (1.2503) time: 2.8774 data: 0.0003 max mem: 28454 +[2024-12-12 18:55:30 root] (utils.py 283): INFO Epoch: [12] [ 260/2502] eta: 1:47:29 lr: 0.000004 loss_cls: 2.6057 (2.5663) grad_norm: 1.0553 (1.2435) time: 2.8788 data: 0.0003 max mem: 28454 +[2024-12-12 18:55:59 root] (utils.py 283): INFO Epoch: [12] [ 270/2502] eta: 1:47:01 lr: 0.000004 loss_cls: 2.5848 (2.5632) grad_norm: 1.0772 (1.2399) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 18:56:28 root] (utils.py 283): INFO Epoch: [12] [ 280/2502] eta: 1:46:33 lr: 0.000004 loss_cls: 2.6600 (2.5659) grad_norm: 1.1538 (1.2760) time: 2.8857 data: 0.0002 max mem: 28454 +[2024-12-12 18:56:57 root] (utils.py 283): INFO Epoch: [12] [ 290/2502] eta: 1:46:05 lr: 0.000004 loss_cls: 2.7369 (2.5660) grad_norm: 1.1626 (1.2724) time: 2.8859 data: 0.0002 max mem: 28454 +[2024-12-12 18:57:26 root] (utils.py 283): INFO Epoch: [12] [ 300/2502] eta: 1:45:36 lr: 0.000004 loss_cls: 2.4947 (2.5652) grad_norm: 1.1743 (1.2729) time: 2.8793 data: 0.0002 max mem: 28454 +[2024-12-12 18:57:54 root] (utils.py 283): INFO Epoch: [12] [ 310/2502] eta: 1:45:07 lr: 0.000004 loss_cls: 2.4922 (2.5639) grad_norm: 1.1564 (1.2700) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 18:58:23 root] (utils.py 283): INFO Epoch: [12] [ 320/2502] eta: 1:44:38 lr: 0.000004 loss_cls: 2.6680 (2.5662) grad_norm: 1.0667 (1.2723) time: 2.8781 data: 0.0003 max mem: 28454 +[2024-12-12 18:58:52 root] (utils.py 283): INFO Epoch: [12] [ 330/2502] eta: 1:44:10 lr: 0.000004 loss_cls: 2.5700 (2.5621) grad_norm: 1.1643 (1.2702) time: 2.8788 data: 0.0003 max mem: 28454 +[2024-12-12 18:59:21 root] (utils.py 283): INFO Epoch: [12] [ 340/2502] eta: 1:43:41 lr: 0.000004 loss_cls: 2.6193 (2.5668) grad_norm: 1.1427 (1.2672) time: 2.8798 data: 0.0003 max mem: 28454 +[2024-12-12 18:59:50 root] (utils.py 283): INFO Epoch: [12] [ 350/2502] eta: 1:43:12 lr: 0.000004 loss_cls: 2.5465 (2.5646) grad_norm: 1.1330 (1.2658) time: 2.8790 data: 0.0003 max mem: 28454 +[2024-12-12 19:00:19 root] (utils.py 283): INFO Epoch: [12] [ 360/2502] eta: 1:42:44 lr: 0.000004 loss_cls: 2.5273 (2.5633) grad_norm: 1.1398 (1.2637) time: 2.8835 data: 0.0002 max mem: 28454 +[2024-12-12 19:00:47 root] (utils.py 283): INFO Epoch: [12] [ 370/2502] eta: 1:42:15 lr: 0.000004 loss_cls: 2.6794 (2.5643) grad_norm: 1.1675 (1.2672) time: 2.8822 data: 0.0002 max mem: 28454 +[2024-12-12 19:01:16 root] (utils.py 283): INFO Epoch: [12] [ 380/2502] eta: 1:41:46 lr: 0.000004 loss_cls: 2.6488 (2.5595) grad_norm: 1.2692 (1.2792) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 19:01:45 root] (utils.py 283): INFO Epoch: [12] [ 390/2502] eta: 1:41:18 lr: 0.000004 loss_cls: 2.2610 (2.5526) grad_norm: 1.1427 (1.2761) time: 2.8779 data: 0.0002 max mem: 28454 +[2024-12-12 19:02:14 root] (utils.py 283): INFO Epoch: [12] [ 400/2502] eta: 1:40:49 lr: 0.000004 loss_cls: 2.5364 (2.5545) grad_norm: 1.1073 (1.2716) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-12 19:02:42 root] (utils.py 283): INFO Epoch: [12] [ 410/2502] eta: 1:40:20 lr: 0.000004 loss_cls: 2.7257 (2.5558) grad_norm: 1.1063 (1.2709) time: 2.8791 data: 0.0002 max mem: 28454 +[2024-12-12 19:03:11 root] (utils.py 283): INFO Epoch: [12] [ 420/2502] eta: 1:39:51 lr: 0.000004 loss_cls: 2.8098 (2.5603) grad_norm: 1.1079 (1.2760) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-12 19:03:40 root] (utils.py 283): INFO Epoch: [12] [ 430/2502] eta: 1:39:22 lr: 0.000004 loss_cls: 2.8076 (2.5582) grad_norm: 1.1688 (1.2763) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 19:04:09 root] (utils.py 283): INFO Epoch: [12] [ 440/2502] eta: 1:38:53 lr: 0.000004 loss_cls: 2.8076 (2.5612) grad_norm: 1.1574 (1.2752) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 19:04:37 root] (utils.py 283): INFO Epoch: [12] [ 450/2502] eta: 1:38:24 lr: 0.000004 loss_cls: 2.7634 (2.5618) grad_norm: 1.0757 (1.2705) time: 2.8735 data: 0.0003 max mem: 28454 +[2024-12-12 19:05:06 root] (utils.py 283): INFO Epoch: [12] [ 460/2502] eta: 1:37:55 lr: 0.000004 loss_cls: 2.7634 (2.5667) grad_norm: 1.0696 (1.2686) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 19:05:35 root] (utils.py 283): INFO Epoch: [12] [ 470/2502] eta: 1:37:26 lr: 0.000004 loss_cls: 2.7175 (2.5659) grad_norm: 1.1203 (1.2655) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 19:06:04 root] (utils.py 283): INFO Epoch: [12] [ 480/2502] eta: 1:36:57 lr: 0.000004 loss_cls: 2.5672 (2.5659) grad_norm: 1.0800 (1.2630) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 19:06:32 root] (utils.py 283): INFO Epoch: [12] [ 490/2502] eta: 1:36:28 lr: 0.000004 loss_cls: 2.3900 (2.5597) grad_norm: 1.1180 (1.2682) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 19:07:01 root] (utils.py 283): INFO Epoch: [12] [ 500/2502] eta: 1:36:00 lr: 0.000004 loss_cls: 2.2788 (2.5567) grad_norm: 1.0958 (1.2654) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 19:07:30 root] (utils.py 283): INFO Epoch: [12] [ 510/2502] eta: 1:35:31 lr: 0.000004 loss_cls: 2.2788 (2.5525) grad_norm: 1.1170 (1.2675) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 19:07:59 root] (utils.py 283): INFO Epoch: [12] [ 520/2502] eta: 1:35:02 lr: 0.000004 loss_cls: 2.3806 (2.5534) grad_norm: 1.1478 (1.2837) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 19:08:27 root] (utils.py 283): INFO Epoch: [12] [ 530/2502] eta: 1:34:33 lr: 0.000004 loss_cls: 2.5730 (2.5534) grad_norm: 1.1741 (1.2826) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 19:08:56 root] (utils.py 283): INFO Epoch: [12] [ 540/2502] eta: 1:34:04 lr: 0.000004 loss_cls: 2.4337 (2.5488) grad_norm: 1.1271 (1.2788) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 19:09:25 root] (utils.py 283): INFO Epoch: [12] [ 550/2502] eta: 1:33:35 lr: 0.000004 loss_cls: 2.4037 (2.5462) grad_norm: 1.0824 (1.2776) time: 2.8751 data: 0.0002 max mem: 28454 +[2024-12-12 19:09:54 root] (utils.py 283): INFO Epoch: [12] [ 560/2502] eta: 1:33:07 lr: 0.000004 loss_cls: 2.5872 (2.5471) grad_norm: 1.0438 (1.2735) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 19:10:22 root] (utils.py 283): INFO Epoch: [12] [ 570/2502] eta: 1:32:38 lr: 0.000004 loss_cls: 2.6864 (2.5486) grad_norm: 1.0770 (1.2715) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 19:10:51 root] (utils.py 283): INFO Epoch: [12] [ 580/2502] eta: 1:32:09 lr: 0.000004 loss_cls: 2.6788 (2.5471) grad_norm: 1.1011 (1.2684) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 19:11:20 root] (utils.py 283): INFO Epoch: [12] [ 590/2502] eta: 1:31:40 lr: 0.000004 loss_cls: 2.6788 (2.5479) grad_norm: 1.1324 (1.2688) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 19:11:48 root] (utils.py 283): INFO Epoch: [12] [ 600/2502] eta: 1:31:11 lr: 0.000004 loss_cls: 2.6833 (2.5494) grad_norm: 1.1395 (1.2667) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 19:12:17 root] (utils.py 283): INFO Epoch: [12] [ 610/2502] eta: 1:30:42 lr: 0.000004 loss_cls: 2.6588 (2.5488) grad_norm: 1.1531 (1.2693) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 19:12:46 root] (utils.py 283): INFO Epoch: [12] [ 620/2502] eta: 1:30:13 lr: 0.000004 loss_cls: 2.6760 (2.5499) grad_norm: 1.1433 (1.2676) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 19:13:15 root] (utils.py 283): INFO Epoch: [12] [ 630/2502] eta: 1:29:44 lr: 0.000004 loss_cls: 2.6950 (2.5520) grad_norm: 1.0658 (1.2645) time: 2.8723 data: 0.0002 max mem: 28454 +[2024-12-12 19:13:43 root] (utils.py 283): INFO Epoch: [12] [ 640/2502] eta: 1:29:15 lr: 0.000004 loss_cls: 2.7890 (2.5549) grad_norm: 1.1315 (1.2631) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 19:14:12 root] (utils.py 283): INFO Epoch: [12] [ 650/2502] eta: 1:28:46 lr: 0.000004 loss_cls: 2.6906 (2.5528) grad_norm: 1.1315 (1.2610) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 19:14:41 root] (utils.py 283): INFO Epoch: [12] [ 660/2502] eta: 1:28:18 lr: 0.000004 loss_cls: 2.4455 (2.5492) grad_norm: 1.1652 (1.2628) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 19:15:10 root] (utils.py 283): INFO Epoch: [12] [ 670/2502] eta: 1:27:49 lr: 0.000004 loss_cls: 2.4086 (2.5478) grad_norm: 1.1595 (1.2613) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 19:15:38 root] (utils.py 283): INFO Epoch: [12] [ 680/2502] eta: 1:27:20 lr: 0.000004 loss_cls: 2.6773 (2.5489) grad_norm: 1.1231 (1.2706) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 19:16:07 root] (utils.py 283): INFO Epoch: [12] [ 690/2502] eta: 1:26:51 lr: 0.000004 loss_cls: 2.5838 (2.5487) grad_norm: 1.0984 (1.2695) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 19:16:36 root] (utils.py 283): INFO Epoch: [12] [ 700/2502] eta: 1:26:22 lr: 0.000004 loss_cls: 2.5828 (2.5494) grad_norm: 1.0866 (1.2666) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 19:17:04 root] (utils.py 283): INFO Epoch: [12] [ 710/2502] eta: 1:25:53 lr: 0.000004 loss_cls: 2.6701 (2.5513) grad_norm: 1.1112 (1.2656) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 19:17:33 root] (utils.py 283): INFO Epoch: [12] [ 720/2502] eta: 1:25:24 lr: 0.000004 loss_cls: 2.6902 (2.5497) grad_norm: 1.0686 (1.2632) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 19:18:02 root] (utils.py 283): INFO Epoch: [12] [ 730/2502] eta: 1:24:56 lr: 0.000004 loss_cls: 2.6715 (2.5512) grad_norm: 1.1331 (1.2649) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 19:18:31 root] (utils.py 283): INFO Epoch: [12] [ 740/2502] eta: 1:24:27 lr: 0.000004 loss_cls: 2.6715 (2.5522) grad_norm: 1.1657 (1.2638) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 19:18:59 root] (utils.py 283): INFO Epoch: [12] [ 750/2502] eta: 1:23:58 lr: 0.000004 loss_cls: 2.7103 (2.5545) grad_norm: 1.0772 (1.2619) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 19:19:28 root] (utils.py 283): INFO Epoch: [12] [ 760/2502] eta: 1:23:29 lr: 0.000004 loss_cls: 2.6807 (2.5553) grad_norm: 1.0489 (1.2585) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 19:19:57 root] (utils.py 283): INFO Epoch: [12] [ 770/2502] eta: 1:23:00 lr: 0.000004 loss_cls: 2.6493 (2.5564) grad_norm: 1.0383 (1.2653) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 19:20:26 root] (utils.py 283): INFO Epoch: [12] [ 780/2502] eta: 1:22:32 lr: 0.000004 loss_cls: 2.6493 (2.5572) grad_norm: 1.1209 (1.2641) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 19:20:54 root] (utils.py 283): INFO Epoch: [12] [ 790/2502] eta: 1:22:03 lr: 0.000004 loss_cls: 2.7876 (2.5605) grad_norm: 1.1358 (1.2686) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 19:21:23 root] (utils.py 283): INFO Epoch: [12] [ 800/2502] eta: 1:21:34 lr: 0.000004 loss_cls: 2.8172 (2.5630) grad_norm: 1.1378 (1.2673) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 19:21:52 root] (utils.py 283): INFO Epoch: [12] [ 810/2502] eta: 1:21:05 lr: 0.000004 loss_cls: 2.7126 (2.5626) grad_norm: 1.1302 (1.2659) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 19:22:20 root] (utils.py 283): INFO Epoch: [12] [ 820/2502] eta: 1:20:36 lr: 0.000004 loss_cls: 2.7000 (2.5632) grad_norm: 1.1158 (1.2650) time: 2.8711 data: 0.0003 max mem: 28454 +[2024-12-12 19:22:49 root] (utils.py 283): INFO Epoch: [12] [ 830/2502] eta: 1:20:07 lr: 0.000004 loss_cls: 2.7170 (2.5639) grad_norm: 1.1158 (1.2638) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 19:23:18 root] (utils.py 283): INFO Epoch: [12] [ 840/2502] eta: 1:19:38 lr: 0.000004 loss_cls: 2.7243 (2.5661) grad_norm: 1.1639 (1.2633) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 19:23:47 root] (utils.py 283): INFO Epoch: [12] [ 850/2502] eta: 1:19:10 lr: 0.000004 loss_cls: 2.7566 (2.5674) grad_norm: 1.1409 (1.2651) time: 2.8707 data: 0.0002 max mem: 28454 +[2024-12-12 19:24:15 root] (utils.py 283): INFO Epoch: [12] [ 860/2502] eta: 1:18:41 lr: 0.000004 loss_cls: 2.6544 (2.5673) grad_norm: 1.1199 (1.2635) time: 2.8712 data: 0.0003 max mem: 28454 +[2024-12-12 19:24:44 root] (utils.py 283): INFO Epoch: [12] [ 870/2502] eta: 1:18:12 lr: 0.000004 loss_cls: 2.5784 (2.5664) grad_norm: 1.0802 (1.2631) time: 2.8724 data: 0.0003 max mem: 28454 +[2024-12-12 19:25:13 root] (utils.py 283): INFO Epoch: [12] [ 880/2502] eta: 1:17:43 lr: 0.000004 loss_cls: 2.5694 (2.5655) grad_norm: 1.1111 (1.2628) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 19:25:41 root] (utils.py 283): INFO Epoch: [12] [ 890/2502] eta: 1:17:14 lr: 0.000004 loss_cls: 2.6306 (2.5671) grad_norm: 1.1111 (1.2612) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 19:26:10 root] (utils.py 283): INFO Epoch: [12] [ 900/2502] eta: 1:16:46 lr: 0.000004 loss_cls: 2.6643 (2.5659) grad_norm: 1.1235 (1.2598) time: 2.8736 data: 0.0003 max mem: 28454 +[2024-12-12 19:26:39 root] (utils.py 283): INFO Epoch: [12] [ 910/2502] eta: 1:16:17 lr: 0.000004 loss_cls: 2.6494 (2.5678) grad_norm: 1.1273 (1.2585) time: 2.8759 data: 0.0003 max mem: 28454 +[2024-12-12 19:27:08 root] (utils.py 283): INFO Epoch: [12] [ 920/2502] eta: 1:15:48 lr: 0.000004 loss_cls: 2.6494 (2.5639) grad_norm: 1.0464 (1.2569) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 19:27:36 root] (utils.py 283): INFO Epoch: [12] [ 930/2502] eta: 1:15:19 lr: 0.000004 loss_cls: 2.4053 (2.5622) grad_norm: 1.0528 (1.2602) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 19:28:05 root] (utils.py 283): INFO Epoch: [12] [ 940/2502] eta: 1:14:50 lr: 0.000004 loss_cls: 2.6572 (2.5624) grad_norm: 1.1275 (1.2596) time: 2.8737 data: 0.0003 max mem: 28454 +[2024-12-12 19:28:34 root] (utils.py 283): INFO Epoch: [12] [ 950/2502] eta: 1:14:22 lr: 0.000004 loss_cls: 2.6533 (2.5617) grad_norm: 1.1078 (1.2574) time: 2.8734 data: 0.0003 max mem: 28454 +[2024-12-12 19:29:03 root] (utils.py 283): INFO Epoch: [12] [ 960/2502] eta: 1:13:53 lr: 0.000004 loss_cls: 2.6158 (2.5622) grad_norm: 1.0224 (1.2558) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 19:29:31 root] (utils.py 283): INFO Epoch: [12] [ 970/2502] eta: 1:13:24 lr: 0.000004 loss_cls: 2.6209 (2.5614) grad_norm: 1.1260 (1.2568) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 19:30:00 root] (utils.py 283): INFO Epoch: [12] [ 980/2502] eta: 1:12:55 lr: 0.000004 loss_cls: 2.5679 (2.5602) grad_norm: 1.1273 (1.2751) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-12 19:30:29 root] (utils.py 283): INFO Epoch: [12] [ 990/2502] eta: 1:12:26 lr: 0.000004 loss_cls: 2.6207 (2.5612) grad_norm: 1.1510 (1.2770) time: 2.8708 data: 0.0003 max mem: 28454 +[2024-12-12 19:30:57 root] (utils.py 283): INFO Epoch: [12] [1000/2502] eta: 1:11:58 lr: 0.000004 loss_cls: 2.5955 (2.5599) grad_norm: 1.1836 (1.2773) time: 2.8698 data: 0.0003 max mem: 28454 +[2024-12-12 19:31:26 root] (utils.py 283): INFO Epoch: [12] [1010/2502] eta: 1:11:29 lr: 0.000004 loss_cls: 2.5514 (2.5612) grad_norm: 1.1813 (1.2780) time: 2.8701 data: 0.0003 max mem: 28454 +[2024-12-12 19:31:55 root] (utils.py 283): INFO Epoch: [12] [1020/2502] eta: 1:11:00 lr: 0.000004 loss_cls: 2.7759 (2.5633) grad_norm: 1.1682 (1.2791) time: 2.8712 data: 0.0003 max mem: 28454 +[2024-12-12 19:32:24 root] (utils.py 283): INFO Epoch: [12] [1030/2502] eta: 1:10:31 lr: 0.000004 loss_cls: 2.8748 (2.5642) grad_norm: 1.1656 (1.2781) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 19:32:52 root] (utils.py 283): INFO Epoch: [12] [1040/2502] eta: 1:10:02 lr: 0.000004 loss_cls: 2.8875 (2.5651) grad_norm: 1.0868 (1.2840) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-12 19:33:21 root] (utils.py 283): INFO Epoch: [12] [1050/2502] eta: 1:09:34 lr: 0.000004 loss_cls: 2.6117 (2.5649) grad_norm: 1.0782 (1.2833) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 19:33:50 root] (utils.py 283): INFO Epoch: [12] [1060/2502] eta: 1:09:05 lr: 0.000004 loss_cls: 2.6663 (2.5664) grad_norm: 1.1538 (1.2825) time: 2.8701 data: 0.0003 max mem: 28454 +[2024-12-12 19:34:18 root] (utils.py 283): INFO Epoch: [12] [1070/2502] eta: 1:08:36 lr: 0.000004 loss_cls: 2.7669 (2.5673) grad_norm: 1.1538 (1.2817) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 19:34:47 root] (utils.py 283): INFO Epoch: [12] [1080/2502] eta: 1:08:07 lr: 0.000004 loss_cls: 2.7805 (2.5688) grad_norm: 1.0963 (1.2805) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 19:35:16 root] (utils.py 283): INFO Epoch: [12] [1090/2502] eta: 1:07:38 lr: 0.000004 loss_cls: 2.7531 (2.5698) grad_norm: 1.0893 (1.2794) time: 2.8686 data: 0.0003 max mem: 28454 +[2024-12-12 19:35:44 root] (utils.py 283): INFO Epoch: [12] [1100/2502] eta: 1:07:10 lr: 0.000004 loss_cls: 2.6708 (2.5700) grad_norm: 1.0893 (1.2780) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 19:36:13 root] (utils.py 283): INFO Epoch: [12] [1110/2502] eta: 1:06:41 lr: 0.000004 loss_cls: 2.7769 (2.5723) grad_norm: 1.1164 (1.2772) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 19:36:42 root] (utils.py 283): INFO Epoch: [12] [1120/2502] eta: 1:06:12 lr: 0.000004 loss_cls: 2.7769 (2.5730) grad_norm: 1.1062 (1.2792) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-12 19:37:11 root] (utils.py 283): INFO Epoch: [12] [1130/2502] eta: 1:05:43 lr: 0.000004 loss_cls: 2.6824 (2.5733) grad_norm: 1.0632 (1.2824) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 19:37:39 root] (utils.py 283): INFO Epoch: [12] [1140/2502] eta: 1:05:14 lr: 0.000004 loss_cls: 2.6716 (2.5720) grad_norm: 1.1480 (1.2834) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 19:38:08 root] (utils.py 283): INFO Epoch: [12] [1150/2502] eta: 1:04:45 lr: 0.000004 loss_cls: 2.6633 (2.5728) grad_norm: 1.1579 (1.2856) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 19:38:37 root] (utils.py 283): INFO Epoch: [12] [1160/2502] eta: 1:04:17 lr: 0.000004 loss_cls: 2.7226 (2.5722) grad_norm: 1.1210 (1.2856) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 19:39:05 root] (utils.py 283): INFO Epoch: [12] [1170/2502] eta: 1:03:48 lr: 0.000004 loss_cls: 2.4471 (2.5710) grad_norm: 1.1210 (1.2843) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-12 19:39:34 root] (utils.py 283): INFO Epoch: [12] [1180/2502] eta: 1:03:19 lr: 0.000004 loss_cls: 2.4338 (2.5701) grad_norm: 1.0788 (1.2830) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 19:40:03 root] (utils.py 283): INFO Epoch: [12] [1190/2502] eta: 1:02:50 lr: 0.000004 loss_cls: 2.4958 (2.5695) grad_norm: 1.1177 (1.2816) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 19:40:31 root] (utils.py 283): INFO Epoch: [12] [1200/2502] eta: 1:02:21 lr: 0.000004 loss_cls: 2.4958 (2.5688) grad_norm: 1.0749 (1.2814) time: 2.8681 data: 0.0002 max mem: 28454 +[2024-12-12 19:41:00 root] (utils.py 283): INFO Epoch: [12] [1210/2502] eta: 1:01:53 lr: 0.000004 loss_cls: 2.6609 (2.5695) grad_norm: 1.1324 (1.2804) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 19:41:29 root] (utils.py 283): INFO Epoch: [12] [1220/2502] eta: 1:01:24 lr: 0.000004 loss_cls: 2.7413 (2.5703) grad_norm: 1.1491 (1.2795) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 19:41:57 root] (utils.py 283): INFO Epoch: [12] [1230/2502] eta: 1:00:55 lr: 0.000004 loss_cls: 2.7413 (2.5705) grad_norm: 1.0730 (1.2782) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 19:42:26 root] (utils.py 283): INFO Epoch: [12] [1240/2502] eta: 1:00:26 lr: 0.000004 loss_cls: 2.7493 (2.5719) grad_norm: 1.1288 (1.2772) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 19:42:55 root] (utils.py 283): INFO Epoch: [12] [1250/2502] eta: 0:59:58 lr: 0.000004 loss_cls: 2.6965 (2.5710) grad_norm: 1.1618 (1.2765) time: 2.8697 data: 0.0002 max mem: 28454 +[2024-12-12 19:43:24 root] (utils.py 283): INFO Epoch: [12] [1260/2502] eta: 0:59:29 lr: 0.000004 loss_cls: 2.6147 (2.5717) grad_norm: 1.1658 (1.2768) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 19:43:52 root] (utils.py 283): INFO Epoch: [12] [1270/2502] eta: 0:59:00 lr: 0.000004 loss_cls: 2.5674 (2.5703) grad_norm: 1.1716 (1.2794) time: 2.8691 data: 0.0003 max mem: 28454 +[2024-12-12 19:44:21 root] (utils.py 283): INFO Epoch: [12] [1280/2502] eta: 0:58:31 lr: 0.000004 loss_cls: 2.3746 (2.5688) grad_norm: 1.0872 (1.2780) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 19:44:50 root] (utils.py 283): INFO Epoch: [12] [1290/2502] eta: 0:58:02 lr: 0.000004 loss_cls: 2.5771 (2.5695) grad_norm: 1.0660 (1.2774) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 19:45:18 root] (utils.py 283): INFO Epoch: [12] [1300/2502] eta: 0:57:34 lr: 0.000004 loss_cls: 2.5622 (2.5676) grad_norm: 1.2204 (1.2810) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 19:45:47 root] (utils.py 283): INFO Epoch: [12] [1310/2502] eta: 0:57:05 lr: 0.000004 loss_cls: 2.6185 (2.5688) grad_norm: 1.2204 (1.2798) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 19:46:16 root] (utils.py 283): INFO Epoch: [12] [1320/2502] eta: 0:56:36 lr: 0.000004 loss_cls: 2.6985 (2.5693) grad_norm: 1.1121 (1.2799) time: 2.8688 data: 0.0002 max mem: 28454 +[2024-12-12 19:46:44 root] (utils.py 283): INFO Epoch: [12] [1330/2502] eta: 0:56:07 lr: 0.000004 loss_cls: 2.6039 (2.5691) grad_norm: 1.1231 (1.2791) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-12 19:47:13 root] (utils.py 283): INFO Epoch: [12] [1340/2502] eta: 0:55:39 lr: 0.000004 loss_cls: 2.6039 (2.5687) grad_norm: 1.1340 (1.2804) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 19:47:42 root] (utils.py 283): INFO Epoch: [12] [1350/2502] eta: 0:55:10 lr: 0.000004 loss_cls: 2.5905 (2.5684) grad_norm: 1.1165 (1.2790) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 19:48:11 root] (utils.py 283): INFO Epoch: [12] [1360/2502] eta: 0:54:41 lr: 0.000004 loss_cls: 2.6220 (2.5675) grad_norm: 1.0759 (1.2809) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 19:48:39 root] (utils.py 283): INFO Epoch: [12] [1370/2502] eta: 0:54:12 lr: 0.000004 loss_cls: 2.6274 (2.5682) grad_norm: 1.0759 (1.2803) time: 2.8698 data: 0.0002 max mem: 28454 +[2024-12-12 19:49:08 root] (utils.py 283): INFO Epoch: [12] [1380/2502] eta: 0:53:44 lr: 0.000004 loss_cls: 2.6120 (2.5669) grad_norm: 1.0584 (1.2786) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 19:49:37 root] (utils.py 283): INFO Epoch: [12] [1390/2502] eta: 0:53:15 lr: 0.000004 loss_cls: 2.5120 (2.5670) grad_norm: 1.0492 (1.2772) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 19:50:05 root] (utils.py 283): INFO Epoch: [12] [1400/2502] eta: 0:52:46 lr: 0.000004 loss_cls: 2.6429 (2.5683) grad_norm: 1.1489 (1.2787) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 19:50:34 root] (utils.py 283): INFO Epoch: [12] [1410/2502] eta: 0:52:17 lr: 0.000004 loss_cls: 2.8184 (2.5688) grad_norm: 1.2486 (1.2790) time: 2.8716 data: 0.0003 max mem: 28454 +[2024-12-12 19:51:03 root] (utils.py 283): INFO Epoch: [12] [1420/2502] eta: 0:51:49 lr: 0.000004 loss_cls: 2.8055 (2.5684) grad_norm: 1.1937 (1.2804) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 19:51:31 root] (utils.py 283): INFO Epoch: [12] [1430/2502] eta: 0:51:20 lr: 0.000004 loss_cls: 2.5802 (2.5680) grad_norm: 1.1384 (1.2788) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 19:52:00 root] (utils.py 283): INFO Epoch: [12] [1440/2502] eta: 0:50:51 lr: 0.000004 loss_cls: 2.6373 (2.5686) grad_norm: 1.0838 (1.2784) time: 2.8702 data: 0.0002 max mem: 28454 +[2024-12-12 19:52:29 root] (utils.py 283): INFO Epoch: [12] [1450/2502] eta: 0:50:22 lr: 0.000004 loss_cls: 2.6559 (2.5688) grad_norm: 1.1430 (1.2776) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 19:52:58 root] (utils.py 283): INFO Epoch: [12] [1460/2502] eta: 0:49:53 lr: 0.000004 loss_cls: 2.6491 (2.5688) grad_norm: 1.1430 (1.2862) time: 2.8666 data: 0.0003 max mem: 28454 +[2024-12-12 19:53:26 root] (utils.py 283): INFO Epoch: [12] [1470/2502] eta: 0:49:25 lr: 0.000004 loss_cls: 2.6491 (2.5686) grad_norm: 1.1186 (1.2850) time: 2.8689 data: 0.0003 max mem: 28454 +[2024-12-12 19:53:55 root] (utils.py 283): INFO Epoch: [12] [1480/2502] eta: 0:48:56 lr: 0.000004 loss_cls: 2.6726 (2.5688) grad_norm: 1.1325 (1.2846) time: 2.8685 data: 0.0002 max mem: 28454 +[2024-12-12 19:54:24 root] (utils.py 283): INFO Epoch: [12] [1490/2502] eta: 0:48:27 lr: 0.000004 loss_cls: 2.5342 (2.5680) grad_norm: 1.1491 (1.2833) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-12 19:54:52 root] (utils.py 283): INFO Epoch: [12] [1500/2502] eta: 0:47:58 lr: 0.000004 loss_cls: 2.5871 (2.5685) grad_norm: 1.0759 (1.2820) time: 2.8641 data: 0.0002 max mem: 28454 +[2024-12-12 19:55:21 root] (utils.py 283): INFO Epoch: [12] [1510/2502] eta: 0:47:30 lr: 0.000004 loss_cls: 2.6169 (2.5683) grad_norm: 1.0772 (1.2815) time: 2.8646 data: 0.0002 max mem: 28454 +[2024-12-12 19:55:50 root] (utils.py 283): INFO Epoch: [12] [1520/2502] eta: 0:47:01 lr: 0.000004 loss_cls: 2.5561 (2.5667) grad_norm: 1.1241 (1.2809) time: 2.8662 data: 0.0002 max mem: 28454 +[2024-12-12 19:56:18 root] (utils.py 283): INFO Epoch: [12] [1530/2502] eta: 0:46:32 lr: 0.000004 loss_cls: 2.1649 (2.5654) grad_norm: 1.1264 (1.2806) time: 2.8673 data: 0.0003 max mem: 28454 +[2024-12-12 19:56:47 root] (utils.py 283): INFO Epoch: [12] [1540/2502] eta: 0:46:03 lr: 0.000004 loss_cls: 2.6259 (2.5651) grad_norm: 1.1161 (1.2811) time: 2.8667 data: 0.0003 max mem: 28454 +[2024-12-12 19:57:16 root] (utils.py 283): INFO Epoch: [12] [1550/2502] eta: 0:45:34 lr: 0.000004 loss_cls: 2.7568 (2.5656) grad_norm: 1.0839 (1.2806) time: 2.8664 data: 0.0003 max mem: 28454 +[2024-12-12 19:57:44 root] (utils.py 283): INFO Epoch: [12] [1560/2502] eta: 0:45:06 lr: 0.000004 loss_cls: 2.7657 (2.5662) grad_norm: 1.0809 (1.2919) time: 2.8717 data: 0.0002 max mem: 28454 +[2024-12-12 19:58:13 root] (utils.py 283): INFO Epoch: [12] [1570/2502] eta: 0:44:37 lr: 0.000004 loss_cls: 2.7109 (2.5670) grad_norm: 1.0772 (1.2908) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-12 19:58:42 root] (utils.py 283): INFO Epoch: [12] [1580/2502] eta: 0:44:08 lr: 0.000004 loss_cls: 2.5478 (2.5655) grad_norm: 1.0788 (1.2898) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 19:59:10 root] (utils.py 283): INFO Epoch: [12] [1590/2502] eta: 0:43:39 lr: 0.000004 loss_cls: 2.3983 (2.5654) grad_norm: 1.1159 (1.2891) time: 2.8660 data: 0.0002 max mem: 28454 +[2024-12-12 19:59:39 root] (utils.py 283): INFO Epoch: [12] [1600/2502] eta: 0:43:11 lr: 0.000004 loss_cls: 2.4155 (2.5644) grad_norm: 1.1037 (1.2882) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-12 20:00:08 root] (utils.py 283): INFO Epoch: [12] [1610/2502] eta: 0:42:42 lr: 0.000004 loss_cls: 2.4155 (2.5641) grad_norm: 1.0787 (1.2872) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 20:00:36 root] (utils.py 283): INFO Epoch: [12] [1620/2502] eta: 0:42:13 lr: 0.000004 loss_cls: 2.6937 (2.5642) grad_norm: 1.1599 (1.2871) time: 2.8703 data: 0.0002 max mem: 28454 +[2024-12-12 20:01:05 root] (utils.py 283): INFO Epoch: [12] [1630/2502] eta: 0:41:45 lr: 0.000004 loss_cls: 2.7535 (2.5647) grad_norm: 1.1891 (1.2864) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-12 20:01:34 root] (utils.py 283): INFO Epoch: [12] [1640/2502] eta: 0:41:16 lr: 0.000004 loss_cls: 2.7061 (2.5652) grad_norm: 1.1891 (1.2860) time: 2.8655 data: 0.0002 max mem: 28454 +[2024-12-12 20:02:02 root] (utils.py 283): INFO Epoch: [12] [1650/2502] eta: 0:40:47 lr: 0.000004 loss_cls: 2.6761 (2.5657) grad_norm: 1.1277 (1.2888) time: 2.8675 data: 0.0002 max mem: 28454 +[2024-12-12 20:02:31 root] (utils.py 283): INFO Epoch: [12] [1660/2502] eta: 0:40:18 lr: 0.000004 loss_cls: 2.7975 (2.5667) grad_norm: 1.0725 (1.2884) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-12 20:03:00 root] (utils.py 283): INFO Epoch: [12] [1670/2502] eta: 0:39:49 lr: 0.000004 loss_cls: 2.7019 (2.5657) grad_norm: 1.2473 (1.2941) time: 2.8651 data: 0.0002 max mem: 28454 +[2024-12-12 20:03:28 root] (utils.py 283): INFO Epoch: [12] [1680/2502] eta: 0:39:21 lr: 0.000004 loss_cls: 2.6795 (2.5667) grad_norm: 1.1279 (1.2932) time: 2.8667 data: 0.0002 max mem: 28454 +[2024-12-12 20:03:57 root] (utils.py 283): INFO Epoch: [12] [1690/2502] eta: 0:38:52 lr: 0.000004 loss_cls: 2.5808 (2.5666) grad_norm: 1.1141 (1.2921) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 20:04:26 root] (utils.py 283): INFO Epoch: [12] [1700/2502] eta: 0:38:23 lr: 0.000004 loss_cls: 2.5808 (2.5666) grad_norm: 1.1350 (1.2932) time: 2.8700 data: 0.0003 max mem: 28454 +[2024-12-12 20:04:55 root] (utils.py 283): INFO Epoch: [12] [1710/2502] eta: 0:37:54 lr: 0.000004 loss_cls: 2.6536 (2.5651) grad_norm: 1.1142 (1.2920) time: 2.8677 data: 0.0003 max mem: 28454 +[2024-12-12 20:05:23 root] (utils.py 283): INFO Epoch: [12] [1720/2502] eta: 0:37:26 lr: 0.000004 loss_cls: 2.4682 (2.5649) grad_norm: 1.1790 (1.2966) time: 2.8652 data: 0.0003 max mem: 28454 +[2024-12-12 20:05:52 root] (utils.py 283): INFO Epoch: [12] [1730/2502] eta: 0:36:57 lr: 0.000004 loss_cls: 2.5952 (2.5655) grad_norm: 1.1790 (1.2958) time: 2.8649 data: 0.0002 max mem: 28454 +[2024-12-12 20:06:20 root] (utils.py 283): INFO Epoch: [12] [1740/2502] eta: 0:36:28 lr: 0.000004 loss_cls: 2.7244 (2.5660) grad_norm: 1.0600 (1.2952) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 20:06:49 root] (utils.py 283): INFO Epoch: [12] [1750/2502] eta: 0:36:00 lr: 0.000004 loss_cls: 2.7646 (2.5668) grad_norm: 1.1027 (1.2950) time: 2.8694 data: 0.0003 max mem: 28454 +[2024-12-12 20:07:18 root] (utils.py 283): INFO Epoch: [12] [1760/2502] eta: 0:35:31 lr: 0.000004 loss_cls: 2.7801 (2.5676) grad_norm: 1.2005 (1.2995) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-12 20:07:47 root] (utils.py 283): INFO Epoch: [12] [1770/2502] eta: 0:35:02 lr: 0.000004 loss_cls: 2.7752 (2.5681) grad_norm: 1.1886 (1.2986) time: 2.8695 data: 0.0003 max mem: 28454 +[2024-12-12 20:08:15 root] (utils.py 283): INFO Epoch: [12] [1780/2502] eta: 0:34:33 lr: 0.000004 loss_cls: 2.7280 (2.5684) grad_norm: 1.1571 (1.2983) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 20:08:44 root] (utils.py 283): INFO Epoch: [12] [1790/2502] eta: 0:34:05 lr: 0.000004 loss_cls: 2.6747 (2.5681) grad_norm: 1.0773 (1.2985) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 20:09:13 root] (utils.py 283): INFO Epoch: [12] [1800/2502] eta: 0:33:36 lr: 0.000004 loss_cls: 2.6517 (2.5679) grad_norm: 1.0687 (1.2977) time: 2.8790 data: 0.0002 max mem: 28454 +[2024-12-12 20:09:42 root] (utils.py 283): INFO Epoch: [12] [1810/2502] eta: 0:33:07 lr: 0.000004 loss_cls: 2.5998 (2.5679) grad_norm: 1.1630 (1.2967) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-12 20:10:10 root] (utils.py 283): INFO Epoch: [12] [1820/2502] eta: 0:32:38 lr: 0.000004 loss_cls: 2.5998 (2.5673) grad_norm: 1.1075 (1.2956) time: 2.8716 data: 0.0003 max mem: 28454 +[2024-12-12 20:10:39 root] (utils.py 283): INFO Epoch: [12] [1830/2502] eta: 0:32:10 lr: 0.000004 loss_cls: 2.3394 (2.5660) grad_norm: 1.1148 (1.2985) time: 2.8701 data: 0.0003 max mem: 28454 +[2024-12-12 20:11:08 root] (utils.py 283): INFO Epoch: [12] [1840/2502] eta: 0:31:41 lr: 0.000004 loss_cls: 2.4116 (2.5658) grad_norm: 1.2052 (1.3002) time: 2.8684 data: 0.0003 max mem: 28454 +[2024-12-12 20:11:36 root] (utils.py 283): INFO Epoch: [12] [1850/2502] eta: 0:31:12 lr: 0.000004 loss_cls: 2.6528 (2.5657) grad_norm: 1.1069 (1.3011) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-12 20:12:05 root] (utils.py 283): INFO Epoch: [12] [1860/2502] eta: 0:30:44 lr: 0.000004 loss_cls: 2.4712 (2.5652) grad_norm: 1.1015 (1.3003) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-12 20:12:34 root] (utils.py 283): INFO Epoch: [12] [1870/2502] eta: 0:30:15 lr: 0.000004 loss_cls: 2.4174 (2.5646) grad_norm: 1.1370 (1.3033) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 20:13:02 root] (utils.py 283): INFO Epoch: [12] [1880/2502] eta: 0:29:46 lr: 0.000004 loss_cls: 2.3582 (2.5639) grad_norm: 1.3349 (1.3044) time: 2.8644 data: 0.0002 max mem: 28454 +[2024-12-12 20:13:31 root] (utils.py 283): INFO Epoch: [12] [1890/2502] eta: 0:29:17 lr: 0.000004 loss_cls: 2.7323 (2.5643) grad_norm: 1.1779 (1.3040) time: 2.8567 data: 0.0002 max mem: 28454 +[2024-12-12 20:14:00 root] (utils.py 283): INFO Epoch: [12] [1900/2502] eta: 0:28:48 lr: 0.000004 loss_cls: 2.7905 (2.5650) grad_norm: 1.1756 (1.3035) time: 2.8569 data: 0.0002 max mem: 28454 +[2024-12-12 20:14:28 root] (utils.py 283): INFO Epoch: [12] [1910/2502] eta: 0:28:20 lr: 0.000004 loss_cls: 2.7774 (2.5652) grad_norm: 1.2417 (1.3043) time: 2.8571 data: 0.0002 max mem: 28454 +[2024-12-12 20:14:57 root] (utils.py 283): INFO Epoch: [12] [1920/2502] eta: 0:27:51 lr: 0.000004 loss_cls: 2.7581 (2.5657) grad_norm: 1.1324 (1.3032) time: 2.8567 data: 0.0002 max mem: 28454 +[2024-12-12 20:15:25 root] (utils.py 283): INFO Epoch: [12] [1930/2502] eta: 0:27:22 lr: 0.000004 loss_cls: 2.7795 (2.5664) grad_norm: 1.1311 (1.3034) time: 2.8584 data: 0.0002 max mem: 28454 +[2024-12-12 20:15:54 root] (utils.py 283): INFO Epoch: [12] [1940/2502] eta: 0:26:53 lr: 0.000004 loss_cls: 2.7634 (2.5659) grad_norm: 1.1514 (1.3032) time: 2.8584 data: 0.0002 max mem: 28454 +[2024-12-12 20:16:22 root] (utils.py 283): INFO Epoch: [12] [1950/2502] eta: 0:26:25 lr: 0.000004 loss_cls: 2.5891 (2.5655) grad_norm: 1.1519 (1.3035) time: 2.8591 data: 0.0003 max mem: 28454 +[2024-12-12 20:16:51 root] (utils.py 283): INFO Epoch: [12] [1960/2502] eta: 0:25:56 lr: 0.000004 loss_cls: 2.3724 (2.5642) grad_norm: 1.1069 (1.3025) time: 2.8611 data: 0.0002 max mem: 28454 +[2024-12-12 20:17:20 root] (utils.py 283): INFO Epoch: [12] [1970/2502] eta: 0:25:27 lr: 0.000004 loss_cls: 2.4276 (2.5642) grad_norm: 1.0795 (1.3016) time: 2.8614 data: 0.0002 max mem: 28454 +[2024-12-12 20:17:48 root] (utils.py 283): INFO Epoch: [12] [1980/2502] eta: 0:24:58 lr: 0.000004 loss_cls: 2.7001 (2.5643) grad_norm: 1.1257 (1.3012) time: 2.8603 data: 0.0002 max mem: 28454 +[2024-12-12 20:18:17 root] (utils.py 283): INFO Epoch: [12] [1990/2502] eta: 0:24:30 lr: 0.000004 loss_cls: 2.5935 (2.5633) grad_norm: 1.1415 (1.3001) time: 2.8601 data: 0.0002 max mem: 28454 +[2024-12-12 20:18:45 root] (utils.py 283): INFO Epoch: [12] [2000/2502] eta: 0:24:01 lr: 0.000004 loss_cls: 2.5567 (2.5637) grad_norm: 1.1443 (1.2995) time: 2.8601 data: 0.0003 max mem: 28454 +[2024-12-12 20:19:14 root] (utils.py 283): INFO Epoch: [12] [2010/2502] eta: 0:23:32 lr: 0.000004 loss_cls: 2.5567 (2.5634) grad_norm: 1.2083 (1.2997) time: 2.8595 data: 0.0002 max mem: 28454 +[2024-12-12 20:19:43 root] (utils.py 283): INFO Epoch: [12] [2020/2502] eta: 0:23:03 lr: 0.000004 loss_cls: 2.4484 (2.5627) grad_norm: 1.2152 (1.3000) time: 2.8611 data: 0.0002 max mem: 28454 +[2024-12-12 20:20:11 root] (utils.py 283): INFO Epoch: [12] [2030/2502] eta: 0:22:35 lr: 0.000004 loss_cls: 2.4780 (2.5623) grad_norm: 1.1822 (1.2993) time: 2.8626 data: 0.0002 max mem: 28454 +[2024-12-12 20:20:40 root] (utils.py 283): INFO Epoch: [12] [2040/2502] eta: 0:22:06 lr: 0.000004 loss_cls: 2.6190 (2.5629) grad_norm: 1.1357 (1.2988) time: 2.8609 data: 0.0003 max mem: 28454 +[2024-12-12 20:21:09 root] (utils.py 283): INFO Epoch: [12] [2050/2502] eta: 0:21:37 lr: 0.000004 loss_cls: 2.6847 (2.5634) grad_norm: 1.1883 (1.2984) time: 2.8601 data: 0.0003 max mem: 28454 +[2024-12-12 20:21:37 root] (utils.py 283): INFO Epoch: [12] [2060/2502] eta: 0:21:09 lr: 0.000004 loss_cls: 2.6341 (2.5636) grad_norm: 1.1400 (1.2977) time: 2.8633 data: 0.0003 max mem: 28454 +[2024-12-12 20:22:06 root] (utils.py 283): INFO Epoch: [12] [2070/2502] eta: 0:20:40 lr: 0.000004 loss_cls: 2.6976 (2.5635) grad_norm: 1.0750 (1.2968) time: 2.8621 data: 0.0002 max mem: 28454 +[2024-12-12 20:22:34 root] (utils.py 283): INFO Epoch: [12] [2080/2502] eta: 0:20:11 lr: 0.000004 loss_cls: 2.7005 (2.5638) grad_norm: 1.0580 (1.2963) time: 2.8604 data: 0.0003 max mem: 28454 +[2024-12-12 20:23:03 root] (utils.py 283): INFO Epoch: [12] [2090/2502] eta: 0:19:42 lr: 0.000004 loss_cls: 2.6249 (2.5643) grad_norm: 1.1151 (1.2959) time: 2.8645 data: 0.0003 max mem: 28454 +[2024-12-12 20:23:32 root] (utils.py 283): INFO Epoch: [12] [2100/2502] eta: 0:19:14 lr: 0.000004 loss_cls: 2.7723 (2.5656) grad_norm: 1.1285 (1.2955) time: 2.8626 data: 0.0003 max mem: 28454 +[2024-12-12 20:24:00 root] (utils.py 283): INFO Epoch: [12] [2110/2502] eta: 0:18:45 lr: 0.000004 loss_cls: 2.6608 (2.5652) grad_norm: 1.0779 (1.3160) time: 2.8567 data: 0.0003 max mem: 28454 +[2024-12-12 20:24:29 root] (utils.py 283): INFO Epoch: [12] [2120/2502] eta: 0:18:16 lr: 0.000004 loss_cls: 2.5462 (2.5654) grad_norm: 1.1929 (1.3323) time: 2.8595 data: 0.0003 max mem: 28454 +[2024-12-12 20:24:57 root] (utils.py 283): INFO Epoch: [12] [2130/2502] eta: 0:17:47 lr: 0.000004 loss_cls: 2.7497 (2.5659) grad_norm: 1.1929 (1.3316) time: 2.8627 data: 0.0002 max mem: 28454 +[2024-12-12 20:25:26 root] (utils.py 283): INFO Epoch: [12] [2140/2502] eta: 0:17:19 lr: 0.000004 loss_cls: 2.7497 (2.5664) grad_norm: 1.1815 (1.3310) time: 2.8592 data: 0.0002 max mem: 28454 +[2024-12-12 20:25:55 root] (utils.py 283): INFO Epoch: [12] [2150/2502] eta: 0:16:50 lr: 0.000004 loss_cls: 2.7458 (2.5669) grad_norm: 1.1822 (1.3304) time: 2.8589 data: 0.0002 max mem: 28454 +[2024-12-12 20:26:23 root] (utils.py 283): INFO Epoch: [12] [2160/2502] eta: 0:16:21 lr: 0.000004 loss_cls: 2.7554 (2.5672) grad_norm: 1.1653 (1.3301) time: 2.8601 data: 0.0003 max mem: 28454 +[2024-12-12 20:26:52 root] (utils.py 283): INFO Epoch: [12] [2170/2502] eta: 0:15:53 lr: 0.000004 loss_cls: 2.6924 (2.5674) grad_norm: 1.1653 (1.3293) time: 2.8579 data: 0.0002 max mem: 28454 +[2024-12-12 20:27:20 root] (utils.py 283): INFO Epoch: [12] [2180/2502] eta: 0:15:24 lr: 0.000004 loss_cls: 2.6924 (2.5673) grad_norm: 1.1375 (1.3289) time: 2.8572 data: 0.0002 max mem: 28454 +[2024-12-12 20:27:49 root] (utils.py 283): INFO Epoch: [12] [2190/2502] eta: 0:14:55 lr: 0.000004 loss_cls: 2.6591 (2.5677) grad_norm: 1.1375 (1.3281) time: 2.8586 data: 0.0002 max mem: 28454 +[2024-12-12 20:28:18 root] (utils.py 283): INFO Epoch: [12] [2200/2502] eta: 0:14:26 lr: 0.000004 loss_cls: 2.6551 (2.5675) grad_norm: 1.0798 (1.3272) time: 2.8578 data: 0.0003 max mem: 28454 +[2024-12-12 20:28:46 root] (utils.py 283): INFO Epoch: [12] [2210/2502] eta: 0:13:58 lr: 0.000004 loss_cls: 2.4154 (2.5666) grad_norm: 1.1071 (1.3270) time: 2.8572 data: 0.0003 max mem: 28454 +[2024-12-12 20:29:15 root] (utils.py 283): INFO Epoch: [12] [2220/2502] eta: 0:13:29 lr: 0.000004 loss_cls: 2.6851 (2.5673) grad_norm: 1.1071 (1.3484) time: 2.8586 data: 0.0002 max mem: 28454 +[2024-12-12 20:29:43 root] (utils.py 283): INFO Epoch: [12] [2230/2502] eta: 0:13:00 lr: 0.000004 loss_cls: 2.6851 (2.5668) grad_norm: 1.0800 (1.3472) time: 2.8570 data: 0.0002 max mem: 28454 +[2024-12-12 20:30:12 root] (utils.py 283): INFO Epoch: [12] [2240/2502] eta: 0:12:31 lr: 0.000004 loss_cls: 2.6166 (2.5668) grad_norm: 1.1344 (1.3463) time: 2.8561 data: 0.0002 max mem: 28454 +[2024-12-12 20:30:40 root] (utils.py 283): INFO Epoch: [12] [2250/2502] eta: 0:12:03 lr: 0.000004 loss_cls: 2.5834 (2.5666) grad_norm: 1.0838 (1.3477) time: 2.8569 data: 0.0002 max mem: 28454 +[2024-12-12 20:31:09 root] (utils.py 283): INFO Epoch: [12] [2260/2502] eta: 0:11:34 lr: 0.000004 loss_cls: 2.6053 (2.5664) grad_norm: 1.0661 (1.3469) time: 2.8575 data: 0.0002 max mem: 28454 +[2024-12-12 20:31:38 root] (utils.py 283): INFO Epoch: [12] [2270/2502] eta: 0:11:05 lr: 0.000004 loss_cls: 2.4354 (2.5655) grad_norm: 1.0782 (1.3465) time: 2.8585 data: 0.0003 max mem: 28454 +[2024-12-12 20:32:06 root] (utils.py 283): INFO Epoch: [12] [2280/2502] eta: 0:10:37 lr: 0.000004 loss_cls: 2.6104 (2.5662) grad_norm: 1.0889 (1.3535) time: 2.8580 data: 0.0002 max mem: 28454 +[2024-12-12 20:32:35 root] (utils.py 283): INFO Epoch: [12] [2290/2502] eta: 0:10:08 lr: 0.000004 loss_cls: 2.7547 (2.5667) grad_norm: 1.0851 (1.3523) time: 2.8598 data: 0.0002 max mem: 28454 +[2024-12-12 20:33:03 root] (utils.py 283): INFO Epoch: [12] [2300/2502] eta: 0:09:39 lr: 0.000004 loss_cls: 2.6728 (2.5672) grad_norm: 1.1004 (1.3535) time: 2.8595 data: 0.0002 max mem: 28454 +[2024-12-12 20:33:32 root] (utils.py 283): INFO Epoch: [12] [2310/2502] eta: 0:09:11 lr: 0.000004 loss_cls: 2.6670 (2.5675) grad_norm: 1.1080 (1.3526) time: 2.8589 data: 0.0002 max mem: 28454 +[2024-12-12 20:34:01 root] (utils.py 283): INFO Epoch: [12] [2320/2502] eta: 0:08:42 lr: 0.000004 loss_cls: 2.6887 (2.5681) grad_norm: 1.0920 (1.3518) time: 2.8605 data: 0.0003 max mem: 28454 +[2024-12-12 20:34:29 root] (utils.py 283): INFO Epoch: [12] [2330/2502] eta: 0:08:13 lr: 0.000004 loss_cls: 2.6337 (2.5678) grad_norm: 1.0897 (1.3511) time: 2.8581 data: 0.0003 max mem: 28454 +[2024-12-12 20:34:58 root] (utils.py 283): INFO Epoch: [12] [2340/2502] eta: 0:07:44 lr: 0.000004 loss_cls: 2.4389 (2.5671) grad_norm: 1.1271 (1.3505) time: 2.8571 data: 0.0002 max mem: 28454 +[2024-12-12 20:35:26 root] (utils.py 283): INFO Epoch: [12] [2350/2502] eta: 0:07:16 lr: 0.000004 loss_cls: 2.4495 (2.5666) grad_norm: 1.1637 (1.3499) time: 2.8577 data: 0.0002 max mem: 28454 +[2024-12-12 20:35:55 root] (utils.py 283): INFO Epoch: [12] [2360/2502] eta: 0:06:47 lr: 0.000004 loss_cls: 2.5782 (2.5669) grad_norm: 1.1637 (1.3489) time: 2.8591 data: 0.0003 max mem: 28454 +[2024-12-12 20:36:23 root] (utils.py 283): INFO Epoch: [12] [2370/2502] eta: 0:06:18 lr: 0.000004 loss_cls: 2.7111 (2.5667) grad_norm: 1.1234 (1.3483) time: 2.8583 data: 0.0003 max mem: 28454 +[2024-12-12 20:36:52 root] (utils.py 283): INFO Epoch: [12] [2380/2502] eta: 0:05:50 lr: 0.000004 loss_cls: 2.7213 (2.5666) grad_norm: 1.1004 (1.3471) time: 2.8577 data: 0.0002 max mem: 28454 +[2024-12-12 20:37:21 root] (utils.py 283): INFO Epoch: [12] [2390/2502] eta: 0:05:21 lr: 0.000004 loss_cls: 2.5869 (2.5663) grad_norm: 1.0476 (1.3461) time: 2.8588 data: 0.0002 max mem: 28454 +[2024-12-12 20:37:49 root] (utils.py 283): INFO Epoch: [12] [2400/2502] eta: 0:04:52 lr: 0.000004 loss_cls: 2.6040 (2.5666) grad_norm: 1.1250 (1.3453) time: 2.8588 data: 0.0002 max mem: 28454 +[2024-12-12 20:38:18 root] (utils.py 283): INFO Epoch: [12] [2410/2502] eta: 0:04:23 lr: 0.000004 loss_cls: 2.7056 (2.5662) grad_norm: 1.1250 (1.3449) time: 2.8602 data: 0.0002 max mem: 28454 +[2024-12-12 20:38:46 root] (utils.py 283): INFO Epoch: [12] [2420/2502] eta: 0:03:55 lr: 0.000004 loss_cls: 2.6333 (2.5666) grad_norm: 1.1881 (1.3444) time: 2.8591 data: 0.0003 max mem: 28454 +[2024-12-12 20:39:15 root] (utils.py 283): INFO Epoch: [12] [2430/2502] eta: 0:03:26 lr: 0.000004 loss_cls: 2.6041 (2.5658) grad_norm: 1.2313 (1.3454) time: 2.8577 data: 0.0003 max mem: 28454 +[2024-12-12 20:39:44 root] (utils.py 283): INFO Epoch: [12] [2440/2502] eta: 0:02:57 lr: 0.000004 loss_cls: 2.5788 (2.5660) grad_norm: 1.2313 (1.3446) time: 2.8587 data: 0.0003 max mem: 28454 +[2024-12-12 20:40:12 root] (utils.py 283): INFO Epoch: [12] [2450/2502] eta: 0:02:29 lr: 0.000004 loss_cls: 2.6590 (2.5662) grad_norm: 1.1247 (1.3437) time: 2.8605 data: 0.0002 max mem: 28454 +[2024-12-12 20:40:41 root] (utils.py 283): INFO Epoch: [12] [2460/2502] eta: 0:02:00 lr: 0.000004 loss_cls: 2.5518 (2.5656) grad_norm: 1.0769 (1.3430) time: 2.8589 data: 0.0002 max mem: 28454 +[2024-12-12 20:41:09 root] (utils.py 283): INFO Epoch: [12] [2470/2502] eta: 0:01:31 lr: 0.000004 loss_cls: 2.5278 (2.5655) grad_norm: 1.1111 (1.3438) time: 2.8609 data: 0.0002 max mem: 28454 +[2024-12-12 20:41:38 root] (utils.py 283): INFO Epoch: [12] [2480/2502] eta: 0:01:03 lr: 0.000004 loss_cls: 2.5278 (2.5650) grad_norm: 1.0883 (1.3427) time: 2.8630 data: 0.0003 max mem: 28454 +[2024-12-12 20:42:07 root] (utils.py 283): INFO Epoch: [12] [2490/2502] eta: 0:00:34 lr: 0.000004 loss_cls: 2.4942 (2.5644) grad_norm: 1.0590 (1.3418) time: 2.8783 data: 0.0218 max mem: 28454 +[2024-12-12 20:42:36 root] (utils.py 283): INFO Epoch: [12] [2500/2502] eta: 0:00:05 lr: 0.000004 loss_cls: 2.5588 (2.5646) grad_norm: 1.0695 (1.3406) time: 2.8768 data: 0.0218 max mem: 28454 +[2024-12-12 20:42:38 root] (utils.py 283): INFO Epoch: [12] [2501/2502] eta: 0:00:02 lr: 0.000004 loss_cls: 2.5588 (2.5647) grad_norm: 1.0755 (1.3406) time: 2.8766 data: 0.0218 max mem: 28454 +[2024-12-12 20:42:38 root] (utils.py 297): INFO Epoch: [12] Total time: 1:59:38 (2.8693 s / it) +[2024-12-12 20:42:38 root] (engine.py 179): INFO Averaged stats:lr: 0.000004 loss_cls: 2.5588 (2.5585) grad_norm: 1.0755 (1.3406) +[2024-12-12 20:42:41 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3952 (0.3952) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5420 data: 0.0003 max mem: 28454 +[2024-12-12 20:42:47 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6217 (0.5927) acc1: 86.7188 (87.2159) acc3: 96.8750 (96.5909) acc5: 98.4375 (98.2955) time: 0.5459 data: 0.0004 max mem: 28454 +[2024-12-12 20:42:52 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6217 (0.6168) acc1: 87.5000 (87.0164) acc3: 96.0938 (96.3542) acc5: 98.4375 (97.8795) time: 0.5470 data: 0.0004 max mem: 28454 +[2024-12-12 20:42:58 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6300 (0.6498) acc1: 86.7188 (85.8115) acc3: 95.3125 (96.1694) acc5: 97.6562 (97.7319) time: 0.5481 data: 0.0004 max mem: 28454 +[2024-12-12 20:43:03 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6938 (0.6584) acc1: 85.9375 (85.5945) acc3: 96.8750 (96.1509) acc5: 97.6562 (97.6562) time: 0.5482 data: 0.0004 max mem: 28454 +[2024-12-12 20:43:09 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8288 (0.7405) acc1: 78.9062 (83.7010) acc3: 93.7500 (95.0827) acc5: 95.3125 (96.8444) time: 0.5481 data: 0.0004 max mem: 28454 +[2024-12-12 20:43:14 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9542 (0.7665) acc1: 79.6875 (83.4785) acc3: 90.6250 (94.4544) acc5: 93.7500 (96.4011) time: 0.5480 data: 0.0004 max mem: 28454 +[2024-12-12 20:43:20 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9457 (0.7940) acc1: 80.4688 (82.6474) acc3: 92.1875 (94.1241) acc5: 94.5312 (96.2258) time: 0.5478 data: 0.0004 max mem: 28454 +[2024-12-12 20:43:25 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9988 (0.8186) acc1: 77.3438 (82.1277) acc3: 92.1875 (93.7789) acc5: 93.7500 (95.9008) time: 0.5478 data: 0.0006 max mem: 28454 +[2024-12-12 20:43:31 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9633 (0.8375) acc1: 78.1250 (81.4990) acc3: 90.6250 (93.5268) acc5: 94.5312 (95.7332) time: 0.5478 data: 0.0006 max mem: 28454 +[2024-12-12 20:43:34 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9330 (0.8350) acc1: 78.9062 (81.4960) acc3: 92.1875 (93.5520) acc5: 94.5312 (95.8080) time: 0.5385 data: 0.0005 max mem: 28454 +[2024-12-12 20:43:34 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5460 s / it) +[2024-12-12 20:43:34 root] (engine.py 264): INFO * Acc@1 81.782 Acc@3 93.486 Acc@5 95.748 loss 0.835 flops 13.207 layer_flops 13.109 +[2024-12-12 20:43:34 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.8% +[2024-12-12 20:43:34 root] (main.py 576): INFO Max accuracy: 81.87% +[2024-12-12 20:43:37 root] (utils.py 283): INFO Epoch: [13] [ 0/2502] eta: 1:58:34 lr: 0.000003 loss_cls: 2.5766 (2.5766) grad_norm: 1.0465 (1.0465) time: 2.8435 data: 0.0004 max mem: 28454 +[2024-12-12 20:44:06 root] (utils.py 283): INFO Epoch: [13] [ 10/2502] eta: 1:59:06 lr: 0.000003 loss_cls: 2.5766 (2.5011) grad_norm: 1.2327 (3.8087) time: 2.8678 data: 0.0003 max mem: 28454 +[2024-12-12 20:44:35 root] (utils.py 283): INFO Epoch: [13] [ 20/2502] eta: 1:58:44 lr: 0.000003 loss_cls: 2.5758 (2.5069) grad_norm: 1.1025 (2.5569) time: 2.8720 data: 0.0003 max mem: 28454 +[2024-12-12 20:45:03 root] (utils.py 283): INFO Epoch: [13] [ 30/2502] eta: 1:58:20 lr: 0.000003 loss_cls: 2.5758 (2.4844) grad_norm: 1.0693 (2.1621) time: 2.8746 data: 0.0003 max mem: 28454 +[2024-12-12 20:45:32 root] (utils.py 283): INFO Epoch: [13] [ 40/2502] eta: 1:57:50 lr: 0.000003 loss_cls: 2.7169 (2.5414) grad_norm: 1.1310 (1.9441) time: 2.8731 data: 0.0003 max mem: 28454 +[2024-12-12 20:46:01 root] (utils.py 283): INFO Epoch: [13] [ 50/2502] eta: 1:57:20 lr: 0.000003 loss_cls: 2.7169 (2.5468) grad_norm: 1.1918 (1.8006) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 20:46:30 root] (utils.py 283): INFO Epoch: [13] [ 60/2502] eta: 1:56:51 lr: 0.000003 loss_cls: 2.6632 (2.5487) grad_norm: 1.1918 (1.7031) time: 2.8706 data: 0.0002 max mem: 28454 +[2024-12-12 20:46:58 root] (utils.py 283): INFO Epoch: [13] [ 70/2502] eta: 1:56:22 lr: 0.000003 loss_cls: 2.6376 (2.5411) grad_norm: 1.0600 (1.6158) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-12 20:47:27 root] (utils.py 283): INFO Epoch: [13] [ 80/2502] eta: 1:55:54 lr: 0.000003 loss_cls: 2.6102 (2.5405) grad_norm: 1.0862 (1.5558) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 20:47:56 root] (utils.py 283): INFO Epoch: [13] [ 90/2502] eta: 1:55:25 lr: 0.000003 loss_cls: 2.3852 (2.5273) grad_norm: 1.1175 (1.5145) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 20:48:24 root] (utils.py 283): INFO Epoch: [13] [ 100/2502] eta: 1:54:57 lr: 0.000003 loss_cls: 2.3102 (2.5103) grad_norm: 1.1095 (1.4798) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 20:48:53 root] (utils.py 283): INFO Epoch: [13] [ 110/2502] eta: 1:54:30 lr: 0.000003 loss_cls: 2.6462 (2.5271) grad_norm: 1.0789 (1.4426) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 20:49:22 root] (utils.py 283): INFO Epoch: [13] [ 120/2502] eta: 1:54:02 lr: 0.000003 loss_cls: 2.8288 (2.5340) grad_norm: 1.0932 (1.4326) time: 2.8773 data: 0.0002 max mem: 28454 +[2024-12-12 20:49:51 root] (utils.py 283): INFO Epoch: [13] [ 130/2502] eta: 1:53:35 lr: 0.000003 loss_cls: 2.6500 (2.5317) grad_norm: 1.1451 (1.4181) time: 2.8799 data: 0.0002 max mem: 28454 +[2024-12-12 20:50:20 root] (utils.py 283): INFO Epoch: [13] [ 140/2502] eta: 1:53:07 lr: 0.000003 loss_cls: 2.6381 (2.5312) grad_norm: 1.1028 (1.3988) time: 2.8803 data: 0.0002 max mem: 28454 +[2024-12-12 20:50:48 root] (utils.py 283): INFO Epoch: [13] [ 150/2502] eta: 1:52:38 lr: 0.000003 loss_cls: 2.7752 (2.5419) grad_norm: 1.1028 (1.3833) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 20:51:17 root] (utils.py 283): INFO Epoch: [13] [ 160/2502] eta: 1:52:10 lr: 0.000003 loss_cls: 2.8426 (2.5466) grad_norm: 1.1864 (1.4455) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 20:51:46 root] (utils.py 283): INFO Epoch: [13] [ 170/2502] eta: 1:51:41 lr: 0.000003 loss_cls: 2.5343 (2.5426) grad_norm: 1.1864 (1.4283) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 20:52:15 root] (utils.py 283): INFO Epoch: [13] [ 180/2502] eta: 1:51:12 lr: 0.000003 loss_cls: 2.3940 (2.5321) grad_norm: 1.0878 (1.4151) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 20:52:43 root] (utils.py 283): INFO Epoch: [13] [ 190/2502] eta: 1:50:45 lr: 0.000003 loss_cls: 2.6326 (2.5419) grad_norm: 1.1533 (1.4026) time: 2.8782 data: 0.0002 max mem: 28454 +[2024-12-12 20:53:12 root] (utils.py 283): INFO Epoch: [13] [ 200/2502] eta: 1:50:16 lr: 0.000003 loss_cls: 2.6674 (2.5470) grad_norm: 1.1533 (1.3870) time: 2.8785 data: 0.0002 max mem: 28454 +[2024-12-12 20:53:41 root] (utils.py 283): INFO Epoch: [13] [ 210/2502] eta: 1:49:48 lr: 0.000003 loss_cls: 2.6986 (2.5544) grad_norm: 1.0701 (1.3910) time: 2.8776 data: 0.0002 max mem: 28454 +[2024-12-12 20:54:10 root] (utils.py 283): INFO Epoch: [13] [ 220/2502] eta: 1:49:19 lr: 0.000003 loss_cls: 2.6929 (2.5522) grad_norm: 1.0701 (1.3794) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 20:54:38 root] (utils.py 283): INFO Epoch: [13] [ 230/2502] eta: 1:48:51 lr: 0.000003 loss_cls: 2.4453 (2.5520) grad_norm: 1.0712 (1.3743) time: 2.8755 data: 0.0003 max mem: 28454 +[2024-12-12 20:55:07 root] (utils.py 283): INFO Epoch: [13] [ 240/2502] eta: 1:48:22 lr: 0.000003 loss_cls: 2.5542 (2.5537) grad_norm: 1.0890 (1.3619) time: 2.8773 data: 0.0003 max mem: 28454 +[2024-12-12 20:55:36 root] (utils.py 283): INFO Epoch: [13] [ 250/2502] eta: 1:47:54 lr: 0.000003 loss_cls: 2.6844 (2.5578) grad_norm: 1.0770 (1.3520) time: 2.8802 data: 0.0002 max mem: 28454 +[2024-12-12 20:56:05 root] (utils.py 283): INFO Epoch: [13] [ 260/2502] eta: 1:47:26 lr: 0.000003 loss_cls: 2.6097 (2.5577) grad_norm: 1.0770 (1.3451) time: 2.8805 data: 0.0002 max mem: 28454 +[2024-12-12 20:56:34 root] (utils.py 283): INFO Epoch: [13] [ 270/2502] eta: 1:46:57 lr: 0.000003 loss_cls: 2.5692 (2.5581) grad_norm: 1.1217 (1.3565) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-12 20:57:02 root] (utils.py 283): INFO Epoch: [13] [ 280/2502] eta: 1:46:29 lr: 0.000003 loss_cls: 2.4984 (2.5504) grad_norm: 1.1457 (1.3549) time: 2.8785 data: 0.0003 max mem: 28454 +[2024-12-12 20:57:31 root] (utils.py 283): INFO Epoch: [13] [ 290/2502] eta: 1:46:00 lr: 0.000003 loss_cls: 2.6278 (2.5545) grad_norm: 1.0556 (1.3456) time: 2.8800 data: 0.0003 max mem: 28454 +[2024-12-12 20:58:00 root] (utils.py 283): INFO Epoch: [13] [ 300/2502] eta: 1:45:32 lr: 0.000003 loss_cls: 2.7337 (2.5611) grad_norm: 1.1225 (1.3406) time: 2.8794 data: 0.0003 max mem: 28454 +[2024-12-12 20:58:29 root] (utils.py 283): INFO Epoch: [13] [ 310/2502] eta: 1:45:03 lr: 0.000003 loss_cls: 2.7167 (2.5636) grad_norm: 1.1935 (1.3402) time: 2.8771 data: 0.0003 max mem: 28454 +[2024-12-12 20:58:57 root] (utils.py 283): INFO Epoch: [13] [ 320/2502] eta: 1:44:34 lr: 0.000003 loss_cls: 2.6433 (2.5627) grad_norm: 1.0435 (1.3314) time: 2.8767 data: 0.0003 max mem: 28454 +[2024-12-12 20:59:26 root] (utils.py 283): INFO Epoch: [13] [ 330/2502] eta: 1:44:06 lr: 0.000003 loss_cls: 2.7764 (2.5697) grad_norm: 1.0435 (1.3260) time: 2.8779 data: 0.0003 max mem: 28454 +[2024-12-12 20:59:55 root] (utils.py 283): INFO Epoch: [13] [ 340/2502] eta: 1:43:37 lr: 0.000003 loss_cls: 2.8536 (2.5724) grad_norm: 1.0970 (1.3198) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 21:00:24 root] (utils.py 283): INFO Epoch: [13] [ 350/2502] eta: 1:43:09 lr: 0.000003 loss_cls: 2.5995 (2.5661) grad_norm: 1.0804 (1.3122) time: 2.8844 data: 0.0003 max mem: 28454 +[2024-12-12 21:00:53 root] (utils.py 283): INFO Epoch: [13] [ 360/2502] eta: 1:42:41 lr: 0.000003 loss_cls: 2.5854 (2.5633) grad_norm: 1.0874 (1.3244) time: 2.8868 data: 0.0003 max mem: 28454 +[2024-12-12 21:01:22 root] (utils.py 283): INFO Epoch: [13] [ 370/2502] eta: 1:42:12 lr: 0.000003 loss_cls: 2.5854 (2.5582) grad_norm: 1.1779 (1.3189) time: 2.8799 data: 0.0003 max mem: 28454 +[2024-12-12 21:01:50 root] (utils.py 283): INFO Epoch: [13] [ 380/2502] eta: 1:41:43 lr: 0.000003 loss_cls: 2.4616 (2.5576) grad_norm: 1.1024 (1.3260) time: 2.8785 data: 0.0002 max mem: 28454 +[2024-12-12 21:02:19 root] (utils.py 283): INFO Epoch: [13] [ 390/2502] eta: 1:41:15 lr: 0.000003 loss_cls: 2.6621 (2.5598) grad_norm: 1.1347 (1.3349) time: 2.8794 data: 0.0002 max mem: 28454 +[2024-12-12 21:02:48 root] (utils.py 283): INFO Epoch: [13] [ 400/2502] eta: 1:40:46 lr: 0.000003 loss_cls: 2.6594 (2.5584) grad_norm: 1.0413 (1.3293) time: 2.8801 data: 0.0002 max mem: 28454 +[2024-12-12 21:03:17 root] (utils.py 283): INFO Epoch: [13] [ 410/2502] eta: 1:40:17 lr: 0.000003 loss_cls: 2.5996 (2.5599) grad_norm: 1.1670 (1.3272) time: 2.8749 data: 0.0003 max mem: 28454 +[2024-12-12 21:03:45 root] (utils.py 283): INFO Epoch: [13] [ 420/2502] eta: 1:39:48 lr: 0.000003 loss_cls: 2.6723 (2.5604) grad_norm: 1.1670 (1.3225) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 21:04:14 root] (utils.py 283): INFO Epoch: [13] [ 430/2502] eta: 1:39:19 lr: 0.000003 loss_cls: 2.5523 (2.5574) grad_norm: 1.0973 (1.3171) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 21:04:43 root] (utils.py 283): INFO Epoch: [13] [ 440/2502] eta: 1:38:51 lr: 0.000003 loss_cls: 2.4949 (2.5566) grad_norm: 1.1367 (1.3193) time: 2.8757 data: 0.0003 max mem: 28454 +[2024-12-12 21:05:12 root] (utils.py 283): INFO Epoch: [13] [ 450/2502] eta: 1:38:22 lr: 0.000003 loss_cls: 2.5622 (2.5568) grad_norm: 1.1367 (1.3173) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 21:05:40 root] (utils.py 283): INFO Epoch: [13] [ 460/2502] eta: 1:37:53 lr: 0.000003 loss_cls: 2.4401 (2.5503) grad_norm: 1.0826 (1.3128) time: 2.8749 data: 0.0002 max mem: 28454 +[2024-12-12 21:06:09 root] (utils.py 283): INFO Epoch: [13] [ 470/2502] eta: 1:37:24 lr: 0.000003 loss_cls: 2.4110 (2.5507) grad_norm: 1.0679 (1.3100) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 21:06:38 root] (utils.py 283): INFO Epoch: [13] [ 480/2502] eta: 1:36:56 lr: 0.000003 loss_cls: 2.6450 (2.5514) grad_norm: 1.0737 (1.3509) time: 2.8762 data: 0.0003 max mem: 28454 +[2024-12-12 21:07:07 root] (utils.py 283): INFO Epoch: [13] [ 490/2502] eta: 1:36:27 lr: 0.000003 loss_cls: 2.4829 (2.5524) grad_norm: 1.0807 (1.3451) time: 2.8749 data: 0.0003 max mem: 28454 +[2024-12-12 21:07:35 root] (utils.py 283): INFO Epoch: [13] [ 500/2502] eta: 1:35:58 lr: 0.000003 loss_cls: 2.7097 (2.5561) grad_norm: 1.1553 (1.3441) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-12 21:08:04 root] (utils.py 283): INFO Epoch: [13] [ 510/2502] eta: 1:35:29 lr: 0.000003 loss_cls: 2.7262 (2.5565) grad_norm: 1.1657 (1.3435) time: 2.8744 data: 0.0003 max mem: 28454 +[2024-12-12 21:08:33 root] (utils.py 283): INFO Epoch: [13] [ 520/2502] eta: 1:35:00 lr: 0.000003 loss_cls: 2.7407 (2.5603) grad_norm: 1.1021 (1.3463) time: 2.8772 data: 0.0003 max mem: 28454 +[2024-12-12 21:09:02 root] (utils.py 283): INFO Epoch: [13] [ 530/2502] eta: 1:34:32 lr: 0.000003 loss_cls: 2.7936 (2.5630) grad_norm: 1.0997 (1.3420) time: 2.8770 data: 0.0003 max mem: 28454 +[2024-12-12 21:09:30 root] (utils.py 283): INFO Epoch: [13] [ 540/2502] eta: 1:34:03 lr: 0.000003 loss_cls: 2.7242 (2.5641) grad_norm: 1.0719 (1.3379) time: 2.8763 data: 0.0003 max mem: 28454 +[2024-12-12 21:09:59 root] (utils.py 283): INFO Epoch: [13] [ 550/2502] eta: 1:33:34 lr: 0.000003 loss_cls: 2.6784 (2.5630) grad_norm: 1.0719 (1.3336) time: 2.8777 data: 0.0003 max mem: 28454 +[2024-12-12 21:10:28 root] (utils.py 283): INFO Epoch: [13] [ 560/2502] eta: 1:33:05 lr: 0.000003 loss_cls: 2.6566 (2.5649) grad_norm: 1.0440 (1.3289) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 21:10:57 root] (utils.py 283): INFO Epoch: [13] [ 570/2502] eta: 1:32:37 lr: 0.000003 loss_cls: 2.6655 (2.5644) grad_norm: 1.1008 (1.3325) time: 2.8790 data: 0.0003 max mem: 28454 +[2024-12-12 21:11:26 root] (utils.py 283): INFO Epoch: [13] [ 580/2502] eta: 1:32:08 lr: 0.000003 loss_cls: 2.6655 (2.5641) grad_norm: 1.1324 (1.3290) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-12 21:11:54 root] (utils.py 283): INFO Epoch: [13] [ 590/2502] eta: 1:31:39 lr: 0.000003 loss_cls: 2.6713 (2.5660) grad_norm: 1.1324 (1.3314) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 21:12:23 root] (utils.py 283): INFO Epoch: [13] [ 600/2502] eta: 1:31:10 lr: 0.000003 loss_cls: 2.7258 (2.5672) grad_norm: 1.1527 (1.3289) time: 2.8751 data: 0.0003 max mem: 28454 +[2024-12-12 21:12:52 root] (utils.py 283): INFO Epoch: [13] [ 610/2502] eta: 1:30:42 lr: 0.000003 loss_cls: 2.6636 (2.5665) grad_norm: 1.1211 (1.3252) time: 2.8770 data: 0.0003 max mem: 28454 +[2024-12-12 21:13:21 root] (utils.py 283): INFO Epoch: [13] [ 620/2502] eta: 1:30:13 lr: 0.000003 loss_cls: 2.4376 (2.5612) grad_norm: 1.1318 (1.3220) time: 2.8784 data: 0.0003 max mem: 28454 +[2024-12-12 21:13:50 root] (utils.py 283): INFO Epoch: [13] [ 630/2502] eta: 1:29:44 lr: 0.000003 loss_cls: 2.4087 (2.5613) grad_norm: 1.0895 (1.3211) time: 2.8783 data: 0.0002 max mem: 28454 +[2024-12-12 21:14:18 root] (utils.py 283): INFO Epoch: [13] [ 640/2502] eta: 1:29:16 lr: 0.000003 loss_cls: 2.5613 (2.5617) grad_norm: 1.0817 (1.3181) time: 2.8792 data: 0.0003 max mem: 28454 +[2024-12-12 21:14:47 root] (utils.py 283): INFO Epoch: [13] [ 650/2502] eta: 1:28:47 lr: 0.000003 loss_cls: 2.5909 (2.5607) grad_norm: 1.1019 (1.3148) time: 2.8795 data: 0.0003 max mem: 28454 +[2024-12-12 21:15:16 root] (utils.py 283): INFO Epoch: [13] [ 660/2502] eta: 1:28:18 lr: 0.000003 loss_cls: 2.6556 (2.5601) grad_norm: 1.1075 (1.3124) time: 2.8813 data: 0.0003 max mem: 28454 +[2024-12-12 21:15:45 root] (utils.py 283): INFO Epoch: [13] [ 670/2502] eta: 1:27:50 lr: 0.000003 loss_cls: 2.6624 (2.5612) grad_norm: 1.1481 (1.3127) time: 2.8801 data: 0.0002 max mem: 28454 +[2024-12-12 21:16:14 root] (utils.py 283): INFO Epoch: [13] [ 680/2502] eta: 1:27:21 lr: 0.000003 loss_cls: 2.6200 (2.5623) grad_norm: 1.1360 (1.3115) time: 2.8792 data: 0.0002 max mem: 28454 +[2024-12-12 21:16:42 root] (utils.py 283): INFO Epoch: [13] [ 690/2502] eta: 1:26:52 lr: 0.000003 loss_cls: 2.5905 (2.5593) grad_norm: 1.0409 (1.3079) time: 2.8804 data: 0.0002 max mem: 28454 +[2024-12-12 21:17:11 root] (utils.py 283): INFO Epoch: [13] [ 700/2502] eta: 1:26:24 lr: 0.000003 loss_cls: 2.4465 (2.5570) grad_norm: 1.0409 (1.3164) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 21:17:40 root] (utils.py 283): INFO Epoch: [13] [ 710/2502] eta: 1:25:55 lr: 0.000003 loss_cls: 2.5817 (2.5576) grad_norm: 1.0720 (1.3151) time: 2.8790 data: 0.0003 max mem: 28454 +[2024-12-12 21:18:09 root] (utils.py 283): INFO Epoch: [13] [ 720/2502] eta: 1:25:26 lr: 0.000003 loss_cls: 2.6903 (2.5592) grad_norm: 1.0749 (1.3147) time: 2.8786 data: 0.0003 max mem: 28454 +[2024-12-12 21:18:37 root] (utils.py 283): INFO Epoch: [13] [ 730/2502] eta: 1:24:57 lr: 0.000003 loss_cls: 2.5951 (2.5562) grad_norm: 1.1860 (1.3138) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-12 21:19:06 root] (utils.py 283): INFO Epoch: [13] [ 740/2502] eta: 1:24:29 lr: 0.000003 loss_cls: 2.5806 (2.5583) grad_norm: 1.1802 (1.3115) time: 2.8782 data: 0.0002 max mem: 28454 +[2024-12-12 21:19:35 root] (utils.py 283): INFO Epoch: [13] [ 750/2502] eta: 1:24:00 lr: 0.000003 loss_cls: 2.5806 (2.5565) grad_norm: 1.1445 (1.3094) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 21:20:04 root] (utils.py 283): INFO Epoch: [13] [ 760/2502] eta: 1:23:31 lr: 0.000003 loss_cls: 2.5920 (2.5585) grad_norm: 1.0744 (1.3069) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 21:20:33 root] (utils.py 283): INFO Epoch: [13] [ 770/2502] eta: 1:23:02 lr: 0.000003 loss_cls: 2.5920 (2.5588) grad_norm: 1.1308 (1.3054) time: 2.8772 data: 0.0002 max mem: 28454 +[2024-12-12 21:21:01 root] (utils.py 283): INFO Epoch: [13] [ 780/2502] eta: 1:22:34 lr: 0.000003 loss_cls: 2.5896 (2.5600) grad_norm: 1.1528 (1.3039) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-12 21:21:30 root] (utils.py 283): INFO Epoch: [13] [ 790/2502] eta: 1:22:05 lr: 0.000003 loss_cls: 2.6167 (2.5605) grad_norm: 1.1833 (1.3044) time: 2.8790 data: 0.0002 max mem: 28454 +[2024-12-12 21:21:59 root] (utils.py 283): INFO Epoch: [13] [ 800/2502] eta: 1:21:36 lr: 0.000003 loss_cls: 2.6393 (2.5618) grad_norm: 1.1214 (1.3011) time: 2.8797 data: 0.0002 max mem: 28454 +[2024-12-12 21:22:28 root] (utils.py 283): INFO Epoch: [13] [ 810/2502] eta: 1:21:08 lr: 0.000003 loss_cls: 2.6393 (2.5626) grad_norm: 1.0255 (1.2989) time: 2.8803 data: 0.0002 max mem: 28454 +[2024-12-12 21:22:57 root] (utils.py 283): INFO Epoch: [13] [ 820/2502] eta: 1:20:39 lr: 0.000003 loss_cls: 2.6188 (2.5630) grad_norm: 1.1123 (1.3098) time: 2.8783 data: 0.0003 max mem: 28454 +[2024-12-12 21:23:25 root] (utils.py 283): INFO Epoch: [13] [ 830/2502] eta: 1:20:10 lr: 0.000003 loss_cls: 2.6188 (2.5640) grad_norm: 1.1179 (1.3092) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-12 21:23:54 root] (utils.py 283): INFO Epoch: [13] [ 840/2502] eta: 1:19:41 lr: 0.000003 loss_cls: 2.4956 (2.5612) grad_norm: 1.1586 (1.3088) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-12 21:24:23 root] (utils.py 283): INFO Epoch: [13] [ 850/2502] eta: 1:19:13 lr: 0.000003 loss_cls: 2.4937 (2.5622) grad_norm: 1.2174 (1.3124) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 21:24:52 root] (utils.py 283): INFO Epoch: [13] [ 860/2502] eta: 1:18:44 lr: 0.000003 loss_cls: 2.7465 (2.5637) grad_norm: 1.2060 (1.3168) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 21:25:20 root] (utils.py 283): INFO Epoch: [13] [ 870/2502] eta: 1:18:15 lr: 0.000003 loss_cls: 2.5856 (2.5623) grad_norm: 1.0955 (1.3142) time: 2.8787 data: 0.0002 max mem: 28454 +[2024-12-12 21:25:49 root] (utils.py 283): INFO Epoch: [13] [ 880/2502] eta: 1:17:46 lr: 0.000003 loss_cls: 2.4724 (2.5611) grad_norm: 1.0662 (1.3136) time: 2.8748 data: 0.0003 max mem: 28454 +[2024-12-12 21:26:18 root] (utils.py 283): INFO Epoch: [13] [ 890/2502] eta: 1:17:17 lr: 0.000003 loss_cls: 2.3598 (2.5586) grad_norm: 1.0249 (1.3125) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 21:26:47 root] (utils.py 283): INFO Epoch: [13] [ 900/2502] eta: 1:16:48 lr: 0.000003 loss_cls: 2.6698 (2.5583) grad_norm: 1.1278 (1.3224) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 21:27:15 root] (utils.py 283): INFO Epoch: [13] [ 910/2502] eta: 1:16:20 lr: 0.000003 loss_cls: 2.6390 (2.5559) grad_norm: 1.1145 (1.3205) time: 2.8754 data: 0.0003 max mem: 28454 +[2024-12-12 21:27:44 root] (utils.py 283): INFO Epoch: [13] [ 920/2502] eta: 1:15:51 lr: 0.000003 loss_cls: 2.4242 (2.5558) grad_norm: 1.0303 (1.3184) time: 2.8760 data: 0.0003 max mem: 28454 +[2024-12-12 21:28:13 root] (utils.py 283): INFO Epoch: [13] [ 930/2502] eta: 1:15:22 lr: 0.000003 loss_cls: 2.5073 (2.5554) grad_norm: 1.1565 (1.3348) time: 2.8761 data: 0.0002 max mem: 28454 +[2024-12-12 21:28:42 root] (utils.py 283): INFO Epoch: [13] [ 940/2502] eta: 1:14:53 lr: 0.000003 loss_cls: 2.5959 (2.5563) grad_norm: 1.2255 (1.3336) time: 2.8775 data: 0.0003 max mem: 28454 +[2024-12-12 21:29:10 root] (utils.py 283): INFO Epoch: [13] [ 950/2502] eta: 1:14:25 lr: 0.000003 loss_cls: 2.7422 (2.5592) grad_norm: 1.1706 (1.3316) time: 2.8776 data: 0.0003 max mem: 28454 +[2024-12-12 21:29:39 root] (utils.py 283): INFO Epoch: [13] [ 960/2502] eta: 1:13:56 lr: 0.000003 loss_cls: 2.7675 (2.5602) grad_norm: 1.1004 (1.3296) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 21:30:08 root] (utils.py 283): INFO Epoch: [13] [ 970/2502] eta: 1:13:27 lr: 0.000003 loss_cls: 2.7675 (2.5601) grad_norm: 1.0705 (1.3270) time: 2.8770 data: 0.0002 max mem: 28454 +[2024-12-12 21:30:37 root] (utils.py 283): INFO Epoch: [13] [ 980/2502] eta: 1:12:58 lr: 0.000003 loss_cls: 2.7147 (2.5612) grad_norm: 1.0915 (1.3299) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-12 21:31:06 root] (utils.py 283): INFO Epoch: [13] [ 990/2502] eta: 1:12:30 lr: 0.000003 loss_cls: 2.7272 (2.5632) grad_norm: 1.1079 (1.3279) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-12 21:31:34 root] (utils.py 283): INFO Epoch: [13] [1000/2502] eta: 1:12:01 lr: 0.000003 loss_cls: 2.6677 (2.5628) grad_norm: 1.1079 (1.3264) time: 2.8760 data: 0.0002 max mem: 28454 +[2024-12-12 21:32:03 root] (utils.py 283): INFO Epoch: [13] [1010/2502] eta: 1:11:32 lr: 0.000003 loss_cls: 2.7391 (2.5640) grad_norm: 1.1227 (1.3249) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 21:32:32 root] (utils.py 283): INFO Epoch: [13] [1020/2502] eta: 1:11:03 lr: 0.000003 loss_cls: 2.7755 (2.5652) grad_norm: 1.1309 (1.3300) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 21:33:01 root] (utils.py 283): INFO Epoch: [13] [1030/2502] eta: 1:10:34 lr: 0.000003 loss_cls: 2.7345 (2.5651) grad_norm: 1.1850 (1.3283) time: 2.8746 data: 0.0003 max mem: 28454 +[2024-12-12 21:33:29 root] (utils.py 283): INFO Epoch: [13] [1040/2502] eta: 1:10:06 lr: 0.000003 loss_cls: 2.6444 (2.5654) grad_norm: 1.1507 (1.3271) time: 2.8745 data: 0.0003 max mem: 28454 +[2024-12-12 21:33:58 root] (utils.py 283): INFO Epoch: [13] [1050/2502] eta: 1:09:37 lr: 0.000003 loss_cls: 2.7190 (2.5657) grad_norm: 1.0574 (1.3263) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 21:34:27 root] (utils.py 283): INFO Epoch: [13] [1060/2502] eta: 1:09:08 lr: 0.000003 loss_cls: 2.6051 (2.5655) grad_norm: 1.0900 (1.3387) time: 2.8760 data: 0.0003 max mem: 28454 +[2024-12-12 21:34:56 root] (utils.py 283): INFO Epoch: [13] [1070/2502] eta: 1:08:39 lr: 0.000003 loss_cls: 2.6798 (2.5669) grad_norm: 1.1469 (1.3477) time: 2.8746 data: 0.0003 max mem: 28454 +[2024-12-12 21:35:24 root] (utils.py 283): INFO Epoch: [13] [1080/2502] eta: 1:08:10 lr: 0.000003 loss_cls: 2.7524 (2.5673) grad_norm: 1.1401 (1.3462) time: 2.8747 data: 0.0003 max mem: 28454 +[2024-12-12 21:35:53 root] (utils.py 283): INFO Epoch: [13] [1090/2502] eta: 1:07:42 lr: 0.000003 loss_cls: 2.7078 (2.5693) grad_norm: 1.1939 (1.3456) time: 2.8755 data: 0.0003 max mem: 28454 +[2024-12-12 21:36:22 root] (utils.py 283): INFO Epoch: [13] [1100/2502] eta: 1:07:13 lr: 0.000003 loss_cls: 2.7078 (2.5691) grad_norm: 1.1617 (1.3440) time: 2.8785 data: 0.0002 max mem: 28454 +[2024-12-12 21:36:51 root] (utils.py 283): INFO Epoch: [13] [1110/2502] eta: 1:06:44 lr: 0.000003 loss_cls: 2.6857 (2.5683) grad_norm: 1.1113 (1.3419) time: 2.8773 data: 0.0002 max mem: 28454 +[2024-12-12 21:37:20 root] (utils.py 283): INFO Epoch: [13] [1120/2502] eta: 1:06:15 lr: 0.000003 loss_cls: 2.6117 (2.5658) grad_norm: 1.1410 (1.3403) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 21:37:48 root] (utils.py 283): INFO Epoch: [13] [1130/2502] eta: 1:05:47 lr: 0.000003 loss_cls: 2.5575 (2.5649) grad_norm: 1.1651 (1.3393) time: 2.8792 data: 0.0002 max mem: 28454 +[2024-12-12 21:38:17 root] (utils.py 283): INFO Epoch: [13] [1140/2502] eta: 1:05:18 lr: 0.000003 loss_cls: 2.5261 (2.5642) grad_norm: 1.1333 (1.3370) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 21:38:46 root] (utils.py 283): INFO Epoch: [13] [1150/2502] eta: 1:04:49 lr: 0.000003 loss_cls: 2.5261 (2.5638) grad_norm: 1.0660 (1.3449) time: 2.8738 data: 0.0002 max mem: 28454 +[2024-12-12 21:39:15 root] (utils.py 283): INFO Epoch: [13] [1160/2502] eta: 1:04:20 lr: 0.000003 loss_cls: 2.6450 (2.5630) grad_norm: 1.1042 (1.3431) time: 2.8719 data: 0.0002 max mem: 28454 +[2024-12-12 21:39:43 root] (utils.py 283): INFO Epoch: [13] [1170/2502] eta: 1:03:51 lr: 0.000003 loss_cls: 2.5722 (2.5629) grad_norm: 1.1256 (1.3423) time: 2.8710 data: 0.0003 max mem: 28454 +[2024-12-12 21:40:12 root] (utils.py 283): INFO Epoch: [13] [1180/2502] eta: 1:03:23 lr: 0.000003 loss_cls: 2.5722 (2.5631) grad_norm: 1.1548 (1.3412) time: 2.8724 data: 0.0003 max mem: 28454 +[2024-12-12 21:40:41 root] (utils.py 283): INFO Epoch: [13] [1190/2502] eta: 1:02:54 lr: 0.000003 loss_cls: 2.6749 (2.5630) grad_norm: 1.1548 (1.3393) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 21:41:09 root] (utils.py 283): INFO Epoch: [13] [1200/2502] eta: 1:02:25 lr: 0.000003 loss_cls: 2.2907 (2.5604) grad_norm: 1.1002 (1.3374) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 21:41:38 root] (utils.py 283): INFO Epoch: [13] [1210/2502] eta: 1:01:56 lr: 0.000003 loss_cls: 2.2595 (2.5602) grad_norm: 1.0910 (1.3380) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 21:42:07 root] (utils.py 283): INFO Epoch: [13] [1220/2502] eta: 1:01:27 lr: 0.000003 loss_cls: 2.6709 (2.5616) grad_norm: 1.1595 (1.3369) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 21:42:36 root] (utils.py 283): INFO Epoch: [13] [1230/2502] eta: 1:00:59 lr: 0.000003 loss_cls: 2.6678 (2.5603) grad_norm: 1.1365 (1.3360) time: 2.8737 data: 0.0002 max mem: 28454 +[2024-12-12 21:43:04 root] (utils.py 283): INFO Epoch: [13] [1240/2502] eta: 1:00:30 lr: 0.000003 loss_cls: 2.3660 (2.5589) grad_norm: 1.0511 (1.3339) time: 2.8736 data: 0.0002 max mem: 28454 +[2024-12-12 21:43:33 root] (utils.py 283): INFO Epoch: [13] [1250/2502] eta: 1:00:01 lr: 0.000003 loss_cls: 2.4511 (2.5580) grad_norm: 1.1271 (1.3327) time: 2.8745 data: 0.0002 max mem: 28454 +[2024-12-12 21:44:02 root] (utils.py 283): INFO Epoch: [13] [1260/2502] eta: 0:59:32 lr: 0.000003 loss_cls: 2.5209 (2.5561) grad_norm: 1.1376 (1.3312) time: 2.8783 data: 0.0003 max mem: 28454 +[2024-12-12 21:44:31 root] (utils.py 283): INFO Epoch: [13] [1270/2502] eta: 0:59:03 lr: 0.000003 loss_cls: 2.5386 (2.5548) grad_norm: 1.0782 (1.3297) time: 2.8795 data: 0.0003 max mem: 28454 +[2024-12-12 21:44:59 root] (utils.py 283): INFO Epoch: [13] [1280/2502] eta: 0:58:35 lr: 0.000003 loss_cls: 2.6968 (2.5572) grad_norm: 1.1161 (1.3288) time: 2.8777 data: 0.0003 max mem: 28454 +[2024-12-12 21:45:28 root] (utils.py 283): INFO Epoch: [13] [1290/2502] eta: 0:58:06 lr: 0.000003 loss_cls: 2.7272 (2.5574) grad_norm: 1.1683 (1.3293) time: 2.8764 data: 0.0003 max mem: 28454 +[2024-12-12 21:45:57 root] (utils.py 283): INFO Epoch: [13] [1300/2502] eta: 0:57:37 lr: 0.000003 loss_cls: 2.7321 (2.5577) grad_norm: 1.2369 (1.3336) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-12 21:46:26 root] (utils.py 283): INFO Epoch: [13] [1310/2502] eta: 0:57:08 lr: 0.000003 loss_cls: 2.8066 (2.5577) grad_norm: 1.1271 (1.3320) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 21:46:55 root] (utils.py 283): INFO Epoch: [13] [1320/2502] eta: 0:56:40 lr: 0.000003 loss_cls: 2.7767 (2.5585) grad_norm: 1.1271 (1.3308) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-12 21:47:23 root] (utils.py 283): INFO Epoch: [13] [1330/2502] eta: 0:56:11 lr: 0.000003 loss_cls: 2.7717 (2.5594) grad_norm: 1.1128 (1.3359) time: 2.8778 data: 0.0003 max mem: 28454 +[2024-12-12 21:47:52 root] (utils.py 283): INFO Epoch: [13] [1340/2502] eta: 0:55:42 lr: 0.000003 loss_cls: 2.7143 (2.5609) grad_norm: 1.1128 (1.3347) time: 2.8828 data: 0.0003 max mem: 28454 +[2024-12-12 21:48:21 root] (utils.py 283): INFO Epoch: [13] [1350/2502] eta: 0:55:13 lr: 0.000003 loss_cls: 2.6997 (2.5616) grad_norm: 1.1203 (1.3335) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 21:48:50 root] (utils.py 283): INFO Epoch: [13] [1360/2502] eta: 0:54:45 lr: 0.000003 loss_cls: 2.6172 (2.5607) grad_norm: 1.1003 (1.3413) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 21:49:18 root] (utils.py 283): INFO Epoch: [13] [1370/2502] eta: 0:54:16 lr: 0.000003 loss_cls: 2.6228 (2.5614) grad_norm: 1.0995 (1.3397) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 21:49:47 root] (utils.py 283): INFO Epoch: [13] [1380/2502] eta: 0:53:47 lr: 0.000003 loss_cls: 2.6228 (2.5598) grad_norm: 1.1067 (1.3394) time: 2.8773 data: 0.0002 max mem: 28454 +[2024-12-12 21:50:16 root] (utils.py 283): INFO Epoch: [13] [1390/2502] eta: 0:53:18 lr: 0.000003 loss_cls: 2.3681 (2.5586) grad_norm: 1.1104 (1.3379) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 21:50:45 root] (utils.py 283): INFO Epoch: [13] [1400/2502] eta: 0:52:50 lr: 0.000003 loss_cls: 2.3953 (2.5583) grad_norm: 1.1104 (1.3376) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 21:51:13 root] (utils.py 283): INFO Epoch: [13] [1410/2502] eta: 0:52:21 lr: 0.000003 loss_cls: 2.6191 (2.5591) grad_norm: 1.0836 (1.3361) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 21:51:42 root] (utils.py 283): INFO Epoch: [13] [1420/2502] eta: 0:51:52 lr: 0.000003 loss_cls: 2.6344 (2.5576) grad_norm: 1.0781 (1.3343) time: 2.8750 data: 0.0002 max mem: 28454 +[2024-12-12 21:52:11 root] (utils.py 283): INFO Epoch: [13] [1430/2502] eta: 0:51:23 lr: 0.000003 loss_cls: 2.1001 (2.5543) grad_norm: 1.1498 (1.3337) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 21:52:40 root] (utils.py 283): INFO Epoch: [13] [1440/2502] eta: 0:50:54 lr: 0.000003 loss_cls: 2.4244 (2.5551) grad_norm: 1.1793 (1.3325) time: 2.8767 data: 0.0002 max mem: 28454 +[2024-12-12 21:53:09 root] (utils.py 283): INFO Epoch: [13] [1450/2502] eta: 0:50:26 lr: 0.000003 loss_cls: 2.5875 (2.5552) grad_norm: 1.0862 (1.3310) time: 2.8762 data: 0.0002 max mem: 28454 +[2024-12-12 21:53:37 root] (utils.py 283): INFO Epoch: [13] [1460/2502] eta: 0:49:57 lr: 0.000003 loss_cls: 2.5875 (2.5549) grad_norm: 1.0838 (1.3302) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 21:54:06 root] (utils.py 283): INFO Epoch: [13] [1470/2502] eta: 0:49:28 lr: 0.000003 loss_cls: 2.7102 (2.5543) grad_norm: 1.0838 (1.3290) time: 2.8783 data: 0.0002 max mem: 28454 +[2024-12-12 21:54:35 root] (utils.py 283): INFO Epoch: [13] [1480/2502] eta: 0:48:59 lr: 0.000003 loss_cls: 2.7102 (2.5554) grad_norm: 1.1373 (1.3274) time: 2.8805 data: 0.0002 max mem: 28454 +[2024-12-12 21:55:04 root] (utils.py 283): INFO Epoch: [13] [1490/2502] eta: 0:48:31 lr: 0.000003 loss_cls: 2.6133 (2.5551) grad_norm: 1.1439 (1.3268) time: 2.8795 data: 0.0002 max mem: 28454 +[2024-12-12 21:55:32 root] (utils.py 283): INFO Epoch: [13] [1500/2502] eta: 0:48:02 lr: 0.000003 loss_cls: 2.5503 (2.5548) grad_norm: 1.1018 (1.3265) time: 2.8782 data: 0.0003 max mem: 28454 +[2024-12-12 21:56:01 root] (utils.py 283): INFO Epoch: [13] [1510/2502] eta: 0:47:33 lr: 0.000003 loss_cls: 2.4873 (2.5541) grad_norm: 1.1038 (1.3253) time: 2.8769 data: 0.0003 max mem: 28454 +[2024-12-12 21:56:30 root] (utils.py 283): INFO Epoch: [13] [1520/2502] eta: 0:47:04 lr: 0.000003 loss_cls: 2.6230 (2.5543) grad_norm: 1.1468 (1.3253) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 21:56:59 root] (utils.py 283): INFO Epoch: [13] [1530/2502] eta: 0:46:36 lr: 0.000003 loss_cls: 2.6030 (2.5533) grad_norm: 1.1468 (1.3248) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 21:57:27 root] (utils.py 283): INFO Epoch: [13] [1540/2502] eta: 0:46:07 lr: 0.000003 loss_cls: 2.5910 (2.5542) grad_norm: 1.1370 (1.3240) time: 2.8758 data: 0.0002 max mem: 28454 +[2024-12-12 21:57:56 root] (utils.py 283): INFO Epoch: [13] [1550/2502] eta: 0:45:38 lr: 0.000003 loss_cls: 2.6957 (2.5548) grad_norm: 1.1370 (1.3229) time: 2.8771 data: 0.0002 max mem: 28454 +[2024-12-12 21:58:25 root] (utils.py 283): INFO Epoch: [13] [1560/2502] eta: 0:45:09 lr: 0.000003 loss_cls: 2.4042 (2.5531) grad_norm: 1.1784 (1.3224) time: 2.8793 data: 0.0002 max mem: 28454 +[2024-12-12 21:58:54 root] (utils.py 283): INFO Epoch: [13] [1570/2502] eta: 0:44:41 lr: 0.000003 loss_cls: 2.5188 (2.5537) grad_norm: 1.0890 (1.3212) time: 2.8801 data: 0.0002 max mem: 28454 +[2024-12-12 21:59:23 root] (utils.py 283): INFO Epoch: [13] [1580/2502] eta: 0:44:12 lr: 0.000003 loss_cls: 2.6996 (2.5534) grad_norm: 1.0753 (1.3200) time: 2.8787 data: 0.0002 max mem: 28454 +[2024-12-12 21:59:51 root] (utils.py 283): INFO Epoch: [13] [1590/2502] eta: 0:43:43 lr: 0.000003 loss_cls: 2.4859 (2.5525) grad_norm: 1.1386 (1.3189) time: 2.8757 data: 0.0002 max mem: 28454 +[2024-12-12 22:00:20 root] (utils.py 283): INFO Epoch: [13] [1600/2502] eta: 0:43:14 lr: 0.000003 loss_cls: 2.5998 (2.5532) grad_norm: 1.0954 (1.3174) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 22:00:49 root] (utils.py 283): INFO Epoch: [13] [1610/2502] eta: 0:42:45 lr: 0.000003 loss_cls: 2.6646 (2.5539) grad_norm: 1.0654 (1.3160) time: 2.8730 data: 0.0003 max mem: 28454 +[2024-12-12 22:01:18 root] (utils.py 283): INFO Epoch: [13] [1620/2502] eta: 0:42:17 lr: 0.000003 loss_cls: 2.6164 (2.5536) grad_norm: 1.1116 (1.3154) time: 2.8726 data: 0.0003 max mem: 28454 +[2024-12-12 22:01:46 root] (utils.py 283): INFO Epoch: [13] [1630/2502] eta: 0:41:48 lr: 0.000003 loss_cls: 2.5627 (2.5529) grad_norm: 1.1561 (1.3142) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 22:02:15 root] (utils.py 283): INFO Epoch: [13] [1640/2502] eta: 0:41:19 lr: 0.000003 loss_cls: 2.6921 (2.5541) grad_norm: 1.1150 (1.3139) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 22:02:44 root] (utils.py 283): INFO Epoch: [13] [1650/2502] eta: 0:40:50 lr: 0.000003 loss_cls: 2.6159 (2.5518) grad_norm: 1.0976 (1.3128) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-12 22:03:12 root] (utils.py 283): INFO Epoch: [13] [1660/2502] eta: 0:40:22 lr: 0.000003 loss_cls: 2.6159 (2.5528) grad_norm: 1.1372 (1.3129) time: 2.8736 data: 0.0003 max mem: 28454 +[2024-12-12 22:03:41 root] (utils.py 283): INFO Epoch: [13] [1670/2502] eta: 0:39:53 lr: 0.000003 loss_cls: 2.6258 (2.5514) grad_norm: 1.1534 (1.3122) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 22:04:10 root] (utils.py 283): INFO Epoch: [13] [1680/2502] eta: 0:39:24 lr: 0.000003 loss_cls: 2.3110 (2.5503) grad_norm: 1.1067 (1.3110) time: 2.8739 data: 0.0002 max mem: 28454 +[2024-12-12 22:04:39 root] (utils.py 283): INFO Epoch: [13] [1690/2502] eta: 0:38:55 lr: 0.000003 loss_cls: 2.2981 (2.5493) grad_norm: 1.1067 (1.3097) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-12 22:05:07 root] (utils.py 283): INFO Epoch: [13] [1700/2502] eta: 0:38:26 lr: 0.000003 loss_cls: 2.2425 (2.5475) grad_norm: 1.1144 (1.3086) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 22:05:36 root] (utils.py 283): INFO Epoch: [13] [1710/2502] eta: 0:37:58 lr: 0.000003 loss_cls: 2.5357 (2.5480) grad_norm: 1.1168 (1.3083) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 22:06:05 root] (utils.py 283): INFO Epoch: [13] [1720/2502] eta: 0:37:29 lr: 0.000003 loss_cls: 2.7535 (2.5490) grad_norm: 1.1880 (1.3076) time: 2.8766 data: 0.0002 max mem: 28454 +[2024-12-12 22:06:34 root] (utils.py 283): INFO Epoch: [13] [1730/2502] eta: 0:37:00 lr: 0.000003 loss_cls: 2.7577 (2.5483) grad_norm: 1.0926 (1.3062) time: 2.8775 data: 0.0002 max mem: 28454 +[2024-12-12 22:07:02 root] (utils.py 283): INFO Epoch: [13] [1740/2502] eta: 0:36:31 lr: 0.000003 loss_cls: 2.5540 (2.5475) grad_norm: 1.0264 (1.3054) time: 2.8765 data: 0.0002 max mem: 28454 +[2024-12-12 22:07:31 root] (utils.py 283): INFO Epoch: [13] [1750/2502] eta: 0:36:03 lr: 0.000003 loss_cls: 2.6661 (2.5490) grad_norm: 1.1381 (1.3051) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 22:08:00 root] (utils.py 283): INFO Epoch: [13] [1760/2502] eta: 0:35:34 lr: 0.000003 loss_cls: 2.6961 (2.5493) grad_norm: 1.1739 (1.3041) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 22:08:29 root] (utils.py 283): INFO Epoch: [13] [1770/2502] eta: 0:35:05 lr: 0.000003 loss_cls: 2.5417 (2.5491) grad_norm: 1.1536 (1.3037) time: 2.8788 data: 0.0002 max mem: 28454 +[2024-12-12 22:08:58 root] (utils.py 283): INFO Epoch: [13] [1780/2502] eta: 0:34:36 lr: 0.000003 loss_cls: 2.6742 (2.5501) grad_norm: 1.1334 (1.3026) time: 2.8783 data: 0.0003 max mem: 28454 +[2024-12-12 22:09:26 root] (utils.py 283): INFO Epoch: [13] [1790/2502] eta: 0:34:08 lr: 0.000003 loss_cls: 2.7555 (2.5501) grad_norm: 1.1048 (1.3018) time: 2.8761 data: 0.0003 max mem: 28454 +[2024-12-12 22:09:55 root] (utils.py 283): INFO Epoch: [13] [1800/2502] eta: 0:33:39 lr: 0.000003 loss_cls: 2.7189 (2.5509) grad_norm: 1.0976 (1.3008) time: 2.8748 data: 0.0002 max mem: 28454 +[2024-12-12 22:10:24 root] (utils.py 283): INFO Epoch: [13] [1810/2502] eta: 0:33:10 lr: 0.000003 loss_cls: 2.7256 (2.5513) grad_norm: 1.0933 (1.3001) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 22:10:53 root] (utils.py 283): INFO Epoch: [13] [1820/2502] eta: 0:32:41 lr: 0.000003 loss_cls: 2.3723 (2.5496) grad_norm: 1.1592 (1.2995) time: 2.8713 data: 0.0003 max mem: 28454 +[2024-12-12 22:11:21 root] (utils.py 283): INFO Epoch: [13] [1830/2502] eta: 0:32:12 lr: 0.000003 loss_cls: 2.4183 (2.5495) grad_norm: 1.0697 (1.2984) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 22:11:50 root] (utils.py 283): INFO Epoch: [13] [1840/2502] eta: 0:31:44 lr: 0.000003 loss_cls: 2.6210 (2.5504) grad_norm: 1.1680 (1.3207) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 22:12:19 root] (utils.py 283): INFO Epoch: [13] [1850/2502] eta: 0:31:15 lr: 0.000003 loss_cls: 2.7951 (2.5517) grad_norm: 1.2011 (1.3203) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-12 22:12:47 root] (utils.py 283): INFO Epoch: [13] [1860/2502] eta: 0:30:46 lr: 0.000003 loss_cls: 2.6735 (2.5516) grad_norm: 1.1826 (1.3201) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 22:13:16 root] (utils.py 283): INFO Epoch: [13] [1870/2502] eta: 0:30:17 lr: 0.000003 loss_cls: 2.5444 (2.5520) grad_norm: 1.2306 (1.3281) time: 2.8782 data: 0.0002 max mem: 28454 +[2024-12-12 22:13:45 root] (utils.py 283): INFO Epoch: [13] [1880/2502] eta: 0:29:49 lr: 0.000003 loss_cls: 2.6320 (2.5521) grad_norm: 1.2324 (1.3314) time: 2.8802 data: 0.0003 max mem: 28454 +[2024-12-12 22:14:14 root] (utils.py 283): INFO Epoch: [13] [1890/2502] eta: 0:29:20 lr: 0.000003 loss_cls: 2.6162 (2.5519) grad_norm: 1.2155 (1.3353) time: 2.8740 data: 0.0003 max mem: 28454 +[2024-12-12 22:14:42 root] (utils.py 283): INFO Epoch: [13] [1900/2502] eta: 0:28:51 lr: 0.000003 loss_cls: 2.4491 (2.5507) grad_norm: 1.2155 (1.3351) time: 2.8742 data: 0.0002 max mem: 28454 +[2024-12-12 22:15:11 root] (utils.py 283): INFO Epoch: [13] [1910/2502] eta: 0:28:22 lr: 0.000003 loss_cls: 2.4491 (2.5504) grad_norm: 1.1913 (1.3346) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 22:15:40 root] (utils.py 283): INFO Epoch: [13] [1920/2502] eta: 0:27:54 lr: 0.000003 loss_cls: 2.5499 (2.5509) grad_norm: 1.1797 (1.3340) time: 2.8768 data: 0.0002 max mem: 28454 +[2024-12-12 22:16:09 root] (utils.py 283): INFO Epoch: [13] [1930/2502] eta: 0:27:25 lr: 0.000003 loss_cls: 2.6032 (2.5514) grad_norm: 1.1385 (1.3331) time: 2.8755 data: 0.0002 max mem: 28454 +[2024-12-12 22:16:38 root] (utils.py 283): INFO Epoch: [13] [1940/2502] eta: 0:26:56 lr: 0.000003 loss_cls: 2.6032 (2.5509) grad_norm: 1.1272 (1.3337) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 22:17:06 root] (utils.py 283): INFO Epoch: [13] [1950/2502] eta: 0:26:27 lr: 0.000003 loss_cls: 2.5874 (2.5509) grad_norm: 1.1272 (1.3325) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-12 22:17:35 root] (utils.py 283): INFO Epoch: [13] [1960/2502] eta: 0:25:58 lr: 0.000003 loss_cls: 2.7478 (2.5520) grad_norm: 1.1203 (1.3339) time: 2.8746 data: 0.0003 max mem: 28454 +[2024-12-12 22:18:04 root] (utils.py 283): INFO Epoch: [13] [1970/2502] eta: 0:25:30 lr: 0.000003 loss_cls: 2.7490 (2.5522) grad_norm: 1.1203 (1.3330) time: 2.8769 data: 0.0002 max mem: 28454 +[2024-12-12 22:18:33 root] (utils.py 283): INFO Epoch: [13] [1980/2502] eta: 0:25:01 lr: 0.000003 loss_cls: 2.6156 (2.5521) grad_norm: 1.1356 (1.3331) time: 2.8784 data: 0.0002 max mem: 28454 +[2024-12-12 22:19:01 root] (utils.py 283): INFO Epoch: [13] [1990/2502] eta: 0:24:32 lr: 0.000003 loss_cls: 2.5770 (2.5520) grad_norm: 1.1642 (1.3334) time: 2.8778 data: 0.0002 max mem: 28454 +[2024-12-12 22:19:30 root] (utils.py 283): INFO Epoch: [13] [2000/2502] eta: 0:24:03 lr: 0.000003 loss_cls: 2.5764 (2.5517) grad_norm: 1.0992 (1.3324) time: 2.8779 data: 0.0002 max mem: 28454 +[2024-12-12 22:19:59 root] (utils.py 283): INFO Epoch: [13] [2010/2502] eta: 0:23:35 lr: 0.000003 loss_cls: 2.4772 (2.5512) grad_norm: 1.1192 (1.3350) time: 2.8796 data: 0.0002 max mem: 28454 +[2024-12-12 22:20:28 root] (utils.py 283): INFO Epoch: [13] [2020/2502] eta: 0:23:06 lr: 0.000003 loss_cls: 2.5458 (2.5507) grad_norm: 1.2175 (1.3360) time: 2.8796 data: 0.0002 max mem: 28454 +[2024-12-12 22:20:57 root] (utils.py 283): INFO Epoch: [13] [2030/2502] eta: 0:22:37 lr: 0.000003 loss_cls: 2.6729 (2.5508) grad_norm: 1.1692 (1.3351) time: 2.8780 data: 0.0002 max mem: 28454 +[2024-12-12 22:21:25 root] (utils.py 283): INFO Epoch: [13] [2040/2502] eta: 0:22:08 lr: 0.000003 loss_cls: 2.6706 (2.5508) grad_norm: 1.1677 (1.3347) time: 2.8781 data: 0.0002 max mem: 28454 +[2024-12-12 22:21:54 root] (utils.py 283): INFO Epoch: [13] [2050/2502] eta: 0:21:40 lr: 0.000003 loss_cls: 2.6047 (2.5505) grad_norm: 1.1538 (1.3334) time: 2.8743 data: 0.0002 max mem: 28454 +[2024-12-12 22:22:23 root] (utils.py 283): INFO Epoch: [13] [2060/2502] eta: 0:21:11 lr: 0.000003 loss_cls: 2.6047 (2.5508) grad_norm: 1.0622 (1.3322) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-12 22:22:52 root] (utils.py 283): INFO Epoch: [13] [2070/2502] eta: 0:20:42 lr: 0.000003 loss_cls: 2.5819 (2.5500) grad_norm: 1.0957 (1.3311) time: 2.8756 data: 0.0002 max mem: 28454 +[2024-12-12 22:23:20 root] (utils.py 283): INFO Epoch: [13] [2080/2502] eta: 0:20:13 lr: 0.000003 loss_cls: 2.5697 (2.5501) grad_norm: 1.0957 (1.3318) time: 2.8759 data: 0.0002 max mem: 28454 +[2024-12-12 22:23:49 root] (utils.py 283): INFO Epoch: [13] [2090/2502] eta: 0:19:45 lr: 0.000003 loss_cls: 2.6712 (2.5492) grad_norm: 1.1278 (1.3322) time: 2.8754 data: 0.0002 max mem: 28454 +[2024-12-12 22:24:18 root] (utils.py 283): INFO Epoch: [13] [2100/2502] eta: 0:19:16 lr: 0.000003 loss_cls: 2.5905 (2.5485) grad_norm: 1.1278 (1.3315) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 22:24:46 root] (utils.py 283): INFO Epoch: [13] [2110/2502] eta: 0:18:47 lr: 0.000003 loss_cls: 2.6125 (2.5485) grad_norm: 1.0717 (1.3301) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 22:25:15 root] (utils.py 283): INFO Epoch: [13] [2120/2502] eta: 0:18:18 lr: 0.000003 loss_cls: 2.4394 (2.5473) grad_norm: 1.0235 (1.3289) time: 2.8731 data: 0.0002 max mem: 28454 +[2024-12-12 22:25:44 root] (utils.py 283): INFO Epoch: [13] [2130/2502] eta: 0:17:49 lr: 0.000003 loss_cls: 2.4394 (2.5472) grad_norm: 1.1071 (1.3283) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 22:26:13 root] (utils.py 283): INFO Epoch: [13] [2140/2502] eta: 0:17:21 lr: 0.000003 loss_cls: 2.5296 (2.5477) grad_norm: 1.1248 (1.3273) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-12 22:26:41 root] (utils.py 283): INFO Epoch: [13] [2150/2502] eta: 0:16:52 lr: 0.000003 loss_cls: 2.6772 (2.5475) grad_norm: 1.1077 (1.3278) time: 2.8747 data: 0.0002 max mem: 28454 +[2024-12-12 22:27:10 root] (utils.py 283): INFO Epoch: [13] [2160/2502] eta: 0:16:23 lr: 0.000003 loss_cls: 2.7060 (2.5478) grad_norm: 1.1071 (1.3268) time: 2.8737 data: 0.0003 max mem: 28454 +[2024-12-12 22:27:39 root] (utils.py 283): INFO Epoch: [13] [2170/2502] eta: 0:15:54 lr: 0.000003 loss_cls: 2.7807 (2.5490) grad_norm: 1.1071 (1.3272) time: 2.8734 data: 0.0003 max mem: 28454 +[2024-12-12 22:28:08 root] (utils.py 283): INFO Epoch: [13] [2180/2502] eta: 0:15:26 lr: 0.000003 loss_cls: 2.7881 (2.5505) grad_norm: 1.1579 (1.3276) time: 2.8724 data: 0.0002 max mem: 28454 +[2024-12-12 22:28:36 root] (utils.py 283): INFO Epoch: [13] [2190/2502] eta: 0:14:57 lr: 0.000003 loss_cls: 2.7465 (2.5509) grad_norm: 1.2038 (1.3272) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 22:29:05 root] (utils.py 283): INFO Epoch: [13] [2200/2502] eta: 0:14:28 lr: 0.000003 loss_cls: 2.6566 (2.5510) grad_norm: 1.0776 (1.3260) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 22:29:34 root] (utils.py 283): INFO Epoch: [13] [2210/2502] eta: 0:13:59 lr: 0.000003 loss_cls: 2.6435 (2.5505) grad_norm: 1.0878 (1.3253) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 22:30:02 root] (utils.py 283): INFO Epoch: [13] [2220/2502] eta: 0:13:31 lr: 0.000003 loss_cls: 2.5405 (2.5505) grad_norm: 1.1347 (1.3244) time: 2.8706 data: 0.0003 max mem: 28454 +[2024-12-12 22:30:31 root] (utils.py 283): INFO Epoch: [13] [2230/2502] eta: 0:13:02 lr: 0.000003 loss_cls: 2.6607 (2.5513) grad_norm: 1.1347 (1.3237) time: 2.8703 data: 0.0003 max mem: 28454 +[2024-12-12 22:31:00 root] (utils.py 283): INFO Epoch: [13] [2240/2502] eta: 0:12:33 lr: 0.000003 loss_cls: 2.6851 (2.5513) grad_norm: 1.2046 (1.3253) time: 2.8722 data: 0.0003 max mem: 28454 +[2024-12-12 22:31:29 root] (utils.py 283): INFO Epoch: [13] [2250/2502] eta: 0:12:04 lr: 0.000003 loss_cls: 2.6851 (2.5519) grad_norm: 1.1581 (1.3244) time: 2.8711 data: 0.0002 max mem: 28454 +[2024-12-12 22:31:57 root] (utils.py 283): INFO Epoch: [13] [2260/2502] eta: 0:11:35 lr: 0.000003 loss_cls: 2.5537 (2.5515) grad_norm: 1.1338 (1.4855) time: 2.8696 data: 0.0002 max mem: 28454 +[2024-12-12 22:32:26 root] (utils.py 283): INFO Epoch: [13] [2270/2502] eta: 0:11:07 lr: 0.000003 loss_cls: 2.4827 (2.5511) grad_norm: 1.2094 (1.4843) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 22:32:55 root] (utils.py 283): INFO Epoch: [13] [2280/2502] eta: 0:10:38 lr: 0.000003 loss_cls: 2.5252 (2.5505) grad_norm: 1.2094 (1.4834) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-12 22:33:23 root] (utils.py 283): INFO Epoch: [13] [2290/2502] eta: 0:10:09 lr: 0.000003 loss_cls: 2.6940 (2.5507) grad_norm: 1.2514 (1.4856) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 22:33:52 root] (utils.py 283): INFO Epoch: [13] [2300/2502] eta: 0:09:40 lr: 0.000003 loss_cls: 2.5987 (2.5504) grad_norm: 1.2325 (1.4844) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 22:34:21 root] (utils.py 283): INFO Epoch: [13] [2310/2502] eta: 0:09:12 lr: 0.000003 loss_cls: 2.5987 (2.5513) grad_norm: 1.1916 (1.4846) time: 2.8720 data: 0.0003 max mem: 28454 +[2024-12-12 22:34:50 root] (utils.py 283): INFO Epoch: [13] [2320/2502] eta: 0:08:43 lr: 0.000003 loss_cls: 2.6562 (2.5513) grad_norm: 1.2166 (1.4845) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 22:35:18 root] (utils.py 283): INFO Epoch: [13] [2330/2502] eta: 0:08:14 lr: 0.000003 loss_cls: 2.6928 (2.5522) grad_norm: 1.2165 (1.4836) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-12 22:35:47 root] (utils.py 283): INFO Epoch: [13] [2340/2502] eta: 0:07:45 lr: 0.000003 loss_cls: 2.7111 (2.5522) grad_norm: 1.2033 (1.4835) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 22:36:16 root] (utils.py 283): INFO Epoch: [13] [2350/2502] eta: 0:07:17 lr: 0.000003 loss_cls: 2.7111 (2.5530) grad_norm: 1.1726 (1.4833) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-12 22:36:44 root] (utils.py 283): INFO Epoch: [13] [2360/2502] eta: 0:06:48 lr: 0.000003 loss_cls: 2.5936 (2.5524) grad_norm: 1.1022 (1.4817) time: 2.8729 data: 0.0002 max mem: 28454 +[2024-12-12 22:37:13 root] (utils.py 283): INFO Epoch: [13] [2370/2502] eta: 0:06:19 lr: 0.000003 loss_cls: 2.5403 (2.5521) grad_norm: 1.1210 (1.4805) time: 2.8727 data: 0.0002 max mem: 28454 +[2024-12-12 22:37:42 root] (utils.py 283): INFO Epoch: [13] [2380/2502] eta: 0:05:50 lr: 0.000003 loss_cls: 2.6721 (2.5523) grad_norm: 1.1421 (1.4795) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 22:38:11 root] (utils.py 283): INFO Epoch: [13] [2390/2502] eta: 0:05:22 lr: 0.000003 loss_cls: 2.7239 (2.5529) grad_norm: 1.1303 (1.4781) time: 2.8710 data: 0.0002 max mem: 28454 +[2024-12-12 22:38:39 root] (utils.py 283): INFO Epoch: [13] [2400/2502] eta: 0:04:53 lr: 0.000003 loss_cls: 2.6621 (2.5531) grad_norm: 1.0910 (1.4785) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 22:39:08 root] (utils.py 283): INFO Epoch: [13] [2410/2502] eta: 0:04:24 lr: 0.000003 loss_cls: 2.5840 (2.5530) grad_norm: 1.1562 (1.4774) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-12 22:39:37 root] (utils.py 283): INFO Epoch: [13] [2420/2502] eta: 0:03:55 lr: 0.000003 loss_cls: 2.6161 (2.5537) grad_norm: 1.2132 (1.4768) time: 2.8730 data: 0.0002 max mem: 28454 +[2024-12-12 22:40:06 root] (utils.py 283): INFO Epoch: [13] [2430/2502] eta: 0:03:27 lr: 0.000003 loss_cls: 2.7370 (2.5544) grad_norm: 1.1487 (1.4766) time: 2.8777 data: 0.0002 max mem: 28454 +[2024-12-12 22:40:34 root] (utils.py 283): INFO Epoch: [13] [2440/2502] eta: 0:02:58 lr: 0.000003 loss_cls: 2.6468 (2.5545) grad_norm: 1.1487 (1.4773) time: 2.8763 data: 0.0002 max mem: 28454 +[2024-12-12 22:41:03 root] (utils.py 283): INFO Epoch: [13] [2450/2502] eta: 0:02:29 lr: 0.000003 loss_cls: 2.6468 (2.5549) grad_norm: 1.2309 (1.4787) time: 2.8728 data: 0.0002 max mem: 28454 +[2024-12-12 22:41:32 root] (utils.py 283): INFO Epoch: [13] [2460/2502] eta: 0:02:00 lr: 0.000003 loss_cls: 2.6765 (2.5539) grad_norm: 1.1201 (1.4775) time: 2.8718 data: 0.0002 max mem: 28454 +[2024-12-12 22:42:00 root] (utils.py 283): INFO Epoch: [13] [2470/2502] eta: 0:01:32 lr: 0.000003 loss_cls: 2.3733 (2.5537) grad_norm: 1.1489 (1.4773) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-12 22:42:29 root] (utils.py 283): INFO Epoch: [13] [2480/2502] eta: 0:01:03 lr: 0.000003 loss_cls: 2.6907 (2.5539) grad_norm: 1.1614 (1.4760) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-12 22:42:58 root] (utils.py 283): INFO Epoch: [13] [2490/2502] eta: 0:00:34 lr: 0.000003 loss_cls: 2.5720 (2.5535) grad_norm: 1.1435 (1.4747) time: 2.8943 data: 0.0204 max mem: 28454 +[2024-12-12 22:43:27 root] (utils.py 283): INFO Epoch: [13] [2500/2502] eta: 0:00:05 lr: 0.000003 loss_cls: 2.4783 (2.5533) grad_norm: 1.1395 (1.4734) time: 2.8925 data: 0.0204 max mem: 28454 +[2024-12-12 22:43:30 root] (utils.py 283): INFO Epoch: [13] [2501/2502] eta: 0:00:02 lr: 0.000003 loss_cls: 2.3416 (2.5530) grad_norm: 1.1395 (1.4733) time: 2.8922 data: 0.0204 max mem: 28454 +[2024-12-12 22:43:30 root] (utils.py 297): INFO Epoch: [13] Total time: 1:59:55 (2.8759 s / it) +[2024-12-12 22:43:30 root] (engine.py 179): INFO Averaged stats:lr: 0.000003 loss_cls: 2.3416 (2.5500) grad_norm: 1.1395 (1.4733) +[2024-12-12 22:43:33 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3896 (0.3896) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5423 data: 0.0004 max mem: 28454 +[2024-12-12 22:43:38 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6297 (0.5917) acc1: 85.1562 (86.7188) acc3: 96.8750 (96.8040) acc5: 98.4375 (98.0824) time: 0.5475 data: 0.0005 max mem: 28454 +[2024-12-12 22:43:44 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6297 (0.6176) acc1: 85.9375 (86.6815) acc3: 96.8750 (96.4658) acc5: 97.6562 (97.6935) time: 0.5479 data: 0.0005 max mem: 28454 +[2024-12-12 22:43:49 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6351 (0.6497) acc1: 86.7188 (85.5847) acc3: 96.0938 (96.2450) acc5: 97.6562 (97.5806) time: 0.5482 data: 0.0004 max mem: 28454 +[2024-12-12 22:43:55 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6904 (0.6571) acc1: 86.7188 (85.4802) acc3: 96.0938 (96.1509) acc5: 97.6562 (97.5991) time: 0.5480 data: 0.0005 max mem: 28454 +[2024-12-12 22:44:00 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8120 (0.7368) acc1: 78.9062 (83.7010) acc3: 93.7500 (95.1593) acc5: 95.3125 (96.8137) time: 0.5476 data: 0.0004 max mem: 28454 +[2024-12-12 22:44:06 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9640 (0.7636) acc1: 80.4688 (83.5553) acc3: 90.6250 (94.5569) acc5: 93.7500 (96.3115) time: 0.5480 data: 0.0004 max mem: 28454 +[2024-12-12 22:44:11 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9467 (0.7917) acc1: 80.4688 (82.6915) acc3: 91.4062 (94.1901) acc5: 93.7500 (96.1598) time: 0.5483 data: 0.0004 max mem: 28454 +[2024-12-12 22:44:17 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9960 (0.8146) acc1: 78.1250 (82.2820) acc3: 91.4062 (93.9043) acc5: 94.5312 (95.8816) time: 0.5484 data: 0.0007 max mem: 28454 +[2024-12-12 22:44:22 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9819 (0.8351) acc1: 77.3438 (81.5934) acc3: 91.4062 (93.5869) acc5: 94.5312 (95.7160) time: 0.5487 data: 0.0007 max mem: 28454 +[2024-12-12 22:44:26 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8935 (0.8320) acc1: 78.1250 (81.5920) acc3: 92.1875 (93.6480) acc5: 94.5312 (95.7680) time: 0.5397 data: 0.0005 max mem: 28454 +[2024-12-12 22:44:26 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5465 s / it) +[2024-12-12 22:44:26 root] (engine.py 264): INFO * Acc@1 81.882 Acc@3 93.462 Acc@5 95.758 loss 0.829 flops 13.207 layer_flops 13.109 +[2024-12-12 22:44:26 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.9% +[2024-12-12 22:44:28 root] (main.py 576): INFO Max accuracy: 81.88% +[2024-12-12 22:44:31 root] (utils.py 283): INFO Epoch: [14] [ 0/2502] eta: 1:58:19 lr: 0.000002 loss_cls: 2.7679 (2.7679) grad_norm: 1.8595 (1.8595) time: 2.8374 data: 0.0004 max mem: 28454 +[2024-12-12 22:44:59 root] (utils.py 283): INFO Epoch: [14] [ 10/2502] eta: 1:58:45 lr: 0.000002 loss_cls: 2.7679 (2.6603) grad_norm: 1.1347 (1.2348) time: 2.8592 data: 0.0003 max mem: 28454 +[2024-12-12 22:45:28 root] (utils.py 283): INFO Epoch: [14] [ 20/2502] eta: 1:58:23 lr: 0.000002 loss_cls: 2.4400 (2.4927) grad_norm: 1.1347 (1.2002) time: 2.8631 data: 0.0003 max mem: 28454 +[2024-12-12 22:45:57 root] (utils.py 283): INFO Epoch: [14] [ 30/2502] eta: 1:57:58 lr: 0.000002 loss_cls: 2.4400 (2.5090) grad_norm: 1.1821 (1.8874) time: 2.8659 data: 0.0002 max mem: 28454 +[2024-12-12 22:46:25 root] (utils.py 283): INFO Epoch: [14] [ 40/2502] eta: 1:57:29 lr: 0.000002 loss_cls: 2.5260 (2.5139) grad_norm: 1.1821 (1.7650) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 22:46:54 root] (utils.py 283): INFO Epoch: [14] [ 50/2502] eta: 1:57:03 lr: 0.000002 loss_cls: 2.6042 (2.4788) grad_norm: 1.0462 (1.6409) time: 2.8654 data: 0.0003 max mem: 28454 +[2024-12-12 22:47:23 root] (utils.py 283): INFO Epoch: [14] [ 60/2502] eta: 1:56:33 lr: 0.000002 loss_cls: 2.6578 (2.5240) grad_norm: 1.1153 (1.5858) time: 2.8639 data: 0.0003 max mem: 28454 +[2024-12-12 22:47:51 root] (utils.py 283): INFO Epoch: [14] [ 70/2502] eta: 1:56:06 lr: 0.000002 loss_cls: 2.8320 (2.5339) grad_norm: 1.1705 (1.8214) time: 2.8648 data: 0.0003 max mem: 28454 +[2024-12-12 22:48:20 root] (utils.py 283): INFO Epoch: [14] [ 80/2502] eta: 1:55:37 lr: 0.000002 loss_cls: 2.6354 (2.5203) grad_norm: 1.1312 (1.7377) time: 2.8665 data: 0.0002 max mem: 28454 +[2024-12-12 22:48:49 root] (utils.py 283): INFO Epoch: [14] [ 90/2502] eta: 1:55:09 lr: 0.000002 loss_cls: 2.6613 (2.5441) grad_norm: 1.1090 (1.6744) time: 2.8648 data: 0.0002 max mem: 28454 +[2024-12-12 22:49:17 root] (utils.py 283): INFO Epoch: [14] [ 100/2502] eta: 1:54:40 lr: 0.000002 loss_cls: 2.8133 (2.5626) grad_norm: 1.1090 (2.5851) time: 2.8643 data: 0.0003 max mem: 28454 +[2024-12-12 22:49:46 root] (utils.py 283): INFO Epoch: [14] [ 110/2502] eta: 1:54:11 lr: 0.000002 loss_cls: 2.7791 (2.5776) grad_norm: 1.2312 (2.4690) time: 2.8641 data: 0.0003 max mem: 28454 +[2024-12-12 22:50:14 root] (utils.py 283): INFO Epoch: [14] [ 120/2502] eta: 1:53:43 lr: 0.000002 loss_cls: 2.7209 (2.5710) grad_norm: 1.2005 (2.3600) time: 2.8650 data: 0.0003 max mem: 28454 +[2024-12-12 22:50:43 root] (utils.py 283): INFO Epoch: [14] [ 130/2502] eta: 1:53:15 lr: 0.000002 loss_cls: 2.7090 (2.5746) grad_norm: 1.0953 (2.2729) time: 2.8665 data: 0.0002 max mem: 28454 +[2024-12-12 22:51:12 root] (utils.py 283): INFO Epoch: [14] [ 140/2502] eta: 1:52:46 lr: 0.000002 loss_cls: 2.7351 (2.5833) grad_norm: 1.1570 (2.2150) time: 2.8654 data: 0.0002 max mem: 28454 +[2024-12-12 22:51:40 root] (utils.py 283): INFO Epoch: [14] [ 150/2502] eta: 1:52:18 lr: 0.000002 loss_cls: 2.6993 (2.5859) grad_norm: 1.1763 (2.1528) time: 2.8665 data: 0.0003 max mem: 28454 +[2024-12-12 22:52:09 root] (utils.py 283): INFO Epoch: [14] [ 160/2502] eta: 1:51:50 lr: 0.000002 loss_cls: 2.7706 (2.5999) grad_norm: 1.2220 (2.1064) time: 2.8705 data: 0.0003 max mem: 28454 +[2024-12-12 22:52:38 root] (utils.py 283): INFO Epoch: [14] [ 170/2502] eta: 1:51:22 lr: 0.000002 loss_cls: 2.8199 (2.6037) grad_norm: 1.2035 (2.0510) time: 2.8684 data: 0.0002 max mem: 28454 +[2024-12-12 22:53:07 root] (utils.py 283): INFO Epoch: [14] [ 180/2502] eta: 1:51:04 lr: 0.000002 loss_cls: 2.7357 (2.6090) grad_norm: 1.1237 (2.0165) time: 2.9075 data: 0.0003 max mem: 28454 +[2024-12-12 22:53:36 root] (utils.py 283): INFO Epoch: [14] [ 190/2502] eta: 1:50:38 lr: 0.000002 loss_cls: 2.6997 (2.6093) grad_norm: 1.0846 (1.9702) time: 2.9221 data: 0.0004 max mem: 28454 +[2024-12-12 22:54:06 root] (utils.py 283): INFO Epoch: [14] [ 200/2502] eta: 1:50:19 lr: 0.000002 loss_cls: 2.6296 (2.6088) grad_norm: 1.0592 (1.9334) time: 2.9267 data: 0.0013 max mem: 28454 +[2024-12-12 22:54:35 root] (utils.py 283): INFO Epoch: [14] [ 210/2502] eta: 1:49:52 lr: 0.000002 loss_cls: 2.5973 (2.6048) grad_norm: 1.0740 (1.9083) time: 2.9245 data: 0.0013 max mem: 28454 +[2024-12-12 22:55:04 root] (utils.py 283): INFO Epoch: [14] [ 220/2502] eta: 1:49:28 lr: 0.000002 loss_cls: 2.7351 (2.6168) grad_norm: 1.1675 (1.8793) time: 2.9079 data: 0.0004 max mem: 28454 +[2024-12-12 22:55:33 root] (utils.py 283): INFO Epoch: [14] [ 230/2502] eta: 1:49:01 lr: 0.000002 loss_cls: 2.7606 (2.6193) grad_norm: 1.1711 (1.8471) time: 2.9088 data: 0.0004 max mem: 28454 +[2024-12-12 22:56:02 root] (utils.py 283): INFO Epoch: [14] [ 240/2502] eta: 1:48:38 lr: 0.000002 loss_cls: 2.6802 (2.6133) grad_norm: 1.1128 (1.8180) time: 2.9194 data: 0.0004 max mem: 28454 +[2024-12-12 22:56:31 root] (utils.py 283): INFO Epoch: [14] [ 250/2502] eta: 1:48:10 lr: 0.000002 loss_cls: 2.5086 (2.6040) grad_norm: 1.1128 (1.7949) time: 2.9174 data: 0.0004 max mem: 28454 +[2024-12-12 22:57:00 root] (utils.py 283): INFO Epoch: [14] [ 260/2502] eta: 1:47:40 lr: 0.000002 loss_cls: 2.4121 (2.5923) grad_norm: 1.1482 (1.7692) time: 2.8771 data: 0.0003 max mem: 28454 +[2024-12-12 22:57:29 root] (utils.py 283): INFO Epoch: [14] [ 270/2502] eta: 1:47:10 lr: 0.000002 loss_cls: 2.4801 (2.5910) grad_norm: 1.0953 (1.7456) time: 2.8665 data: 0.0003 max mem: 28454 +[2024-12-12 22:57:57 root] (utils.py 283): INFO Epoch: [14] [ 280/2502] eta: 1:46:40 lr: 0.000002 loss_cls: 2.6722 (2.5894) grad_norm: 1.1018 (1.7512) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 22:58:26 root] (utils.py 283): INFO Epoch: [14] [ 290/2502] eta: 1:46:11 lr: 0.000002 loss_cls: 2.4848 (2.5851) grad_norm: 1.0704 (1.7463) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-12 22:58:55 root] (utils.py 283): INFO Epoch: [14] [ 300/2502] eta: 1:45:41 lr: 0.000002 loss_cls: 2.4848 (2.5824) grad_norm: 1.0557 (1.7385) time: 2.8691 data: 0.0003 max mem: 28454 +[2024-12-12 22:59:23 root] (utils.py 283): INFO Epoch: [14] [ 310/2502] eta: 1:45:11 lr: 0.000002 loss_cls: 2.6260 (2.5810) grad_norm: 1.1799 (1.7537) time: 2.8681 data: 0.0003 max mem: 28454 +[2024-12-12 22:59:52 root] (utils.py 283): INFO Epoch: [14] [ 320/2502] eta: 1:44:42 lr: 0.000002 loss_cls: 2.4541 (2.5784) grad_norm: 1.1497 (1.7358) time: 2.8686 data: 0.0003 max mem: 28454 +[2024-12-12 23:00:21 root] (utils.py 283): INFO Epoch: [14] [ 330/2502] eta: 1:44:13 lr: 0.000002 loss_cls: 2.4393 (2.5747) grad_norm: 1.1694 (1.8065) time: 2.8705 data: 0.0003 max mem: 28454 +[2024-12-12 23:00:50 root] (utils.py 283): INFO Epoch: [14] [ 340/2502] eta: 1:43:43 lr: 0.000002 loss_cls: 2.6295 (2.5744) grad_norm: 1.2150 (1.7908) time: 2.8694 data: 0.0003 max mem: 28454 +[2024-12-12 23:01:18 root] (utils.py 283): INFO Epoch: [14] [ 350/2502] eta: 1:43:14 lr: 0.000002 loss_cls: 2.5598 (2.5665) grad_norm: 1.0816 (1.7720) time: 2.8693 data: 0.0003 max mem: 28454 +[2024-12-12 23:01:47 root] (utils.py 283): INFO Epoch: [14] [ 360/2502] eta: 1:42:44 lr: 0.000002 loss_cls: 2.5598 (2.5666) grad_norm: 1.1128 (1.7575) time: 2.8687 data: 0.0003 max mem: 28454 +[2024-12-12 23:02:16 root] (utils.py 283): INFO Epoch: [14] [ 370/2502] eta: 1:42:15 lr: 0.000002 loss_cls: 2.4155 (2.5553) grad_norm: 1.1537 (1.7408) time: 2.8693 data: 0.0003 max mem: 28454 +[2024-12-12 23:02:44 root] (utils.py 283): INFO Epoch: [14] [ 380/2502] eta: 1:41:46 lr: 0.000002 loss_cls: 2.1712 (2.5506) grad_norm: 1.1196 (1.7604) time: 2.8694 data: 0.0003 max mem: 28454 +[2024-12-12 23:03:13 root] (utils.py 283): INFO Epoch: [14] [ 390/2502] eta: 1:41:17 lr: 0.000002 loss_cls: 2.5606 (2.5528) grad_norm: 1.1309 (1.7513) time: 2.8703 data: 0.0003 max mem: 28454 +[2024-12-12 23:03:42 root] (utils.py 283): INFO Epoch: [14] [ 400/2502] eta: 1:40:48 lr: 0.000002 loss_cls: 2.5642 (2.5523) grad_norm: 1.2073 (1.7536) time: 2.8710 data: 0.0003 max mem: 28454 +[2024-12-12 23:04:10 root] (utils.py 283): INFO Epoch: [14] [ 410/2502] eta: 1:40:18 lr: 0.000002 loss_cls: 2.5642 (2.5540) grad_norm: 1.1648 (1.7425) time: 2.8682 data: 0.0003 max mem: 28454 +[2024-12-12 23:04:39 root] (utils.py 283): INFO Epoch: [14] [ 420/2502] eta: 1:39:49 lr: 0.000002 loss_cls: 2.5619 (2.5518) grad_norm: 1.1648 (1.7368) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 23:05:08 root] (utils.py 283): INFO Epoch: [14] [ 430/2502] eta: 1:39:20 lr: 0.000002 loss_cls: 2.4885 (2.5508) grad_norm: 1.1990 (1.7363) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 23:05:36 root] (utils.py 283): INFO Epoch: [14] [ 440/2502] eta: 1:38:51 lr: 0.000002 loss_cls: 2.7150 (2.5550) grad_norm: 1.1582 (1.7251) time: 2.8691 data: 0.0002 max mem: 28454 +[2024-12-12 23:06:05 root] (utils.py 283): INFO Epoch: [14] [ 450/2502] eta: 1:38:22 lr: 0.000002 loss_cls: 2.7074 (2.5564) grad_norm: 1.0939 (1.7132) time: 2.8733 data: 0.0002 max mem: 28454 +[2024-12-12 23:06:34 root] (utils.py 283): INFO Epoch: [14] [ 460/2502] eta: 1:37:52 lr: 0.000002 loss_cls: 2.6287 (2.5565) grad_norm: 1.1018 (1.7012) time: 2.8679 data: 0.0002 max mem: 28454 +[2024-12-12 23:07:02 root] (utils.py 283): INFO Epoch: [14] [ 470/2502] eta: 1:37:23 lr: 0.000002 loss_cls: 2.6585 (2.5559) grad_norm: 1.1018 (1.6887) time: 2.8631 data: 0.0002 max mem: 28454 +[2024-12-12 23:07:31 root] (utils.py 283): INFO Epoch: [14] [ 480/2502] eta: 1:36:54 lr: 0.000002 loss_cls: 2.6567 (2.5546) grad_norm: 1.0304 (1.6804) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 23:08:00 root] (utils.py 283): INFO Epoch: [14] [ 490/2502] eta: 1:36:25 lr: 0.000002 loss_cls: 2.7480 (2.5563) grad_norm: 1.1767 (1.6883) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-12 23:08:29 root] (utils.py 283): INFO Epoch: [14] [ 500/2502] eta: 1:35:56 lr: 0.000002 loss_cls: 2.7411 (2.5549) grad_norm: 1.2243 (1.6859) time: 2.8692 data: 0.0002 max mem: 28454 +[2024-12-12 23:08:57 root] (utils.py 283): INFO Epoch: [14] [ 510/2502] eta: 1:35:27 lr: 0.000002 loss_cls: 2.3034 (2.5505) grad_norm: 1.1332 (1.6813) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-12 23:09:26 root] (utils.py 283): INFO Epoch: [14] [ 520/2502] eta: 1:34:58 lr: 0.000002 loss_cls: 2.5627 (2.5514) grad_norm: 1.1407 (1.6741) time: 2.8671 data: 0.0002 max mem: 28454 +[2024-12-12 23:09:55 root] (utils.py 283): INFO Epoch: [14] [ 530/2502] eta: 1:34:29 lr: 0.000002 loss_cls: 2.5826 (2.5505) grad_norm: 1.1338 (1.6657) time: 2.8652 data: 0.0002 max mem: 28454 +[2024-12-12 23:10:23 root] (utils.py 283): INFO Epoch: [14] [ 540/2502] eta: 1:34:00 lr: 0.000002 loss_cls: 2.6405 (2.5521) grad_norm: 1.1338 (1.6587) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 23:10:52 root] (utils.py 283): INFO Epoch: [14] [ 550/2502] eta: 1:33:31 lr: 0.000002 loss_cls: 2.7110 (2.5533) grad_norm: 1.2085 (1.6572) time: 2.8673 data: 0.0003 max mem: 28454 +[2024-12-12 23:11:21 root] (utils.py 283): INFO Epoch: [14] [ 560/2502] eta: 1:33:02 lr: 0.000002 loss_cls: 2.6995 (2.5533) grad_norm: 1.1209 (1.6500) time: 2.8674 data: 0.0003 max mem: 28454 +[2024-12-12 23:11:49 root] (utils.py 283): INFO Epoch: [14] [ 570/2502] eta: 1:32:33 lr: 0.000002 loss_cls: 2.8214 (2.5553) grad_norm: 1.0922 (1.6610) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 23:12:18 root] (utils.py 283): INFO Epoch: [14] [ 580/2502] eta: 1:32:04 lr: 0.000002 loss_cls: 2.8179 (2.5603) grad_norm: 1.1064 (1.6507) time: 2.8683 data: 0.0002 max mem: 28454 +[2024-12-12 23:12:47 root] (utils.py 283): INFO Epoch: [14] [ 590/2502] eta: 1:31:35 lr: 0.000002 loss_cls: 2.7609 (2.5602) grad_norm: 1.1064 (1.6435) time: 2.8665 data: 0.0002 max mem: 28454 +[2024-12-12 23:13:15 root] (utils.py 283): INFO Epoch: [14] [ 600/2502] eta: 1:31:06 lr: 0.000002 loss_cls: 2.7609 (2.5651) grad_norm: 1.1340 (1.6348) time: 2.8673 data: 0.0002 max mem: 28454 +[2024-12-12 23:13:44 root] (utils.py 283): INFO Epoch: [14] [ 610/2502] eta: 1:30:37 lr: 0.000002 loss_cls: 2.7571 (2.5648) grad_norm: 1.1089 (1.6260) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-12 23:14:13 root] (utils.py 283): INFO Epoch: [14] [ 620/2502] eta: 1:30:08 lr: 0.000002 loss_cls: 2.4860 (2.5600) grad_norm: 1.0416 (1.6173) time: 2.8712 data: 0.0002 max mem: 28454 +[2024-12-12 23:14:42 root] (utils.py 283): INFO Epoch: [14] [ 630/2502] eta: 1:29:41 lr: 0.000002 loss_cls: 2.5728 (2.5628) grad_norm: 1.0427 (1.6088) time: 2.8912 data: 0.0003 max mem: 28454 +[2024-12-12 23:15:11 root] (utils.py 283): INFO Epoch: [14] [ 640/2502] eta: 1:29:13 lr: 0.000002 loss_cls: 2.5099 (2.5592) grad_norm: 1.0784 (1.6005) time: 2.9170 data: 0.0004 max mem: 28454 +[2024-12-12 23:15:40 root] (utils.py 283): INFO Epoch: [14] [ 650/2502] eta: 1:28:46 lr: 0.000002 loss_cls: 2.2815 (2.5578) grad_norm: 1.1275 (1.5935) time: 2.9288 data: 0.0010 max mem: 28454 +[2024-12-12 23:16:09 root] (utils.py 283): INFO Epoch: [14] [ 660/2502] eta: 1:28:18 lr: 0.000002 loss_cls: 2.6851 (2.5564) grad_norm: 1.1040 (1.5870) time: 2.9125 data: 0.0010 max mem: 28454 +[2024-12-12 23:16:39 root] (utils.py 283): INFO Epoch: [14] [ 670/2502] eta: 1:27:52 lr: 0.000002 loss_cls: 2.6851 (2.5570) grad_norm: 1.0617 (1.5800) time: 2.9297 data: 0.0004 max mem: 28454 +[2024-12-12 23:17:08 root] (utils.py 283): INFO Epoch: [14] [ 680/2502] eta: 1:27:23 lr: 0.000002 loss_cls: 2.6239 (2.5556) grad_norm: 1.1032 (1.5747) time: 2.9176 data: 0.0004 max mem: 28454 +[2024-12-12 23:17:36 root] (utils.py 283): INFO Epoch: [14] [ 690/2502] eta: 1:26:53 lr: 0.000002 loss_cls: 2.3347 (2.5498) grad_norm: 1.1095 (1.5681) time: 2.8649 data: 0.0003 max mem: 28454 +[2024-12-12 23:18:05 root] (utils.py 283): INFO Epoch: [14] [ 700/2502] eta: 1:26:25 lr: 0.000002 loss_cls: 2.3952 (2.5526) grad_norm: 1.0935 (1.5616) time: 2.8661 data: 0.0003 max mem: 28454 +[2024-12-12 23:18:34 root] (utils.py 283): INFO Epoch: [14] [ 710/2502] eta: 1:25:55 lr: 0.000002 loss_cls: 2.7144 (2.5501) grad_norm: 1.0493 (1.5550) time: 2.8670 data: 0.0003 max mem: 28454 +[2024-12-12 23:19:02 root] (utils.py 283): INFO Epoch: [14] [ 720/2502] eta: 1:25:26 lr: 0.000002 loss_cls: 2.6411 (2.5518) grad_norm: 1.1149 (1.5490) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-12 23:19:31 root] (utils.py 283): INFO Epoch: [14] [ 730/2502] eta: 1:24:58 lr: 0.000002 loss_cls: 2.7155 (2.5527) grad_norm: 1.1282 (1.5436) time: 2.8705 data: 0.0002 max mem: 28454 +[2024-12-12 23:20:00 root] (utils.py 283): INFO Epoch: [14] [ 740/2502] eta: 1:24:29 lr: 0.000002 loss_cls: 2.4806 (2.5476) grad_norm: 1.1434 (1.5379) time: 2.8725 data: 0.0002 max mem: 28454 +[2024-12-12 23:20:28 root] (utils.py 283): INFO Epoch: [14] [ 750/2502] eta: 1:24:00 lr: 0.000002 loss_cls: 2.4136 (2.5473) grad_norm: 1.1067 (1.5316) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 23:20:57 root] (utils.py 283): INFO Epoch: [14] [ 760/2502] eta: 1:23:31 lr: 0.000002 loss_cls: 2.6654 (2.5492) grad_norm: 1.0711 (1.5278) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-12 23:21:26 root] (utils.py 283): INFO Epoch: [14] [ 770/2502] eta: 1:23:02 lr: 0.000002 loss_cls: 2.8253 (2.5516) grad_norm: 1.1849 (1.5241) time: 2.8708 data: 0.0003 max mem: 28454 +[2024-12-12 23:21:55 root] (utils.py 283): INFO Epoch: [14] [ 780/2502] eta: 1:22:33 lr: 0.000002 loss_cls: 2.7642 (2.5504) grad_norm: 1.1705 (1.5304) time: 2.8734 data: 0.0003 max mem: 28454 +[2024-12-12 23:22:23 root] (utils.py 283): INFO Epoch: [14] [ 790/2502] eta: 1:22:04 lr: 0.000002 loss_cls: 2.4587 (2.5494) grad_norm: 1.1209 (1.5255) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-12 23:22:52 root] (utils.py 283): INFO Epoch: [14] [ 800/2502] eta: 1:21:35 lr: 0.000002 loss_cls: 2.5914 (2.5503) grad_norm: 1.0787 (1.5199) time: 2.8718 data: 0.0003 max mem: 28454 +[2024-12-12 23:23:21 root] (utils.py 283): INFO Epoch: [14] [ 810/2502] eta: 1:21:06 lr: 0.000002 loss_cls: 2.6166 (2.5510) grad_norm: 1.0810 (1.5170) time: 2.8704 data: 0.0003 max mem: 28454 +[2024-12-12 23:23:49 root] (utils.py 283): INFO Epoch: [14] [ 820/2502] eta: 1:20:37 lr: 0.000002 loss_cls: 2.7704 (2.5515) grad_norm: 1.1022 (1.5203) time: 2.8676 data: 0.0003 max mem: 28454 +[2024-12-12 23:24:18 root] (utils.py 283): INFO Epoch: [14] [ 830/2502] eta: 1:20:09 lr: 0.000002 loss_cls: 2.5563 (2.5496) grad_norm: 1.0723 (1.5154) time: 2.8668 data: 0.0002 max mem: 28454 +[2024-12-12 23:24:48 root] (utils.py 283): INFO Epoch: [14] [ 840/2502] eta: 1:19:42 lr: 0.000002 loss_cls: 2.3411 (2.5482) grad_norm: 1.0662 (1.5114) time: 2.9173 data: 0.0003 max mem: 28454 +[2024-12-12 23:25:17 root] (utils.py 283): INFO Epoch: [14] [ 850/2502] eta: 1:19:13 lr: 0.000002 loss_cls: 2.5468 (2.5483) grad_norm: 1.0812 (1.5094) time: 2.9297 data: 0.0003 max mem: 28454 +[2024-12-12 23:25:46 root] (utils.py 283): INFO Epoch: [14] [ 860/2502] eta: 1:18:45 lr: 0.000002 loss_cls: 2.4189 (2.5455) grad_norm: 1.0500 (1.5038) time: 2.9115 data: 0.0010 max mem: 28454 +[2024-12-12 23:26:15 root] (utils.py 283): INFO Epoch: [14] [ 870/2502] eta: 1:18:17 lr: 0.000002 loss_cls: 2.2804 (2.5430) grad_norm: 1.0522 (1.4991) time: 2.9104 data: 0.0010 max mem: 28454 +[2024-12-12 23:26:44 root] (utils.py 283): INFO Epoch: [14] [ 880/2502] eta: 1:17:49 lr: 0.000002 loss_cls: 2.7031 (2.5453) grad_norm: 1.0577 (1.4949) time: 2.9160 data: 0.0018 max mem: 28454 +[2024-12-12 23:27:13 root] (utils.py 283): INFO Epoch: [14] [ 890/2502] eta: 1:17:20 lr: 0.000002 loss_cls: 2.6140 (2.5439) grad_norm: 1.1002 (1.4913) time: 2.9120 data: 0.0018 max mem: 28454 +[2024-12-12 23:27:43 root] (utils.py 283): INFO Epoch: [14] [ 900/2502] eta: 1:16:53 lr: 0.000002 loss_cls: 2.6140 (2.5439) grad_norm: 1.1662 (1.4880) time: 2.9322 data: 0.0004 max mem: 28454 +[2024-12-12 23:28:12 root] (utils.py 283): INFO Epoch: [14] [ 910/2502] eta: 1:16:25 lr: 0.000002 loss_cls: 2.6658 (2.5445) grad_norm: 1.1333 (1.4919) time: 2.9531 data: 0.0004 max mem: 28454 +[2024-12-12 23:28:41 root] (utils.py 283): INFO Epoch: [14] [ 920/2502] eta: 1:15:57 lr: 0.000002 loss_cls: 2.5931 (2.5452) grad_norm: 1.1739 (1.4888) time: 2.9124 data: 0.0003 max mem: 28454 +[2024-12-12 23:29:10 root] (utils.py 283): INFO Epoch: [14] [ 930/2502] eta: 1:15:28 lr: 0.000002 loss_cls: 2.5992 (2.5463) grad_norm: 1.1818 (1.4850) time: 2.8941 data: 0.0003 max mem: 28454 +[2024-12-12 23:29:40 root] (utils.py 283): INFO Epoch: [14] [ 940/2502] eta: 1:15:01 lr: 0.000002 loss_cls: 2.7146 (2.5444) grad_norm: 1.1684 (1.4817) time: 2.9191 data: 0.0004 max mem: 28454 +[2024-12-12 23:30:09 root] (utils.py 283): INFO Epoch: [14] [ 950/2502] eta: 1:14:32 lr: 0.000002 loss_cls: 2.1706 (2.5418) grad_norm: 1.1588 (1.4779) time: 2.9325 data: 0.0007 max mem: 28454 +[2024-12-12 23:30:38 root] (utils.py 283): INFO Epoch: [14] [ 960/2502] eta: 1:14:04 lr: 0.000002 loss_cls: 2.4142 (2.5406) grad_norm: 1.1588 (1.4748) time: 2.9212 data: 0.0008 max mem: 28454 +[2024-12-12 23:31:07 root] (utils.py 283): INFO Epoch: [14] [ 970/2502] eta: 1:13:36 lr: 0.000002 loss_cls: 2.5170 (2.5404) grad_norm: 1.1040 (1.4721) time: 2.9287 data: 0.0004 max mem: 28454 +[2024-12-12 23:31:37 root] (utils.py 283): INFO Epoch: [14] [ 980/2502] eta: 1:13:08 lr: 0.000002 loss_cls: 2.5327 (2.5392) grad_norm: 1.0504 (1.4691) time: 2.9237 data: 0.0004 max mem: 28454 +[2024-12-12 23:32:05 root] (utils.py 283): INFO Epoch: [14] [ 990/2502] eta: 1:12:39 lr: 0.000002 loss_cls: 2.5940 (2.5398) grad_norm: 1.0718 (1.4665) time: 2.8940 data: 0.0004 max mem: 28454 +[2024-12-12 23:32:34 root] (utils.py 283): INFO Epoch: [14] [1000/2502] eta: 1:12:10 lr: 0.000002 loss_cls: 2.6342 (2.5399) grad_norm: 1.1391 (1.4817) time: 2.8698 data: 0.0003 max mem: 28454 +[2024-12-12 23:33:03 root] (utils.py 283): INFO Epoch: [14] [1010/2502] eta: 1:11:41 lr: 0.000002 loss_cls: 2.5923 (2.5395) grad_norm: 1.2023 (1.4979) time: 2.8706 data: 0.0003 max mem: 28454 +[2024-12-12 23:33:31 root] (utils.py 283): INFO Epoch: [14] [1020/2502] eta: 1:11:12 lr: 0.000002 loss_cls: 2.5923 (2.5403) grad_norm: 1.2023 (1.4972) time: 2.8762 data: 0.0003 max mem: 28454 +[2024-12-12 23:34:00 root] (utils.py 283): INFO Epoch: [14] [1030/2502] eta: 1:10:43 lr: 0.000002 loss_cls: 2.6612 (2.5394) grad_norm: 1.1182 (1.4929) time: 2.8756 data: 0.0003 max mem: 28454 +[2024-12-12 23:34:29 root] (utils.py 283): INFO Epoch: [14] [1040/2502] eta: 1:10:14 lr: 0.000002 loss_cls: 2.4755 (2.5396) grad_norm: 1.0566 (1.4899) time: 2.8728 data: 0.0003 max mem: 28454 +[2024-12-12 23:34:58 root] (utils.py 283): INFO Epoch: [14] [1050/2502] eta: 1:09:45 lr: 0.000002 loss_cls: 2.5516 (2.5383) grad_norm: 1.1156 (1.4873) time: 2.8739 data: 0.0003 max mem: 28454 +[2024-12-12 23:35:26 root] (utils.py 283): INFO Epoch: [14] [1060/2502] eta: 1:09:16 lr: 0.000002 loss_cls: 2.5516 (2.5380) grad_norm: 1.1290 (1.4843) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-12 23:35:55 root] (utils.py 283): INFO Epoch: [14] [1070/2502] eta: 1:08:47 lr: 0.000002 loss_cls: 2.4447 (2.5363) grad_norm: 1.1022 (1.4810) time: 2.8719 data: 0.0003 max mem: 28454 +[2024-12-12 23:36:24 root] (utils.py 283): INFO Epoch: [14] [1080/2502] eta: 1:08:18 lr: 0.000002 loss_cls: 2.3832 (2.5361) grad_norm: 1.1022 (1.4827) time: 2.8692 data: 0.0003 max mem: 28454 +[2024-12-12 23:36:52 root] (utils.py 283): INFO Epoch: [14] [1090/2502] eta: 1:07:49 lr: 0.000002 loss_cls: 2.3899 (2.5343) grad_norm: 1.1294 (1.4801) time: 2.8629 data: 0.0003 max mem: 28454 +[2024-12-12 23:37:21 root] (utils.py 283): INFO Epoch: [14] [1100/2502] eta: 1:07:20 lr: 0.000002 loss_cls: 2.4175 (2.5338) grad_norm: 1.1385 (1.4795) time: 2.8597 data: 0.0003 max mem: 28454 +[2024-12-12 23:37:49 root] (utils.py 283): INFO Epoch: [14] [1110/2502] eta: 1:06:51 lr: 0.000002 loss_cls: 2.5859 (2.5333) grad_norm: 1.1429 (1.4895) time: 2.8589 data: 0.0003 max mem: 28454 +[2024-12-12 23:38:18 root] (utils.py 283): INFO Epoch: [14] [1120/2502] eta: 1:06:22 lr: 0.000002 loss_cls: 2.5237 (2.5333) grad_norm: 1.1457 (1.4871) time: 2.8594 data: 0.0002 max mem: 28454 +[2024-12-12 23:38:47 root] (utils.py 283): INFO Epoch: [14] [1130/2502] eta: 1:05:53 lr: 0.000002 loss_cls: 2.6744 (2.5351) grad_norm: 1.1270 (1.4882) time: 2.8581 data: 0.0002 max mem: 28454 +[2024-12-12 23:39:15 root] (utils.py 283): INFO Epoch: [14] [1140/2502] eta: 1:05:23 lr: 0.000002 loss_cls: 2.7248 (2.5352) grad_norm: 1.0705 (1.4847) time: 2.8581 data: 0.0002 max mem: 28454 +[2024-12-12 23:39:44 root] (utils.py 283): INFO Epoch: [14] [1150/2502] eta: 1:04:54 lr: 0.000002 loss_cls: 2.5711 (2.5362) grad_norm: 1.0405 (1.4816) time: 2.8586 data: 0.0002 max mem: 28454 +[2024-12-12 23:40:12 root] (utils.py 283): INFO Epoch: [14] [1160/2502] eta: 1:04:25 lr: 0.000002 loss_cls: 2.5448 (2.5361) grad_norm: 1.1661 (1.4791) time: 2.8581 data: 0.0002 max mem: 28454 +[2024-12-12 23:40:41 root] (utils.py 283): INFO Epoch: [14] [1170/2502] eta: 1:03:56 lr: 0.000002 loss_cls: 2.5414 (2.5356) grad_norm: 1.2367 (1.4773) time: 2.8595 data: 0.0002 max mem: 28454 +[2024-12-12 23:41:10 root] (utils.py 283): INFO Epoch: [14] [1180/2502] eta: 1:03:27 lr: 0.000002 loss_cls: 2.5385 (2.5358) grad_norm: 1.0952 (1.4747) time: 2.8581 data: 0.0003 max mem: 28454 +[2024-12-12 23:41:38 root] (utils.py 283): INFO Epoch: [14] [1190/2502] eta: 1:02:58 lr: 0.000002 loss_cls: 2.5718 (2.5357) grad_norm: 1.1297 (1.4742) time: 2.8562 data: 0.0003 max mem: 28454 +[2024-12-12 23:42:07 root] (utils.py 283): INFO Epoch: [14] [1200/2502] eta: 1:02:29 lr: 0.000002 loss_cls: 2.6580 (2.5360) grad_norm: 1.1297 (1.4710) time: 2.8564 data: 0.0002 max mem: 28454 +[2024-12-12 23:42:35 root] (utils.py 283): INFO Epoch: [14] [1210/2502] eta: 1:02:00 lr: 0.000002 loss_cls: 2.6307 (2.5359) grad_norm: 1.0999 (1.4683) time: 2.8584 data: 0.0002 max mem: 28454 +[2024-12-12 23:43:04 root] (utils.py 283): INFO Epoch: [14] [1220/2502] eta: 1:01:31 lr: 0.000002 loss_cls: 2.6110 (2.5362) grad_norm: 1.0928 (1.4843) time: 2.8643 data: 0.0002 max mem: 28454 +[2024-12-12 23:43:33 root] (utils.py 283): INFO Epoch: [14] [1230/2502] eta: 1:01:02 lr: 0.000002 loss_cls: 2.6187 (2.5365) grad_norm: 1.0928 (1.4812) time: 2.8623 data: 0.0002 max mem: 28454 +[2024-12-12 23:44:01 root] (utils.py 283): INFO Epoch: [14] [1240/2502] eta: 1:00:33 lr: 0.000002 loss_cls: 2.6187 (2.5381) grad_norm: 1.1710 (1.4812) time: 2.8575 data: 0.0002 max mem: 28454 +[2024-12-12 23:44:30 root] (utils.py 283): INFO Epoch: [14] [1250/2502] eta: 1:00:04 lr: 0.000002 loss_cls: 2.8328 (2.5396) grad_norm: 1.0998 (1.4809) time: 2.8580 data: 0.0002 max mem: 28454 +[2024-12-12 23:44:58 root] (utils.py 283): INFO Epoch: [14] [1260/2502] eta: 0:59:35 lr: 0.000002 loss_cls: 2.7247 (2.5408) grad_norm: 1.0756 (1.4796) time: 2.8580 data: 0.0002 max mem: 28454 +[2024-12-12 23:45:27 root] (utils.py 283): INFO Epoch: [14] [1270/2502] eta: 0:59:06 lr: 0.000002 loss_cls: 2.6528 (2.5394) grad_norm: 1.1237 (1.4771) time: 2.8579 data: 0.0002 max mem: 28454 +[2024-12-12 23:45:55 root] (utils.py 283): INFO Epoch: [14] [1280/2502] eta: 0:58:37 lr: 0.000002 loss_cls: 2.6380 (2.5399) grad_norm: 1.1685 (1.4751) time: 2.8578 data: 0.0002 max mem: 28454 +[2024-12-12 23:46:24 root] (utils.py 283): INFO Epoch: [14] [1290/2502] eta: 0:58:08 lr: 0.000002 loss_cls: 2.4200 (2.5386) grad_norm: 1.1591 (1.4731) time: 2.8589 data: 0.0002 max mem: 28454 +[2024-12-12 23:46:53 root] (utils.py 283): INFO Epoch: [14] [1300/2502] eta: 0:57:39 lr: 0.000002 loss_cls: 2.4200 (2.5389) grad_norm: 1.1591 (1.4716) time: 2.8611 data: 0.0002 max mem: 28454 +[2024-12-12 23:47:21 root] (utils.py 283): INFO Epoch: [14] [1310/2502] eta: 0:57:10 lr: 0.000002 loss_cls: 2.2280 (2.5360) grad_norm: 1.0576 (1.4693) time: 2.8619 data: 0.0002 max mem: 28454 +[2024-12-12 23:47:50 root] (utils.py 283): INFO Epoch: [14] [1320/2502] eta: 0:56:41 lr: 0.000002 loss_cls: 2.1545 (2.5359) grad_norm: 1.0476 (1.4667) time: 2.8592 data: 0.0002 max mem: 28454 +[2024-12-12 23:48:18 root] (utils.py 283): INFO Epoch: [14] [1330/2502] eta: 0:56:12 lr: 0.000002 loss_cls: 2.6877 (2.5361) grad_norm: 1.1406 (1.4656) time: 2.8582 data: 0.0002 max mem: 28454 +[2024-12-12 23:48:47 root] (utils.py 283): INFO Epoch: [14] [1340/2502] eta: 0:55:43 lr: 0.000002 loss_cls: 2.6315 (2.5361) grad_norm: 1.1105 (1.4723) time: 2.8587 data: 0.0002 max mem: 28454 +[2024-12-12 23:49:16 root] (utils.py 283): INFO Epoch: [14] [1350/2502] eta: 0:55:14 lr: 0.000002 loss_cls: 2.6315 (2.5360) grad_norm: 1.1105 (1.4713) time: 2.8589 data: 0.0002 max mem: 28454 +[2024-12-12 23:49:44 root] (utils.py 283): INFO Epoch: [14] [1360/2502] eta: 0:54:46 lr: 0.000002 loss_cls: 2.5137 (2.5351) grad_norm: 1.1203 (1.4784) time: 2.8580 data: 0.0002 max mem: 28454 +[2024-12-12 23:50:13 root] (utils.py 283): INFO Epoch: [14] [1370/2502] eta: 0:54:17 lr: 0.000002 loss_cls: 2.6806 (2.5365) grad_norm: 1.1876 (1.4796) time: 2.8567 data: 0.0002 max mem: 28454 +[2024-12-12 23:50:41 root] (utils.py 283): INFO Epoch: [14] [1380/2502] eta: 0:53:48 lr: 0.000002 loss_cls: 2.7621 (2.5368) grad_norm: 1.2350 (1.4776) time: 2.8567 data: 0.0002 max mem: 28454 +[2024-12-12 23:51:10 root] (utils.py 283): INFO Epoch: [14] [1390/2502] eta: 0:53:19 lr: 0.000002 loss_cls: 2.6482 (2.5353) grad_norm: 1.1922 (1.4756) time: 2.8553 data: 0.0002 max mem: 28454 +[2024-12-12 23:51:38 root] (utils.py 283): INFO Epoch: [14] [1400/2502] eta: 0:52:50 lr: 0.000002 loss_cls: 2.5429 (2.5351) grad_norm: 1.1144 (1.4735) time: 2.8559 data: 0.0002 max mem: 28454 +[2024-12-12 23:52:07 root] (utils.py 283): INFO Epoch: [14] [1410/2502] eta: 0:52:21 lr: 0.000002 loss_cls: 2.7367 (2.5374) grad_norm: 1.0638 (1.4714) time: 2.8584 data: 0.0002 max mem: 28454 +[2024-12-12 23:52:36 root] (utils.py 283): INFO Epoch: [14] [1420/2502] eta: 0:51:52 lr: 0.000002 loss_cls: 2.7786 (2.5387) grad_norm: 1.1329 (1.4702) time: 2.8576 data: 0.0002 max mem: 28454 +[2024-12-12 23:53:04 root] (utils.py 283): INFO Epoch: [14] [1430/2502] eta: 0:51:23 lr: 0.000002 loss_cls: 2.6886 (2.5388) grad_norm: 1.0406 (1.4672) time: 2.8573 data: 0.0002 max mem: 28454 +[2024-12-12 23:53:33 root] (utils.py 283): INFO Epoch: [14] [1440/2502] eta: 0:50:54 lr: 0.000002 loss_cls: 2.5274 (2.5375) grad_norm: 1.0571 (1.4654) time: 2.8641 data: 0.0002 max mem: 28454 +[2024-12-12 23:54:02 root] (utils.py 283): INFO Epoch: [14] [1450/2502] eta: 0:50:25 lr: 0.000002 loss_cls: 2.4928 (2.5362) grad_norm: 1.1099 (1.4639) time: 2.8643 data: 0.0002 max mem: 28454 +[2024-12-12 23:54:30 root] (utils.py 283): INFO Epoch: [14] [1460/2502] eta: 0:49:56 lr: 0.000002 loss_cls: 2.5207 (2.5362) grad_norm: 1.0853 (1.4623) time: 2.8577 data: 0.0002 max mem: 28454 +[2024-12-12 23:54:59 root] (utils.py 283): INFO Epoch: [14] [1470/2502] eta: 0:49:28 lr: 0.000002 loss_cls: 2.5875 (2.5367) grad_norm: 1.1354 (1.4606) time: 2.8585 data: 0.0002 max mem: 28454 +[2024-12-12 23:55:27 root] (utils.py 283): INFO Epoch: [14] [1480/2502] eta: 0:48:59 lr: 0.000002 loss_cls: 2.6870 (2.5370) grad_norm: 1.1014 (1.4581) time: 2.8585 data: 0.0002 max mem: 28454 +[2024-12-12 23:55:56 root] (utils.py 283): INFO Epoch: [14] [1490/2502] eta: 0:48:30 lr: 0.000002 loss_cls: 2.6368 (2.5369) grad_norm: 1.0681 (1.4570) time: 2.8566 data: 0.0002 max mem: 28454 +[2024-12-12 23:56:24 root] (utils.py 283): INFO Epoch: [14] [1500/2502] eta: 0:48:01 lr: 0.000002 loss_cls: 2.4986 (2.5367) grad_norm: 1.1676 (1.4555) time: 2.8578 data: 0.0002 max mem: 28454 +[2024-12-12 23:56:53 root] (utils.py 283): INFO Epoch: [14] [1510/2502] eta: 0:47:32 lr: 0.000002 loss_cls: 2.4441 (2.5363) grad_norm: 1.1890 (1.4623) time: 2.8581 data: 0.0002 max mem: 28454 +[2024-12-12 23:57:22 root] (utils.py 283): INFO Epoch: [14] [1520/2502] eta: 0:47:03 lr: 0.000002 loss_cls: 2.6548 (2.5372) grad_norm: 1.1559 (1.4604) time: 2.8579 data: 0.0002 max mem: 28454 +[2024-12-12 23:57:50 root] (utils.py 283): INFO Epoch: [14] [1530/2502] eta: 0:46:34 lr: 0.000002 loss_cls: 2.6716 (2.5377) grad_norm: 1.1585 (1.4589) time: 2.8573 data: 0.0002 max mem: 28454 +[2024-12-12 23:58:19 root] (utils.py 283): INFO Epoch: [14] [1540/2502] eta: 0:46:05 lr: 0.000002 loss_cls: 2.5732 (2.5366) grad_norm: 1.2235 (1.4573) time: 2.8580 data: 0.0002 max mem: 28454 +[2024-12-12 23:58:47 root] (utils.py 283): INFO Epoch: [14] [1550/2502] eta: 0:45:37 lr: 0.000002 loss_cls: 2.5812 (2.5365) grad_norm: 1.1668 (1.4606) time: 2.8597 data: 0.0002 max mem: 28454 +[2024-12-12 23:59:16 root] (utils.py 283): INFO Epoch: [14] [1560/2502] eta: 0:45:08 lr: 0.000002 loss_cls: 2.7423 (2.5376) grad_norm: 1.1451 (1.4585) time: 2.8586 data: 0.0002 max mem: 28454 +[2024-12-12 23:59:44 root] (utils.py 283): INFO Epoch: [14] [1570/2502] eta: 0:44:39 lr: 0.000002 loss_cls: 2.6884 (2.5378) grad_norm: 1.0492 (1.4560) time: 2.8572 data: 0.0002 max mem: 28454 +[2024-12-13 00:00:13 root] (utils.py 283): INFO Epoch: [14] [1580/2502] eta: 0:44:10 lr: 0.000002 loss_cls: 2.6275 (2.5384) grad_norm: 1.0762 (1.4543) time: 2.8567 data: 0.0002 max mem: 28454 +[2024-12-13 00:00:42 root] (utils.py 283): INFO Epoch: [14] [1590/2502] eta: 0:43:41 lr: 0.000002 loss_cls: 2.6629 (2.5384) grad_norm: 1.0989 (1.4519) time: 2.8586 data: 0.0002 max mem: 28454 +[2024-12-13 00:01:10 root] (utils.py 283): INFO Epoch: [14] [1600/2502] eta: 0:43:12 lr: 0.000002 loss_cls: 2.5382 (2.5374) grad_norm: 1.0765 (1.4498) time: 2.8580 data: 0.0002 max mem: 28454 +[2024-12-13 00:01:39 root] (utils.py 283): INFO Epoch: [14] [1610/2502] eta: 0:42:44 lr: 0.000002 loss_cls: 2.4793 (2.5373) grad_norm: 1.1370 (1.4482) time: 2.8567 data: 0.0002 max mem: 28454 +[2024-12-13 00:02:07 root] (utils.py 283): INFO Epoch: [14] [1620/2502] eta: 0:42:15 lr: 0.000002 loss_cls: 2.6474 (2.5365) grad_norm: 1.1577 (1.4463) time: 2.8576 data: 0.0002 max mem: 28454 +[2024-12-13 00:02:36 root] (utils.py 283): INFO Epoch: [14] [1630/2502] eta: 0:41:46 lr: 0.000002 loss_cls: 2.7730 (2.5382) grad_norm: 1.1298 (1.4443) time: 2.8572 data: 0.0002 max mem: 28454 +[2024-12-13 00:03:05 root] (utils.py 283): INFO Epoch: [14] [1640/2502] eta: 0:41:17 lr: 0.000002 loss_cls: 2.7614 (2.5388) grad_norm: 1.1615 (1.4433) time: 2.8608 data: 0.0002 max mem: 28454 +[2024-12-13 00:03:33 root] (utils.py 283): INFO Epoch: [14] [1650/2502] eta: 0:40:48 lr: 0.000002 loss_cls: 2.6399 (2.5379) grad_norm: 1.1239 (1.4411) time: 2.8601 data: 0.0002 max mem: 28454 +[2024-12-13 00:04:02 root] (utils.py 283): INFO Epoch: [14] [1660/2502] eta: 0:40:19 lr: 0.000002 loss_cls: 2.5636 (2.5375) grad_norm: 1.0837 (1.4401) time: 2.8546 data: 0.0002 max mem: 28454 +[2024-12-13 00:04:30 root] (utils.py 283): INFO Epoch: [14] [1670/2502] eta: 0:39:51 lr: 0.000002 loss_cls: 2.4929 (2.5369) grad_norm: 1.1452 (1.4416) time: 2.8532 data: 0.0002 max mem: 28454 +[2024-12-13 00:04:59 root] (utils.py 283): INFO Epoch: [14] [1680/2502] eta: 0:39:22 lr: 0.000002 loss_cls: 2.5227 (2.5371) grad_norm: 1.0838 (1.4403) time: 2.8555 data: 0.0002 max mem: 28454 +[2024-12-13 00:05:27 root] (utils.py 283): INFO Epoch: [14] [1690/2502] eta: 0:38:53 lr: 0.000002 loss_cls: 2.6104 (2.5368) grad_norm: 1.2101 (1.4443) time: 2.8596 data: 0.0002 max mem: 28454 +[2024-12-13 00:05:56 root] (utils.py 283): INFO Epoch: [14] [1700/2502] eta: 0:38:24 lr: 0.000002 loss_cls: 2.6563 (2.5372) grad_norm: 1.1951 (1.4430) time: 2.8591 data: 0.0003 max mem: 28454 +[2024-12-13 00:06:25 root] (utils.py 283): INFO Epoch: [14] [1710/2502] eta: 0:37:55 lr: 0.000002 loss_cls: 2.6242 (2.5361) grad_norm: 1.1628 (1.4423) time: 2.8593 data: 0.0003 max mem: 28454 +[2024-12-13 00:06:53 root] (utils.py 283): INFO Epoch: [14] [1720/2502] eta: 0:37:26 lr: 0.000002 loss_cls: 2.6242 (2.5362) grad_norm: 1.0942 (1.4428) time: 2.8590 data: 0.0002 max mem: 28454 +[2024-12-13 00:07:22 root] (utils.py 283): INFO Epoch: [14] [1730/2502] eta: 0:36:58 lr: 0.000002 loss_cls: 2.7381 (2.5376) grad_norm: 1.0976 (1.4427) time: 2.8576 data: 0.0003 max mem: 28454 +[2024-12-13 00:07:50 root] (utils.py 283): INFO Epoch: [14] [1740/2502] eta: 0:36:29 lr: 0.000002 loss_cls: 2.7381 (2.5380) grad_norm: 1.1390 (1.4410) time: 2.8579 data: 0.0003 max mem: 28454 +[2024-12-13 00:08:19 root] (utils.py 283): INFO Epoch: [14] [1750/2502] eta: 0:36:00 lr: 0.000002 loss_cls: 2.6167 (2.5377) grad_norm: 1.1187 (1.4392) time: 2.8577 data: 0.0003 max mem: 28454 +[2024-12-13 00:08:47 root] (utils.py 283): INFO Epoch: [14] [1760/2502] eta: 0:35:31 lr: 0.000002 loss_cls: 2.3803 (2.5370) grad_norm: 1.1149 (1.4376) time: 2.8570 data: 0.0002 max mem: 28454 +[2024-12-13 00:09:16 root] (utils.py 283): INFO Epoch: [14] [1770/2502] eta: 0:35:03 lr: 0.000002 loss_cls: 2.3185 (2.5362) grad_norm: 1.1205 (1.4363) time: 2.8592 data: 0.0002 max mem: 28454 +[2024-12-13 00:09:45 root] (utils.py 283): INFO Epoch: [14] [1780/2502] eta: 0:34:34 lr: 0.000002 loss_cls: 2.5359 (2.5359) grad_norm: 1.1353 (1.4349) time: 2.8616 data: 0.0002 max mem: 28454 +[2024-12-13 00:10:13 root] (utils.py 283): INFO Epoch: [14] [1790/2502] eta: 0:34:05 lr: 0.000002 loss_cls: 2.5359 (2.5344) grad_norm: 1.0536 (1.4331) time: 2.8629 data: 0.0002 max mem: 28454 +[2024-12-13 00:10:42 root] (utils.py 283): INFO Epoch: [14] [1800/2502] eta: 0:33:36 lr: 0.000002 loss_cls: 2.6411 (2.5355) grad_norm: 1.0634 (1.4318) time: 2.8677 data: 0.0002 max mem: 28454 +[2024-12-13 00:11:11 root] (utils.py 283): INFO Epoch: [14] [1810/2502] eta: 0:33:07 lr: 0.000002 loss_cls: 2.7154 (2.5343) grad_norm: 1.1971 (1.4301) time: 2.8678 data: 0.0002 max mem: 28454 +[2024-12-13 00:11:39 root] (utils.py 283): INFO Epoch: [14] [1820/2502] eta: 0:32:39 lr: 0.000002 loss_cls: 2.6093 (2.5348) grad_norm: 1.0928 (1.4283) time: 2.8674 data: 0.0002 max mem: 28454 +[2024-12-13 00:12:08 root] (utils.py 283): INFO Epoch: [14] [1830/2502] eta: 0:32:10 lr: 0.000002 loss_cls: 2.7233 (2.5351) grad_norm: 1.0697 (1.4331) time: 2.8688 data: 0.0003 max mem: 28454 +[2024-12-13 00:12:37 root] (utils.py 283): INFO Epoch: [14] [1840/2502] eta: 0:31:41 lr: 0.000002 loss_cls: 2.7213 (2.5355) grad_norm: 1.1214 (1.4320) time: 2.8675 data: 0.0003 max mem: 28454 +[2024-12-13 00:13:05 root] (utils.py 283): INFO Epoch: [14] [1850/2502] eta: 0:31:13 lr: 0.000002 loss_cls: 2.5098 (2.5352) grad_norm: 1.1214 (1.4324) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-13 00:13:34 root] (utils.py 283): INFO Epoch: [14] [1860/2502] eta: 0:30:44 lr: 0.000002 loss_cls: 2.6656 (2.5359) grad_norm: 1.1951 (1.4315) time: 2.8696 data: 0.0003 max mem: 28454 +[2024-12-13 00:14:03 root] (utils.py 283): INFO Epoch: [14] [1870/2502] eta: 0:30:15 lr: 0.000002 loss_cls: 2.7122 (2.5358) grad_norm: 1.1192 (1.4301) time: 2.8695 data: 0.0003 max mem: 28454 +[2024-12-13 00:14:32 root] (utils.py 283): INFO Epoch: [14] [1880/2502] eta: 0:29:46 lr: 0.000002 loss_cls: 2.7123 (2.5357) grad_norm: 1.0834 (1.4284) time: 2.8690 data: 0.0003 max mem: 28454 +[2024-12-13 00:15:00 root] (utils.py 283): INFO Epoch: [14] [1890/2502] eta: 0:29:18 lr: 0.000002 loss_cls: 2.7255 (2.5364) grad_norm: 1.0425 (1.4270) time: 2.8663 data: 0.0002 max mem: 28454 +[2024-12-13 00:15:29 root] (utils.py 283): INFO Epoch: [14] [1900/2502] eta: 0:28:49 lr: 0.000002 loss_cls: 2.6424 (2.5355) grad_norm: 0.9905 (1.4251) time: 2.8690 data: 0.0002 max mem: 28454 +[2024-12-13 00:15:58 root] (utils.py 283): INFO Epoch: [14] [1910/2502] eta: 0:28:20 lr: 0.000002 loss_cls: 2.5284 (2.5363) grad_norm: 1.0999 (1.4238) time: 2.8713 data: 0.0002 max mem: 28454 +[2024-12-13 00:16:26 root] (utils.py 283): INFO Epoch: [14] [1920/2502] eta: 0:27:51 lr: 0.000002 loss_cls: 2.6487 (2.5353) grad_norm: 1.0771 (1.4219) time: 2.8717 data: 0.0003 max mem: 28454 +[2024-12-13 00:16:55 root] (utils.py 283): INFO Epoch: [14] [1930/2502] eta: 0:27:23 lr: 0.000002 loss_cls: 2.6406 (2.5361) grad_norm: 1.0680 (1.4308) time: 2.8735 data: 0.0002 max mem: 28454 +[2024-12-13 00:17:24 root] (utils.py 283): INFO Epoch: [14] [1940/2502] eta: 0:26:54 lr: 0.000002 loss_cls: 2.6984 (2.5370) grad_norm: 1.1265 (1.4304) time: 2.8740 data: 0.0003 max mem: 28454 +[2024-12-13 00:17:53 root] (utils.py 283): INFO Epoch: [14] [1950/2502] eta: 0:26:25 lr: 0.000002 loss_cls: 2.6544 (2.5371) grad_norm: 1.1073 (1.4311) time: 2.8732 data: 0.0003 max mem: 28454 +[2024-12-13 00:18:21 root] (utils.py 283): INFO Epoch: [14] [1960/2502] eta: 0:25:56 lr: 0.000002 loss_cls: 2.6332 (2.5379) grad_norm: 1.1326 (1.4320) time: 2.8738 data: 0.0003 max mem: 28454 +[2024-12-13 00:18:50 root] (utils.py 283): INFO Epoch: [14] [1970/2502] eta: 0:25:28 lr: 0.000002 loss_cls: 2.7777 (2.5387) grad_norm: 1.1005 (1.4328) time: 2.8732 data: 0.0002 max mem: 28454 +[2024-12-13 00:19:19 root] (utils.py 283): INFO Epoch: [14] [1980/2502] eta: 0:24:59 lr: 0.000002 loss_cls: 2.7084 (2.5391) grad_norm: 1.0940 (1.4314) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-13 00:19:47 root] (utils.py 283): INFO Epoch: [14] [1990/2502] eta: 0:24:30 lr: 0.000002 loss_cls: 2.6011 (2.5384) grad_norm: 1.1219 (1.4307) time: 2.8731 data: 0.0003 max mem: 28454 +[2024-12-13 00:20:16 root] (utils.py 283): INFO Epoch: [14] [2000/2502] eta: 0:24:02 lr: 0.000002 loss_cls: 2.4260 (2.5376) grad_norm: 1.0850 (1.4453) time: 2.8814 data: 0.0003 max mem: 28454 +[2024-12-13 00:20:45 root] (utils.py 283): INFO Epoch: [14] [2010/2502] eta: 0:23:33 lr: 0.000002 loss_cls: 2.6671 (2.5383) grad_norm: 1.0854 (1.4441) time: 2.8800 data: 0.0003 max mem: 28454 +[2024-12-13 00:21:14 root] (utils.py 283): INFO Epoch: [14] [2020/2502] eta: 0:23:04 lr: 0.000002 loss_cls: 2.4361 (2.5373) grad_norm: 1.1158 (1.4424) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-13 00:21:42 root] (utils.py 283): INFO Epoch: [14] [2030/2502] eta: 0:22:35 lr: 0.000002 loss_cls: 2.4361 (2.5377) grad_norm: 1.1158 (1.4421) time: 2.8689 data: 0.0002 max mem: 28454 +[2024-12-13 00:22:11 root] (utils.py 283): INFO Epoch: [14] [2040/2502] eta: 0:22:07 lr: 0.000002 loss_cls: 2.7359 (2.5372) grad_norm: 1.1334 (1.4417) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-13 00:22:40 root] (utils.py 283): INFO Epoch: [14] [2050/2502] eta: 0:21:38 lr: 0.000002 loss_cls: 2.6374 (2.5375) grad_norm: 1.1306 (1.4401) time: 2.8694 data: 0.0002 max mem: 28454 +[2024-12-13 00:23:09 root] (utils.py 283): INFO Epoch: [14] [2060/2502] eta: 0:21:09 lr: 0.000002 loss_cls: 2.5740 (2.5367) grad_norm: 1.0427 (1.4381) time: 2.8670 data: 0.0002 max mem: 28454 +[2024-12-13 00:23:37 root] (utils.py 283): INFO Epoch: [14] [2070/2502] eta: 0:20:40 lr: 0.000002 loss_cls: 2.4541 (2.5367) grad_norm: 1.0672 (1.4369) time: 2.8666 data: 0.0002 max mem: 28454 +[2024-12-13 00:24:06 root] (utils.py 283): INFO Epoch: [14] [2080/2502] eta: 0:20:12 lr: 0.000002 loss_cls: 2.5404 (2.5369) grad_norm: 1.1351 (1.4356) time: 2.8687 data: 0.0003 max mem: 28454 +[2024-12-13 00:24:35 root] (utils.py 283): INFO Epoch: [14] [2090/2502] eta: 0:19:43 lr: 0.000002 loss_cls: 2.6430 (2.5369) grad_norm: 1.1351 (1.4363) time: 2.8709 data: 0.0003 max mem: 28454 +[2024-12-13 00:25:03 root] (utils.py 283): INFO Epoch: [14] [2100/2502] eta: 0:19:14 lr: 0.000002 loss_cls: 2.5588 (2.5364) grad_norm: 1.1544 (1.4350) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-13 00:25:32 root] (utils.py 283): INFO Epoch: [14] [2110/2502] eta: 0:18:46 lr: 0.000002 loss_cls: 2.6718 (2.5368) grad_norm: 1.1550 (1.5620) time: 2.8716 data: 0.0003 max mem: 28454 +[2024-12-13 00:26:01 root] (utils.py 283): INFO Epoch: [14] [2120/2502] eta: 0:18:17 lr: 0.000002 loss_cls: 2.6469 (2.5364) grad_norm: 1.1591 (1.5608) time: 2.8687 data: 0.0003 max mem: 28454 +[2024-12-13 00:26:29 root] (utils.py 283): INFO Epoch: [14] [2130/2502] eta: 0:17:48 lr: 0.000002 loss_cls: 2.6128 (2.5369) grad_norm: 1.1972 (1.5629) time: 2.8687 data: 0.0002 max mem: 28454 +[2024-12-13 00:26:58 root] (utils.py 283): INFO Epoch: [14] [2140/2502] eta: 0:17:19 lr: 0.000002 loss_cls: 2.6128 (2.5364) grad_norm: 1.2025 (1.5634) time: 2.8693 data: 0.0002 max mem: 28454 +[2024-12-13 00:27:27 root] (utils.py 283): INFO Epoch: [14] [2150/2502] eta: 0:16:51 lr: 0.000002 loss_cls: 2.6652 (2.5378) grad_norm: 1.2226 (1.5630) time: 2.8685 data: 0.0003 max mem: 28454 +[2024-12-13 00:27:55 root] (utils.py 283): INFO Epoch: [14] [2160/2502] eta: 0:16:22 lr: 0.000002 loss_cls: 2.8168 (2.5377) grad_norm: 1.1876 (1.5613) time: 2.8695 data: 0.0002 max mem: 28454 +[2024-12-13 00:28:24 root] (utils.py 283): INFO Epoch: [14] [2170/2502] eta: 0:15:53 lr: 0.000002 loss_cls: 2.4688 (2.5369) grad_norm: 1.1578 (1.5598) time: 2.8721 data: 0.0002 max mem: 28454 +[2024-12-13 00:28:53 root] (utils.py 283): INFO Epoch: [14] [2180/2502] eta: 0:15:24 lr: 0.000002 loss_cls: 2.3963 (2.5361) grad_norm: 1.0717 (1.5578) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-13 00:29:22 root] (utils.py 283): INFO Epoch: [14] [2190/2502] eta: 0:14:56 lr: 0.000002 loss_cls: 2.6461 (2.5367) grad_norm: 1.0578 (1.5558) time: 2.8736 data: 0.0003 max mem: 28454 +[2024-12-13 00:29:50 root] (utils.py 283): INFO Epoch: [14] [2200/2502] eta: 0:14:27 lr: 0.000002 loss_cls: 2.6499 (2.5368) grad_norm: 1.1128 (1.5543) time: 2.8744 data: 0.0003 max mem: 28454 +[2024-12-13 00:30:19 root] (utils.py 283): INFO Epoch: [14] [2210/2502] eta: 0:13:58 lr: 0.000002 loss_cls: 2.6485 (2.5368) grad_norm: 1.1476 (1.5523) time: 2.8722 data: 0.0002 max mem: 28454 +[2024-12-13 00:30:48 root] (utils.py 283): INFO Epoch: [14] [2220/2502] eta: 0:13:30 lr: 0.000002 loss_cls: 2.5115 (2.5361) grad_norm: 1.1358 (1.5507) time: 2.8708 data: 0.0002 max mem: 28454 +[2024-12-13 00:31:17 root] (utils.py 283): INFO Epoch: [14] [2230/2502] eta: 0:13:01 lr: 0.000002 loss_cls: 2.4192 (2.5362) grad_norm: 1.1209 (1.5491) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-13 00:31:45 root] (utils.py 283): INFO Epoch: [14] [2240/2502] eta: 0:12:32 lr: 0.000002 loss_cls: 2.5860 (2.5361) grad_norm: 1.1209 (1.5473) time: 2.8716 data: 0.0002 max mem: 28454 +[2024-12-13 00:32:14 root] (utils.py 283): INFO Epoch: [14] [2250/2502] eta: 0:12:03 lr: 0.000002 loss_cls: 2.6160 (2.5365) grad_norm: 1.1119 (1.5455) time: 2.8701 data: 0.0002 max mem: 28454 +[2024-12-13 00:32:43 root] (utils.py 283): INFO Epoch: [14] [2260/2502] eta: 0:11:35 lr: 0.000002 loss_cls: 2.7482 (2.5372) grad_norm: 1.0928 (1.5439) time: 2.8699 data: 0.0002 max mem: 28454 +[2024-12-13 00:33:11 root] (utils.py 283): INFO Epoch: [14] [2270/2502] eta: 0:11:06 lr: 0.000002 loss_cls: 2.7378 (2.5375) grad_norm: 1.0688 (1.5422) time: 2.8701 data: 0.0003 max mem: 28454 +[2024-12-13 00:33:40 root] (utils.py 283): INFO Epoch: [14] [2280/2502] eta: 0:10:37 lr: 0.000002 loss_cls: 2.6636 (2.5378) grad_norm: 1.1188 (1.5413) time: 2.8706 data: 0.0003 max mem: 28454 +[2024-12-13 00:34:09 root] (utils.py 283): INFO Epoch: [14] [2290/2502] eta: 0:10:08 lr: 0.000002 loss_cls: 2.6357 (2.5382) grad_norm: 1.1188 (1.5412) time: 2.8725 data: 0.0003 max mem: 28454 +[2024-12-13 00:34:38 root] (utils.py 283): INFO Epoch: [14] [2300/2502] eta: 0:09:40 lr: 0.000002 loss_cls: 2.6253 (2.5384) grad_norm: 1.1053 (1.5397) time: 2.8730 data: 0.0003 max mem: 28454 +[2024-12-13 00:35:06 root] (utils.py 283): INFO Epoch: [14] [2310/2502] eta: 0:09:11 lr: 0.000002 loss_cls: 2.4914 (2.5378) grad_norm: 1.1417 (1.5380) time: 2.8743 data: 0.0003 max mem: 28454 +[2024-12-13 00:35:35 root] (utils.py 283): INFO Epoch: [14] [2320/2502] eta: 0:08:42 lr: 0.000002 loss_cls: 2.5945 (2.5386) grad_norm: 1.1417 (1.5401) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-13 00:36:04 root] (utils.py 283): INFO Epoch: [14] [2330/2502] eta: 0:08:14 lr: 0.000002 loss_cls: 2.6835 (2.5387) grad_norm: 1.1380 (1.5385) time: 2.8687 data: 0.0003 max mem: 28454 +[2024-12-13 00:36:32 root] (utils.py 283): INFO Epoch: [14] [2340/2502] eta: 0:07:45 lr: 0.000002 loss_cls: 2.6422 (2.5386) grad_norm: 1.1306 (1.5376) time: 2.8700 data: 0.0003 max mem: 28454 +[2024-12-13 00:37:01 root] (utils.py 283): INFO Epoch: [14] [2350/2502] eta: 0:07:16 lr: 0.000002 loss_cls: 2.3599 (2.5380) grad_norm: 1.0705 (1.5382) time: 2.8714 data: 0.0002 max mem: 28454 +[2024-12-13 00:37:30 root] (utils.py 283): INFO Epoch: [14] [2360/2502] eta: 0:06:47 lr: 0.000002 loss_cls: 2.6169 (2.5385) grad_norm: 1.0998 (1.5412) time: 2.8715 data: 0.0002 max mem: 28454 +[2024-12-13 00:37:59 root] (utils.py 283): INFO Epoch: [14] [2370/2502] eta: 0:06:19 lr: 0.000002 loss_cls: 2.6944 (2.5391) grad_norm: 1.1826 (1.5418) time: 2.8715 data: 0.0003 max mem: 28454 +[2024-12-13 00:38:27 root] (utils.py 283): INFO Epoch: [14] [2380/2502] eta: 0:05:50 lr: 0.000002 loss_cls: 2.6669 (2.5385) grad_norm: 1.0900 (1.5398) time: 2.8720 data: 0.0002 max mem: 28454 +[2024-12-13 00:38:56 root] (utils.py 283): INFO Epoch: [14] [2390/2502] eta: 0:05:21 lr: 0.000002 loss_cls: 2.4941 (2.5387) grad_norm: 1.0656 (1.5377) time: 2.8700 data: 0.0002 max mem: 28454 +[2024-12-13 00:39:25 root] (utils.py 283): INFO Epoch: [14] [2400/2502] eta: 0:04:52 lr: 0.000002 loss_cls: 2.6576 (2.5390) grad_norm: 1.0397 (1.5367) time: 2.8707 data: 0.0003 max mem: 28454 +[2024-12-13 00:39:53 root] (utils.py 283): INFO Epoch: [14] [2410/2502] eta: 0:04:24 lr: 0.000002 loss_cls: 2.5745 (2.5384) grad_norm: 1.0966 (1.5359) time: 2.8714 data: 0.0003 max mem: 28454 +[2024-12-13 00:40:22 root] (utils.py 283): INFO Epoch: [14] [2420/2502] eta: 0:03:55 lr: 0.000002 loss_cls: 2.3757 (2.5385) grad_norm: 1.0999 (1.5350) time: 2.8695 data: 0.0003 max mem: 28454 +[2024-12-13 00:40:51 root] (utils.py 283): INFO Epoch: [14] [2430/2502] eta: 0:03:26 lr: 0.000002 loss_cls: 2.7113 (2.5385) grad_norm: 1.0855 (1.5333) time: 2.8723 data: 0.0003 max mem: 28454 +[2024-12-13 00:41:20 root] (utils.py 283): INFO Epoch: [14] [2440/2502] eta: 0:02:58 lr: 0.000002 loss_cls: 2.6930 (2.5387) grad_norm: 1.0981 (1.5324) time: 2.8740 data: 0.0002 max mem: 28454 +[2024-12-13 00:41:48 root] (utils.py 283): INFO Epoch: [14] [2450/2502] eta: 0:02:29 lr: 0.000002 loss_cls: 2.6007 (2.5381) grad_norm: 1.1206 (1.5306) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-13 00:42:17 root] (utils.py 283): INFO Epoch: [14] [2460/2502] eta: 0:02:00 lr: 0.000002 loss_cls: 2.5896 (2.5388) grad_norm: 1.1257 (1.5301) time: 2.8734 data: 0.0002 max mem: 28454 +[2024-12-13 00:42:46 root] (utils.py 283): INFO Epoch: [14] [2470/2502] eta: 0:01:31 lr: 0.000002 loss_cls: 2.5416 (2.5379) grad_norm: 1.1046 (1.5283) time: 2.8726 data: 0.0002 max mem: 28454 +[2024-12-13 00:43:14 root] (utils.py 283): INFO Epoch: [14] [2480/2502] eta: 0:01:03 lr: 0.000002 loss_cls: 2.5416 (2.5379) grad_norm: 1.1074 (1.5295) time: 2.8710 data: 0.0003 max mem: 28454 +[2024-12-13 00:43:44 root] (utils.py 283): INFO Epoch: [14] [2490/2502] eta: 0:00:34 lr: 0.000002 loss_cls: 2.4853 (2.5376) grad_norm: 1.1525 (1.5307) time: 2.8931 data: 0.0208 max mem: 28454 +[2024-12-13 00:44:12 root] (utils.py 283): INFO Epoch: [14] [2500/2502] eta: 0:00:05 lr: 0.000002 loss_cls: 2.5040 (2.5378) grad_norm: 1.1525 (1.5295) time: 2.8973 data: 0.0208 max mem: 28454 +[2024-12-13 00:44:15 root] (utils.py 283): INFO Epoch: [14] [2501/2502] eta: 0:00:02 lr: 0.000002 loss_cls: 2.5040 (2.5379) grad_norm: 1.1429 (1.5293) time: 2.8968 data: 0.0208 max mem: 28454 +[2024-12-13 00:44:15 root] (utils.py 297): INFO Epoch: [14] Total time: 1:59:47 (2.8727 s / it) +[2024-12-13 00:44:15 root] (engine.py 179): INFO Averaged stats:lr: 0.000002 loss_cls: 2.5040 (2.5421) grad_norm: 1.1429 (1.5293) +[2024-12-13 00:44:19 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3947 (0.3947) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5426 data: 0.0005 max mem: 28454 +[2024-12-13 00:44:24 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6341 (0.5944) acc1: 85.1562 (87.0028) acc3: 97.6562 (96.5909) acc5: 98.4375 (98.0824) time: 0.5462 data: 0.0005 max mem: 28454 +[2024-12-13 00:44:29 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6298 (0.6166) acc1: 87.5000 (86.9420) acc3: 96.8750 (96.2426) acc5: 97.6562 (97.7679) time: 0.5474 data: 0.0006 max mem: 28454 +[2024-12-13 00:44:35 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6298 (0.6504) acc1: 87.5000 (85.8115) acc3: 96.0938 (96.0685) acc5: 97.6562 (97.6058) time: 0.5486 data: 0.0006 max mem: 28454 +[2024-12-13 00:44:40 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6895 (0.6575) acc1: 86.7188 (85.6136) acc3: 96.0938 (96.0747) acc5: 97.6562 (97.6372) time: 0.5487 data: 0.0004 max mem: 28454 +[2024-12-13 00:44:46 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8374 (0.7369) acc1: 78.9062 (83.8082) acc3: 93.7500 (95.0674) acc5: 95.3125 (96.8444) time: 0.5483 data: 0.0005 max mem: 28454 +[2024-12-13 00:44:51 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9638 (0.7652) acc1: 79.6875 (83.5041) acc3: 91.4062 (94.4416) acc5: 93.7500 (96.3627) time: 0.5484 data: 0.0004 max mem: 28454 +[2024-12-13 00:44:57 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9625 (0.7925) acc1: 81.2500 (82.7685) acc3: 91.4062 (94.1021) acc5: 94.5312 (96.1488) time: 0.5484 data: 0.0004 max mem: 28454 +[2024-12-13 00:45:02 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9978 (0.8155) acc1: 79.6875 (82.3495) acc3: 91.4062 (93.7982) acc5: 94.5312 (95.8430) time: 0.5487 data: 0.0006 max mem: 28454 +[2024-12-13 00:45:08 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9686 (0.8345) acc1: 77.3438 (81.6621) acc3: 91.4062 (93.4839) acc5: 94.5312 (95.6731) time: 0.5486 data: 0.0006 max mem: 28454 +[2024-12-13 00:45:12 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9178 (0.8323) acc1: 77.3810 (81.6640) acc3: 91.4062 (93.4960) acc5: 94.5312 (95.7200) time: 0.5392 data: 0.0006 max mem: 28454 +[2024-12-13 00:45:12 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5466 s / it) +[2024-12-13 00:45:12 root] (engine.py 264): INFO * Acc@1 81.842 Acc@3 93.404 Acc@5 95.744 loss 0.832 flops 13.207 layer_flops 13.109 +[2024-12-13 00:45:12 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.8% +[2024-12-13 00:45:12 root] (main.py 576): INFO Max accuracy: 81.88% +[2024-12-13 00:45:12 root] (main.py 589): INFO Finetune time 1 day, 6:10:50