diff --git "a/Vim-Ti_30epoch_75.3/logs/log_rank0.txt" "b/Vim-Ti_30epoch_75.3/logs/log_rank0.txt" new file mode 100644--- /dev/null +++ "b/Vim-Ti_30epoch_75.3/logs/log_rank0.txt" @@ -0,0 +1,8632 @@ +[2024-12-04 21:32:19 root] (main.py 223): INFO Namespace(batch_size=128, epochs=30, model='RMeeTo_tiny', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', output_dir='check/tiny/30', device='cuda', seed=0, resume='', autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='5', metric='X', distance='cosine', if_order=True, if_random=False, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-04 21:32:23 root] (main.py 286): INFO Creating model: RMeeTo_tiny +[2024-12-04 21:32:29 root] (main.py 365): INFO number of params: 7148008 +[2024-12-04 21:32:30 root] (main.py 480): INFO Start training for 30 epochs +[2024-12-04 21:32:37 root] (utils.py 283): INFO Epoch: [0] [ 0/2502] eta: 4:40:11 lr: 0.000020 loss_cls: 8.2029 (8.2029) grad_norm: 29.7073 (29.7073) time: 6.7191 data: 0.0032 max mem: 8339 +[2024-12-04 21:32:45 root] (utils.py 283): INFO Epoch: [0] [ 10/2502] eta: 0:56:54 lr: 0.000020 loss_cls: 6.8288 (6.7537) grad_norm: 21.2608 (25.5417) time: 1.3703 data: 0.0005 max mem: 8421 +[2024-12-04 21:32:53 root] (utils.py 283): INFO Epoch: [0] [ 20/2502] eta: 0:45:03 lr: 0.000020 loss_cls: 6.0717 (6.1856) grad_norm: 16.2732 (19.5354) time: 0.8077 data: 0.0003 max mem: 8421 +[2024-12-04 21:33:01 root] (utils.py 283): INFO Epoch: [0] [ 30/2502] eta: 0:40:35 lr: 0.000020 loss_cls: 5.7367 (5.9946) grad_norm: 11.0466 (17.1414) time: 0.7734 data: 0.0003 max mem: 8421 +[2024-12-04 21:33:08 root] (utils.py 283): INFO Epoch: [0] [ 40/2502] eta: 0:38:15 lr: 0.000020 loss_cls: 5.4039 (5.7676) grad_norm: 9.3151 (15.1673) time: 0.7678 data: 0.0003 max mem: 8421 +[2024-12-04 21:33:16 root] (utils.py 283): INFO Epoch: [0] [ 50/2502] eta: 0:36:46 lr: 0.000020 loss_cls: 5.0876 (5.6168) grad_norm: 8.2335 (13.8081) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-04 21:33:24 root] (utils.py 283): INFO Epoch: [0] [ 60/2502] eta: 0:35:45 lr: 0.000020 loss_cls: 5.0241 (5.4648) grad_norm: 7.8607 (12.8939) time: 0.7685 data: 0.0003 max mem: 8421 +[2024-12-04 21:33:31 root] (utils.py 283): INFO Epoch: [0] [ 70/2502] eta: 0:34:58 lr: 0.000020 loss_cls: 4.8027 (5.3359) grad_norm: 7.6258 (12.2794) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-04 21:33:39 root] (utils.py 283): INFO Epoch: [0] [ 80/2502] eta: 0:34:21 lr: 0.000020 loss_cls: 4.7774 (5.2542) grad_norm: 7.5085 (11.9615) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-04 21:33:47 root] (utils.py 283): INFO Epoch: [0] [ 90/2502] eta: 0:33:54 lr: 0.000020 loss_cls: 4.7774 (5.1804) grad_norm: 6.7983 (11.4040) time: 0.7741 data: 0.0002 max mem: 8421 +[2024-12-04 21:33:55 root] (utils.py 283): INFO Epoch: [0] [ 100/2502] eta: 0:33:31 lr: 0.000020 loss_cls: 4.9077 (5.1419) grad_norm: 6.5588 (10.9213) time: 0.7820 data: 0.0002 max mem: 8421 +[2024-12-04 21:34:02 root] (utils.py 283): INFO Epoch: [0] [ 110/2502] eta: 0:33:08 lr: 0.000020 loss_cls: 4.9077 (5.0901) grad_norm: 6.4228 (10.6151) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-04 21:34:10 root] (utils.py 283): INFO Epoch: [0] [ 120/2502] eta: 0:32:46 lr: 0.000020 loss_cls: 4.7370 (5.0504) grad_norm: 6.6989 (10.2980) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-04 21:34:18 root] (utils.py 283): INFO Epoch: [0] [ 130/2502] eta: 0:32:32 lr: 0.000020 loss_cls: 4.7370 (5.0073) grad_norm: 5.9451 (10.0627) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-04 21:34:26 root] (utils.py 283): INFO Epoch: [0] [ 140/2502] eta: 0:32:15 lr: 0.000020 loss_cls: 4.4703 (4.9564) grad_norm: 5.7354 (9.7586) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-04 21:34:33 root] (utils.py 283): INFO Epoch: [0] [ 150/2502] eta: 0:31:58 lr: 0.000020 loss_cls: 4.3997 (4.9201) grad_norm: 5.8258 (9.5229) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-04 21:34:41 root] (utils.py 283): INFO Epoch: [0] [ 160/2502] eta: 0:31:42 lr: 0.000020 loss_cls: 4.5080 (4.8932) grad_norm: 5.8812 (9.3796) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-04 21:34:49 root] (utils.py 283): INFO Epoch: [0] [ 170/2502] eta: 0:31:30 lr: 0.000020 loss_cls: 4.5130 (4.8651) grad_norm: 5.7392 (9.1658) time: 0.7717 data: 0.0002 max mem: 8421 +[2024-12-04 21:34:56 root] (utils.py 283): INFO Epoch: [0] [ 180/2502] eta: 0:31:16 lr: 0.000020 loss_cls: 4.3575 (4.8220) grad_norm: 5.6669 (8.9957) time: 0.7727 data: 0.0002 max mem: 8421 +[2024-12-04 21:35:04 root] (utils.py 283): INFO Epoch: [0] [ 190/2502] eta: 0:31:02 lr: 0.000020 loss_cls: 4.2184 (4.7947) grad_norm: 5.6082 (8.8103) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-04 21:35:12 root] (utils.py 283): INFO Epoch: [0] [ 200/2502] eta: 0:30:49 lr: 0.000020 loss_cls: 4.1291 (4.7607) grad_norm: 5.4127 (8.6901) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-04 21:35:19 root] (utils.py 283): INFO Epoch: [0] [ 210/2502] eta: 0:30:38 lr: 0.000020 loss_cls: 4.2455 (4.7355) grad_norm: 5.4127 (8.5341) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-04 21:35:27 root] (utils.py 283): INFO Epoch: [0] [ 220/2502] eta: 0:30:26 lr: 0.000020 loss_cls: 4.3721 (4.7096) grad_norm: 5.3067 (8.3905) time: 0.7700 data: 0.0002 max mem: 8421 +[2024-12-04 21:35:35 root] (utils.py 283): INFO Epoch: [0] [ 230/2502] eta: 0:30:15 lr: 0.000020 loss_cls: 4.3507 (4.6924) grad_norm: 5.3107 (8.2622) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-04 21:35:42 root] (utils.py 283): INFO Epoch: [0] [ 240/2502] eta: 0:30:04 lr: 0.000020 loss_cls: 4.3507 (4.6744) grad_norm: 5.1852 (8.1770) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-04 21:35:50 root] (utils.py 283): INFO Epoch: [0] [ 250/2502] eta: 0:29:54 lr: 0.000020 loss_cls: 4.1851 (4.6484) grad_norm: 5.2321 (8.0678) time: 0.7705 data: 0.0003 max mem: 8421 +[2024-12-04 21:35:58 root] (utils.py 283): INFO Epoch: [0] [ 260/2502] eta: 0:29:44 lr: 0.000020 loss_cls: 4.0577 (4.6238) grad_norm: 5.2516 (7.9806) time: 0.7759 data: 0.0003 max mem: 8421 +[2024-12-04 21:36:06 root] (utils.py 283): INFO Epoch: [0] [ 270/2502] eta: 0:29:34 lr: 0.000020 loss_cls: 4.1352 (4.6049) grad_norm: 5.0702 (7.8752) time: 0.7753 data: 0.0002 max mem: 8421 +[2024-12-04 21:36:13 root] (utils.py 283): INFO Epoch: [0] [ 280/2502] eta: 0:29:24 lr: 0.000020 loss_cls: 4.3495 (4.5996) grad_norm: 4.9381 (7.7668) time: 0.7722 data: 0.0003 max mem: 8421 +[2024-12-04 21:36:21 root] (utils.py 283): INFO Epoch: [0] [ 290/2502] eta: 0:29:14 lr: 0.000020 loss_cls: 4.3495 (4.5794) grad_norm: 4.8191 (7.7034) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-04 21:36:29 root] (utils.py 283): INFO Epoch: [0] [ 300/2502] eta: 0:29:05 lr: 0.000020 loss_cls: 3.8538 (4.5590) grad_norm: 5.1101 (7.6336) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-04 21:36:36 root] (utils.py 283): INFO Epoch: [0] [ 310/2502] eta: 0:28:56 lr: 0.000020 loss_cls: 4.0944 (4.5465) grad_norm: 5.3724 (7.5622) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-04 21:36:44 root] (utils.py 283): INFO Epoch: [0] [ 320/2502] eta: 0:28:47 lr: 0.000020 loss_cls: 4.6114 (4.5500) grad_norm: 5.2402 (7.5076) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-04 21:36:52 root] (utils.py 283): INFO Epoch: [0] [ 330/2502] eta: 0:28:39 lr: 0.000020 loss_cls: 4.6770 (4.5390) grad_norm: 5.0511 (7.4343) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-04 21:37:00 root] (utils.py 283): INFO Epoch: [0] [ 340/2502] eta: 0:28:30 lr: 0.000020 loss_cls: 4.3831 (4.5356) grad_norm: 4.7921 (7.3549) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-04 21:37:07 root] (utils.py 283): INFO Epoch: [0] [ 350/2502] eta: 0:28:20 lr: 0.000020 loss_cls: 4.3768 (4.5263) grad_norm: 4.7718 (7.2962) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-04 21:37:15 root] (utils.py 283): INFO Epoch: [0] [ 360/2502] eta: 0:28:11 lr: 0.000020 loss_cls: 4.2740 (4.5163) grad_norm: 5.0747 (7.2628) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-04 21:37:23 root] (utils.py 283): INFO Epoch: [0] [ 370/2502] eta: 0:28:02 lr: 0.000020 loss_cls: 4.1570 (4.5071) grad_norm: 4.9901 (7.1992) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-04 21:37:30 root] (utils.py 283): INFO Epoch: [0] [ 380/2502] eta: 0:27:53 lr: 0.000020 loss_cls: 4.3534 (4.4975) grad_norm: 4.9901 (7.1529) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-04 21:37:38 root] (utils.py 283): INFO Epoch: [0] [ 390/2502] eta: 0:27:44 lr: 0.000020 loss_cls: 4.3534 (4.4880) grad_norm: 5.0439 (7.1011) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-04 21:37:46 root] (utils.py 283): INFO Epoch: [0] [ 400/2502] eta: 0:27:35 lr: 0.000020 loss_cls: 4.4078 (4.4852) grad_norm: 4.9424 (7.0514) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-04 21:37:53 root] (utils.py 283): INFO Epoch: [0] [ 410/2502] eta: 0:27:26 lr: 0.000020 loss_cls: 4.4086 (4.4854) grad_norm: 5.0648 (7.0046) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:01 root] (utils.py 283): INFO Epoch: [0] [ 420/2502] eta: 0:27:17 lr: 0.000020 loss_cls: 4.4692 (4.4813) grad_norm: 5.0648 (6.9599) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:09 root] (utils.py 283): INFO Epoch: [0] [ 430/2502] eta: 0:27:09 lr: 0.000020 loss_cls: 4.5887 (4.4814) grad_norm: 4.9725 (6.9195) time: 0.7747 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:17 root] (utils.py 283): INFO Epoch: [0] [ 440/2502] eta: 0:27:00 lr: 0.000020 loss_cls: 4.5482 (4.4815) grad_norm: 4.8858 (6.8697) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:24 root] (utils.py 283): INFO Epoch: [0] [ 450/2502] eta: 0:26:51 lr: 0.000020 loss_cls: 4.3188 (4.4746) grad_norm: 4.7843 (6.8249) time: 0.7703 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:32 root] (utils.py 283): INFO Epoch: [0] [ 460/2502] eta: 0:26:43 lr: 0.000020 loss_cls: 4.1481 (4.4629) grad_norm: 4.8991 (6.7901) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:40 root] (utils.py 283): INFO Epoch: [0] [ 470/2502] eta: 0:26:35 lr: 0.000020 loss_cls: 3.8501 (4.4474) grad_norm: 4.9302 (6.7511) time: 0.7732 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:48 root] (utils.py 283): INFO Epoch: [0] [ 480/2502] eta: 0:26:26 lr: 0.000020 loss_cls: 3.8977 (4.4399) grad_norm: 4.7092 (6.7079) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-04 21:38:56 root] (utils.py 283): INFO Epoch: [0] [ 490/2502] eta: 0:26:20 lr: 0.000020 loss_cls: 4.2546 (4.4403) grad_norm: 4.6947 (6.6698) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-04 21:39:04 root] (utils.py 283): INFO Epoch: [0] [ 500/2502] eta: 0:26:13 lr: 0.000020 loss_cls: 4.2549 (4.4318) grad_norm: 4.7841 (6.6358) time: 0.8176 data: 0.0003 max mem: 8421 +[2024-12-04 21:39:12 root] (utils.py 283): INFO Epoch: [0] [ 510/2502] eta: 0:26:07 lr: 0.000020 loss_cls: 4.2576 (4.4314) grad_norm: 4.8025 (6.5995) time: 0.8230 data: 0.0003 max mem: 8421 +[2024-12-04 21:39:21 root] (utils.py 283): INFO Epoch: [0] [ 520/2502] eta: 0:26:02 lr: 0.000020 loss_cls: 4.3634 (4.4288) grad_norm: 4.7601 (6.5656) time: 0.8422 data: 0.0003 max mem: 8421 +[2024-12-04 21:39:30 root] (utils.py 283): INFO Epoch: [0] [ 530/2502] eta: 0:26:00 lr: 0.000020 loss_cls: 4.3125 (4.4184) grad_norm: 4.7122 (6.5320) time: 0.9010 data: 0.0003 max mem: 8421 +[2024-12-04 21:39:38 root] (utils.py 283): INFO Epoch: [0] [ 540/2502] eta: 0:25:51 lr: 0.000020 loss_cls: 4.3348 (4.4120) grad_norm: 4.6422 (6.4993) time: 0.8568 data: 0.0003 max mem: 8421 +[2024-12-04 21:39:46 root] (utils.py 283): INFO Epoch: [0] [ 550/2502] eta: 0:25:43 lr: 0.000020 loss_cls: 4.3348 (4.4074) grad_norm: 4.8480 (6.4721) time: 0.7821 data: 0.0002 max mem: 8421 +[2024-12-04 21:39:54 root] (utils.py 283): INFO Epoch: [0] [ 560/2502] eta: 0:25:35 lr: 0.000020 loss_cls: 4.5330 (4.4089) grad_norm: 4.9318 (6.4454) time: 0.7908 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:01 root] (utils.py 283): INFO Epoch: [0] [ 570/2502] eta: 0:25:27 lr: 0.000020 loss_cls: 4.5486 (4.4080) grad_norm: 4.8934 (6.4253) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:09 root] (utils.py 283): INFO Epoch: [0] [ 580/2502] eta: 0:25:19 lr: 0.000020 loss_cls: 4.3084 (4.4056) grad_norm: 4.7242 (6.3959) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:17 root] (utils.py 283): INFO Epoch: [0] [ 590/2502] eta: 0:25:10 lr: 0.000020 loss_cls: 4.3981 (4.4024) grad_norm: 4.6461 (6.3662) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:25 root] (utils.py 283): INFO Epoch: [0] [ 600/2502] eta: 0:25:02 lr: 0.000020 loss_cls: 4.3037 (4.3976) grad_norm: 4.7087 (6.3533) time: 0.7759 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:33 root] (utils.py 283): INFO Epoch: [0] [ 610/2502] eta: 0:24:53 lr: 0.000020 loss_cls: 4.0983 (4.3940) grad_norm: 4.8879 (6.3422) time: 0.7723 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:40 root] (utils.py 283): INFO Epoch: [0] [ 620/2502] eta: 0:24:45 lr: 0.000020 loss_cls: 4.1695 (4.3930) grad_norm: 4.7891 (6.3203) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:48 root] (utils.py 283): INFO Epoch: [0] [ 630/2502] eta: 0:24:37 lr: 0.000020 loss_cls: 4.3410 (4.3912) grad_norm: 4.7646 (6.2992) time: 0.7740 data: 0.0002 max mem: 8421 +[2024-12-04 21:40:56 root] (utils.py 283): INFO Epoch: [0] [ 640/2502] eta: 0:24:28 lr: 0.000020 loss_cls: 4.2758 (4.3863) grad_norm: 4.7761 (6.2743) time: 0.7735 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:04 root] (utils.py 283): INFO Epoch: [0] [ 650/2502] eta: 0:24:20 lr: 0.000020 loss_cls: 4.1075 (4.3828) grad_norm: 4.7367 (6.2536) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:11 root] (utils.py 283): INFO Epoch: [0] [ 660/2502] eta: 0:24:12 lr: 0.000020 loss_cls: 3.9228 (4.3729) grad_norm: 4.7443 (6.2317) time: 0.7826 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:19 root] (utils.py 283): INFO Epoch: [0] [ 670/2502] eta: 0:24:04 lr: 0.000020 loss_cls: 4.0652 (4.3740) grad_norm: 4.7691 (6.2172) time: 0.7786 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:27 root] (utils.py 283): INFO Epoch: [0] [ 680/2502] eta: 0:23:56 lr: 0.000020 loss_cls: 4.5442 (4.3719) grad_norm: 4.7691 (6.1975) time: 0.7740 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:35 root] (utils.py 283): INFO Epoch: [0] [ 690/2502] eta: 0:23:47 lr: 0.000020 loss_cls: 3.8595 (4.3642) grad_norm: 4.7352 (6.1765) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:42 root] (utils.py 283): INFO Epoch: [0] [ 700/2502] eta: 0:23:39 lr: 0.000020 loss_cls: 3.8245 (4.3589) grad_norm: 4.6864 (6.1567) time: 0.7727 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:50 root] (utils.py 283): INFO Epoch: [0] [ 710/2502] eta: 0:23:31 lr: 0.000020 loss_cls: 4.1013 (4.3551) grad_norm: 4.6283 (6.1455) time: 0.7714 data: 0.0002 max mem: 8421 +[2024-12-04 21:41:58 root] (utils.py 283): INFO Epoch: [0] [ 720/2502] eta: 0:23:23 lr: 0.000020 loss_cls: 4.1013 (4.3490) grad_norm: 4.7209 (6.1250) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-04 21:42:06 root] (utils.py 283): INFO Epoch: [0] [ 730/2502] eta: 0:23:15 lr: 0.000020 loss_cls: 4.1587 (4.3459) grad_norm: 4.7226 (6.1105) time: 0.7767 data: 0.0002 max mem: 8421 +[2024-12-04 21:42:13 root] (utils.py 283): INFO Epoch: [0] [ 740/2502] eta: 0:23:06 lr: 0.000020 loss_cls: 4.3841 (4.3484) grad_norm: 4.6762 (6.1020) time: 0.7704 data: 0.0002 max mem: 8421 +[2024-12-04 21:42:21 root] (utils.py 283): INFO Epoch: [0] [ 750/2502] eta: 0:22:58 lr: 0.000020 loss_cls: 4.4210 (4.3441) grad_norm: 4.5652 (6.0834) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-04 21:42:29 root] (utils.py 283): INFO Epoch: [0] [ 760/2502] eta: 0:22:50 lr: 0.000020 loss_cls: 4.3101 (4.3413) grad_norm: 4.5652 (6.0638) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-04 21:42:37 root] (utils.py 283): INFO Epoch: [0] [ 770/2502] eta: 0:22:43 lr: 0.000020 loss_cls: 4.3101 (4.3394) grad_norm: 4.6043 (6.0486) time: 0.7935 data: 0.0002 max mem: 8421 +[2024-12-04 21:42:45 root] (utils.py 283): INFO Epoch: [0] [ 780/2502] eta: 0:22:35 lr: 0.000020 loss_cls: 4.2339 (4.3379) grad_norm: 4.6954 (6.0311) time: 0.8064 data: 0.0003 max mem: 8421 +[2024-12-04 21:42:53 root] (utils.py 283): INFO Epoch: [0] [ 790/2502] eta: 0:22:28 lr: 0.000020 loss_cls: 4.0961 (4.3331) grad_norm: 4.6215 (6.0120) time: 0.8251 data: 0.0003 max mem: 8421 +[2024-12-04 21:43:02 root] (utils.py 283): INFO Epoch: [0] [ 800/2502] eta: 0:22:22 lr: 0.000020 loss_cls: 4.0369 (4.3307) grad_norm: 4.5829 (5.9949) time: 0.8307 data: 0.0003 max mem: 8421 +[2024-12-04 21:43:10 root] (utils.py 283): INFO Epoch: [0] [ 810/2502] eta: 0:22:15 lr: 0.000020 loss_cls: 3.7675 (4.3204) grad_norm: 4.7050 (5.9781) time: 0.8411 data: 0.0003 max mem: 8421 +[2024-12-04 21:43:18 root] (utils.py 283): INFO Epoch: [0] [ 820/2502] eta: 0:22:07 lr: 0.000020 loss_cls: 3.9094 (4.3196) grad_norm: 4.7712 (5.9745) time: 0.8101 data: 0.0003 max mem: 8421 +[2024-12-04 21:43:26 root] (utils.py 283): INFO Epoch: [0] [ 830/2502] eta: 0:21:58 lr: 0.000020 loss_cls: 4.3415 (4.3131) grad_norm: 4.6534 (5.9597) time: 0.7731 data: 0.0003 max mem: 8421 +[2024-12-04 21:43:34 root] (utils.py 283): INFO Epoch: [0] [ 840/2502] eta: 0:21:51 lr: 0.000020 loss_cls: 3.9104 (4.3105) grad_norm: 5.0784 (5.9707) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-04 21:43:41 root] (utils.py 283): INFO Epoch: [0] [ 850/2502] eta: 0:21:43 lr: 0.000020 loss_cls: 4.2091 (4.3064) grad_norm: 5.1174 (5.9590) time: 0.7905 data: 0.0002 max mem: 8421 +[2024-12-04 21:43:49 root] (utils.py 283): INFO Epoch: [0] [ 860/2502] eta: 0:21:35 lr: 0.000020 loss_cls: 4.1933 (4.3047) grad_norm: 4.6734 (5.9453) time: 0.7911 data: 0.0002 max mem: 8421 +[2024-12-04 21:43:57 root] (utils.py 283): INFO Epoch: [0] [ 870/2502] eta: 0:21:27 lr: 0.000020 loss_cls: 4.1239 (4.3027) grad_norm: 4.6446 (5.9329) time: 0.7829 data: 0.0002 max mem: 8421 +[2024-12-04 21:44:05 root] (utils.py 283): INFO Epoch: [0] [ 880/2502] eta: 0:21:19 lr: 0.000020 loss_cls: 4.1239 (4.2985) grad_norm: 4.6169 (5.9220) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-04 21:44:13 root] (utils.py 283): INFO Epoch: [0] [ 890/2502] eta: 0:21:11 lr: 0.000020 loss_cls: 3.9500 (4.2942) grad_norm: 4.6127 (5.9085) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-04 21:44:21 root] (utils.py 283): INFO Epoch: [0] [ 900/2502] eta: 0:21:03 lr: 0.000020 loss_cls: 4.1333 (4.2925) grad_norm: 4.6455 (5.8962) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-04 21:44:28 root] (utils.py 283): INFO Epoch: [0] [ 910/2502] eta: 0:20:55 lr: 0.000020 loss_cls: 4.3342 (4.2907) grad_norm: 4.6185 (5.8828) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-04 21:44:36 root] (utils.py 283): INFO Epoch: [0] [ 920/2502] eta: 0:20:46 lr: 0.000020 loss_cls: 4.4820 (4.2906) grad_norm: 4.5780 (5.8691) time: 0.7753 data: 0.0002 max mem: 8421 +[2024-12-04 21:44:44 root] (utils.py 283): INFO Epoch: [0] [ 930/2502] eta: 0:20:39 lr: 0.000020 loss_cls: 4.4462 (4.2921) grad_norm: 4.5749 (5.8548) time: 0.7893 data: 0.0002 max mem: 8421 +[2024-12-04 21:44:52 root] (utils.py 283): INFO Epoch: [0] [ 940/2502] eta: 0:20:31 lr: 0.000020 loss_cls: 4.4169 (4.2892) grad_norm: 4.6643 (5.8436) time: 0.7875 data: 0.0002 max mem: 8421 +[2024-12-04 21:45:00 root] (utils.py 283): INFO Epoch: [0] [ 950/2502] eta: 0:20:23 lr: 0.000020 loss_cls: 3.5776 (4.2821) grad_norm: 4.7399 (5.8367) time: 0.7864 data: 0.0002 max mem: 8421 +[2024-12-04 21:45:08 root] (utils.py 283): INFO Epoch: [0] [ 960/2502] eta: 0:20:15 lr: 0.000020 loss_cls: 4.0207 (4.2813) grad_norm: 4.6656 (5.8266) time: 0.7953 data: 0.0003 max mem: 8421 +[2024-12-04 21:45:15 root] (utils.py 283): INFO Epoch: [0] [ 970/2502] eta: 0:20:07 lr: 0.000020 loss_cls: 4.3543 (4.2805) grad_norm: 4.4701 (5.8130) time: 0.7789 data: 0.0002 max mem: 8421 +[2024-12-04 21:45:23 root] (utils.py 283): INFO Epoch: [0] [ 980/2502] eta: 0:19:59 lr: 0.000020 loss_cls: 4.2519 (4.2779) grad_norm: 4.6022 (5.8065) time: 0.7759 data: 0.0002 max mem: 8421 +[2024-12-04 21:45:31 root] (utils.py 283): INFO Epoch: [0] [ 990/2502] eta: 0:19:51 lr: 0.000020 loss_cls: 4.3792 (4.2764) grad_norm: 4.6892 (5.8007) time: 0.7952 data: 0.0003 max mem: 8421 +[2024-12-04 21:45:39 root] (utils.py 283): INFO Epoch: [0] [1000/2502] eta: 0:19:44 lr: 0.000020 loss_cls: 4.2875 (4.2740) grad_norm: 4.6493 (5.7879) time: 0.8032 data: 0.0003 max mem: 8421 +[2024-12-04 21:45:47 root] (utils.py 283): INFO Epoch: [0] [1010/2502] eta: 0:19:36 lr: 0.000020 loss_cls: 4.2875 (4.2739) grad_norm: 4.4855 (5.7815) time: 0.7931 data: 0.0003 max mem: 8421 +[2024-12-04 21:45:55 root] (utils.py 283): INFO Epoch: [0] [1020/2502] eta: 0:19:28 lr: 0.000020 loss_cls: 4.3024 (4.2717) grad_norm: 4.5537 (5.7721) time: 0.7909 data: 0.0002 max mem: 8421 +[2024-12-04 21:46:03 root] (utils.py 283): INFO Epoch: [0] [1030/2502] eta: 0:19:20 lr: 0.000020 loss_cls: 4.0802 (4.2669) grad_norm: 4.6968 (5.7643) time: 0.7903 data: 0.0002 max mem: 8421 +[2024-12-04 21:46:11 root] (utils.py 283): INFO Epoch: [0] [1040/2502] eta: 0:19:12 lr: 0.000020 loss_cls: 4.2875 (4.2674) grad_norm: 4.6215 (5.7544) time: 0.7869 data: 0.0002 max mem: 8421 +[2024-12-04 21:46:19 root] (utils.py 283): INFO Epoch: [0] [1050/2502] eta: 0:19:04 lr: 0.000020 loss_cls: 4.2525 (4.2652) grad_norm: 4.5937 (5.7457) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-04 21:46:27 root] (utils.py 283): INFO Epoch: [0] [1060/2502] eta: 0:18:56 lr: 0.000020 loss_cls: 4.1947 (4.2651) grad_norm: 4.6368 (5.7355) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-04 21:46:34 root] (utils.py 283): INFO Epoch: [0] [1070/2502] eta: 0:18:48 lr: 0.000020 loss_cls: 4.1947 (4.2633) grad_norm: 4.4813 (5.7242) time: 0.7821 data: 0.0002 max mem: 8421 +[2024-12-04 21:46:42 root] (utils.py 283): INFO Epoch: [0] [1080/2502] eta: 0:18:40 lr: 0.000020 loss_cls: 4.1719 (4.2616) grad_norm: 4.4770 (5.7155) time: 0.7806 data: 0.0002 max mem: 8421 +[2024-12-04 21:46:50 root] (utils.py 283): INFO Epoch: [0] [1090/2502] eta: 0:18:32 lr: 0.000020 loss_cls: 4.0750 (4.2588) grad_norm: 4.7295 (5.7059) time: 0.7805 data: 0.0002 max mem: 8421 +[2024-12-04 21:46:58 root] (utils.py 283): INFO Epoch: [0] [1100/2502] eta: 0:18:24 lr: 0.000020 loss_cls: 4.0750 (4.2568) grad_norm: 4.6760 (5.6991) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-04 21:47:06 root] (utils.py 283): INFO Epoch: [0] [1110/2502] eta: 0:18:17 lr: 0.000020 loss_cls: 4.2148 (4.2553) grad_norm: 4.7463 (5.6930) time: 0.7884 data: 0.0002 max mem: 8421 +[2024-12-04 21:47:14 root] (utils.py 283): INFO Epoch: [0] [1120/2502] eta: 0:18:09 lr: 0.000020 loss_cls: 4.2769 (4.2558) grad_norm: 4.7463 (5.6843) time: 0.7911 data: 0.0002 max mem: 8421 +[2024-12-04 21:47:22 root] (utils.py 283): INFO Epoch: [0] [1130/2502] eta: 0:18:01 lr: 0.000020 loss_cls: 4.3598 (4.2560) grad_norm: 4.6497 (5.6767) time: 0.7849 data: 0.0002 max mem: 8421 +[2024-12-04 21:47:29 root] (utils.py 283): INFO Epoch: [0] [1140/2502] eta: 0:17:53 lr: 0.000020 loss_cls: 4.2409 (4.2532) grad_norm: 4.5779 (5.6679) time: 0.7863 data: 0.0002 max mem: 8421 +[2024-12-04 21:47:37 root] (utils.py 283): INFO Epoch: [0] [1150/2502] eta: 0:17:45 lr: 0.000020 loss_cls: 4.0719 (4.2509) grad_norm: 4.4031 (5.6578) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-04 21:47:45 root] (utils.py 283): INFO Epoch: [0] [1160/2502] eta: 0:17:37 lr: 0.000020 loss_cls: 4.1579 (4.2499) grad_norm: 4.4031 (5.6481) time: 0.7881 data: 0.0003 max mem: 8421 +[2024-12-04 21:47:53 root] (utils.py 283): INFO Epoch: [0] [1170/2502] eta: 0:17:29 lr: 0.000020 loss_cls: 4.0134 (4.2473) grad_norm: 4.5670 (5.6398) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-04 21:48:01 root] (utils.py 283): INFO Epoch: [0] [1180/2502] eta: 0:17:21 lr: 0.000020 loss_cls: 4.3495 (4.2488) grad_norm: 4.5114 (5.6315) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-04 21:48:09 root] (utils.py 283): INFO Epoch: [0] [1190/2502] eta: 0:17:13 lr: 0.000020 loss_cls: 4.3896 (4.2495) grad_norm: 4.4109 (5.6230) time: 0.7827 data: 0.0002 max mem: 8421 +[2024-12-04 21:48:16 root] (utils.py 283): INFO Epoch: [0] [1200/2502] eta: 0:17:05 lr: 0.000020 loss_cls: 4.3254 (4.2495) grad_norm: 4.4745 (5.6148) time: 0.7749 data: 0.0002 max mem: 8421 +[2024-12-04 21:48:24 root] (utils.py 283): INFO Epoch: [0] [1210/2502] eta: 0:16:57 lr: 0.000020 loss_cls: 4.1263 (4.2474) grad_norm: 4.6349 (5.6074) time: 0.7734 data: 0.0002 max mem: 8421 +[2024-12-04 21:48:32 root] (utils.py 283): INFO Epoch: [0] [1220/2502] eta: 0:16:49 lr: 0.000020 loss_cls: 4.0719 (4.2461) grad_norm: 4.7493 (5.6019) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-04 21:48:40 root] (utils.py 283): INFO Epoch: [0] [1230/2502] eta: 0:16:41 lr: 0.000020 loss_cls: 4.1536 (4.2448) grad_norm: 4.5521 (5.5934) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-04 21:48:48 root] (utils.py 283): INFO Epoch: [0] [1240/2502] eta: 0:16:33 lr: 0.000020 loss_cls: 4.2781 (4.2439) grad_norm: 4.4585 (5.5846) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-04 21:48:55 root] (utils.py 283): INFO Epoch: [0] [1250/2502] eta: 0:16:26 lr: 0.000020 loss_cls: 4.0411 (4.2412) grad_norm: 4.4269 (5.5750) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-04 21:49:03 root] (utils.py 283): INFO Epoch: [0] [1260/2502] eta: 0:16:18 lr: 0.000020 loss_cls: 4.2717 (4.2411) grad_norm: 4.4936 (5.5701) time: 0.7789 data: 0.0002 max mem: 8421 +[2024-12-04 21:49:11 root] (utils.py 283): INFO Epoch: [0] [1270/2502] eta: 0:16:10 lr: 0.000020 loss_cls: 4.2777 (4.2411) grad_norm: 4.5803 (5.5637) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-04 21:49:19 root] (utils.py 283): INFO Epoch: [0] [1280/2502] eta: 0:16:02 lr: 0.000020 loss_cls: 4.3456 (4.2419) grad_norm: 4.4853 (5.5570) time: 0.8000 data: 0.0003 max mem: 8421 +[2024-12-04 21:49:27 root] (utils.py 283): INFO Epoch: [0] [1290/2502] eta: 0:15:54 lr: 0.000020 loss_cls: 4.3965 (4.2424) grad_norm: 4.4853 (5.5499) time: 0.8053 data: 0.0003 max mem: 8421 +[2024-12-04 21:49:35 root] (utils.py 283): INFO Epoch: [0] [1300/2502] eta: 0:15:46 lr: 0.000020 loss_cls: 4.3257 (4.2417) grad_norm: 4.5446 (5.5423) time: 0.7912 data: 0.0003 max mem: 8421 +[2024-12-04 21:49:43 root] (utils.py 283): INFO Epoch: [0] [1310/2502] eta: 0:15:39 lr: 0.000020 loss_cls: 4.0764 (4.2389) grad_norm: 4.5644 (5.5363) time: 0.7896 data: 0.0003 max mem: 8421 +[2024-12-04 21:49:51 root] (utils.py 283): INFO Epoch: [0] [1320/2502] eta: 0:15:31 lr: 0.000020 loss_cls: 3.8700 (4.2356) grad_norm: 4.7384 (5.5305) time: 0.7971 data: 0.0003 max mem: 8421 +[2024-12-04 21:49:59 root] (utils.py 283): INFO Epoch: [0] [1330/2502] eta: 0:15:23 lr: 0.000020 loss_cls: 4.1843 (4.2355) grad_norm: 4.7384 (5.5256) time: 0.7892 data: 0.0003 max mem: 8421 +[2024-12-04 21:50:07 root] (utils.py 283): INFO Epoch: [0] [1340/2502] eta: 0:15:15 lr: 0.000020 loss_cls: 4.3480 (4.2355) grad_norm: 4.5932 (5.5177) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-04 21:50:15 root] (utils.py 283): INFO Epoch: [0] [1350/2502] eta: 0:15:07 lr: 0.000020 loss_cls: 4.1887 (4.2345) grad_norm: 4.4067 (5.5102) time: 0.7964 data: 0.0002 max mem: 8421 +[2024-12-04 21:50:23 root] (utils.py 283): INFO Epoch: [0] [1360/2502] eta: 0:14:59 lr: 0.000020 loss_cls: 4.2486 (4.2353) grad_norm: 4.5242 (5.5085) time: 0.8017 data: 0.0002 max mem: 8421 +[2024-12-04 21:50:31 root] (utils.py 283): INFO Epoch: [0] [1370/2502] eta: 0:14:52 lr: 0.000020 loss_cls: 4.2962 (4.2353) grad_norm: 4.5287 (5.5018) time: 0.7943 data: 0.0002 max mem: 8421 +[2024-12-04 21:50:38 root] (utils.py 283): INFO Epoch: [0] [1380/2502] eta: 0:14:44 lr: 0.000020 loss_cls: 4.2962 (4.2353) grad_norm: 4.4693 (5.4946) time: 0.7859 data: 0.0002 max mem: 8421 +[2024-12-04 21:50:46 root] (utils.py 283): INFO Epoch: [0] [1390/2502] eta: 0:14:36 lr: 0.000020 loss_cls: 4.3129 (4.2351) grad_norm: 4.6372 (5.4891) time: 0.7834 data: 0.0002 max mem: 8421 +[2024-12-04 21:50:54 root] (utils.py 283): INFO Epoch: [0] [1400/2502] eta: 0:14:28 lr: 0.000020 loss_cls: 4.2516 (4.2340) grad_norm: 4.6155 (5.4824) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-04 21:51:02 root] (utils.py 283): INFO Epoch: [0] [1410/2502] eta: 0:14:20 lr: 0.000020 loss_cls: 4.3600 (4.2354) grad_norm: 4.6648 (5.4788) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-04 21:51:10 root] (utils.py 283): INFO Epoch: [0] [1420/2502] eta: 0:14:12 lr: 0.000020 loss_cls: 4.1761 (4.2335) grad_norm: 4.6648 (5.4746) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-04 21:51:18 root] (utils.py 283): INFO Epoch: [0] [1430/2502] eta: 0:14:04 lr: 0.000020 loss_cls: 4.0963 (4.2327) grad_norm: 4.5204 (5.4696) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-04 21:51:25 root] (utils.py 283): INFO Epoch: [0] [1440/2502] eta: 0:13:56 lr: 0.000020 loss_cls: 4.1369 (4.2309) grad_norm: 4.4801 (5.4626) time: 0.7845 data: 0.0002 max mem: 8421 +[2024-12-04 21:51:33 root] (utils.py 283): INFO Epoch: [0] [1450/2502] eta: 0:13:48 lr: 0.000020 loss_cls: 4.2827 (4.2306) grad_norm: 4.3951 (5.4573) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-04 21:51:41 root] (utils.py 283): INFO Epoch: [0] [1460/2502] eta: 0:13:40 lr: 0.000020 loss_cls: 4.4178 (4.2309) grad_norm: 4.6802 (5.4519) time: 0.7819 data: 0.0002 max mem: 8421 +[2024-12-04 21:51:49 root] (utils.py 283): INFO Epoch: [0] [1470/2502] eta: 0:13:32 lr: 0.000020 loss_cls: 4.3945 (4.2293) grad_norm: 4.6247 (5.4469) time: 0.7806 data: 0.0002 max mem: 8421 +[2024-12-04 21:51:57 root] (utils.py 283): INFO Epoch: [0] [1480/2502] eta: 0:13:24 lr: 0.000020 loss_cls: 3.7985 (4.2255) grad_norm: 4.5270 (5.4404) time: 0.7818 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:04 root] (utils.py 283): INFO Epoch: [0] [1490/2502] eta: 0:13:17 lr: 0.000020 loss_cls: 3.6106 (4.2208) grad_norm: 4.5355 (5.4349) time: 0.7810 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:12 root] (utils.py 283): INFO Epoch: [0] [1500/2502] eta: 0:13:09 lr: 0.000020 loss_cls: 3.7919 (4.2195) grad_norm: 4.5187 (5.4291) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:20 root] (utils.py 283): INFO Epoch: [0] [1510/2502] eta: 0:13:01 lr: 0.000020 loss_cls: 4.2252 (4.2190) grad_norm: 4.5187 (5.4241) time: 0.7803 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:28 root] (utils.py 283): INFO Epoch: [0] [1520/2502] eta: 0:12:53 lr: 0.000020 loss_cls: 4.2409 (4.2192) grad_norm: 4.6096 (5.4193) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:36 root] (utils.py 283): INFO Epoch: [0] [1530/2502] eta: 0:12:45 lr: 0.000020 loss_cls: 4.1850 (4.2179) grad_norm: 4.4792 (5.4150) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:44 root] (utils.py 283): INFO Epoch: [0] [1540/2502] eta: 0:12:37 lr: 0.000020 loss_cls: 4.1509 (4.2171) grad_norm: 4.4792 (5.4103) time: 0.7852 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:51 root] (utils.py 283): INFO Epoch: [0] [1550/2502] eta: 0:12:29 lr: 0.000020 loss_cls: 4.0604 (4.2149) grad_norm: 4.4939 (5.4046) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-04 21:52:59 root] (utils.py 283): INFO Epoch: [0] [1560/2502] eta: 0:12:21 lr: 0.000020 loss_cls: 3.9620 (4.2132) grad_norm: 4.4939 (5.4008) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-04 21:53:07 root] (utils.py 283): INFO Epoch: [0] [1570/2502] eta: 0:12:13 lr: 0.000020 loss_cls: 3.9360 (4.2097) grad_norm: 4.5138 (5.3959) time: 0.7847 data: 0.0002 max mem: 8421 +[2024-12-04 21:53:15 root] (utils.py 283): INFO Epoch: [0] [1580/2502] eta: 0:12:05 lr: 0.000020 loss_cls: 3.9360 (4.2086) grad_norm: 4.5800 (5.3996) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-04 21:53:23 root] (utils.py 283): INFO Epoch: [0] [1590/2502] eta: 0:11:57 lr: 0.000020 loss_cls: 4.3084 (4.2095) grad_norm: 4.7243 (5.3962) time: 0.7826 data: 0.0002 max mem: 8421 +[2024-12-04 21:53:31 root] (utils.py 283): INFO Epoch: [0] [1600/2502] eta: 0:11:50 lr: 0.000020 loss_cls: 4.3481 (4.2104) grad_norm: 4.6638 (5.3922) time: 0.7817 data: 0.0002 max mem: 8421 +[2024-12-04 21:53:38 root] (utils.py 283): INFO Epoch: [0] [1610/2502] eta: 0:11:42 lr: 0.000020 loss_cls: 4.3295 (4.2101) grad_norm: 4.4420 (5.3879) time: 0.7801 data: 0.0002 max mem: 8421 +[2024-12-04 21:53:46 root] (utils.py 283): INFO Epoch: [0] [1620/2502] eta: 0:11:34 lr: 0.000020 loss_cls: 4.0716 (4.2097) grad_norm: 4.5145 (5.3833) time: 0.7819 data: 0.0002 max mem: 8421 +[2024-12-04 21:53:54 root] (utils.py 283): INFO Epoch: [0] [1630/2502] eta: 0:11:26 lr: 0.000020 loss_cls: 4.2209 (4.2095) grad_norm: 4.5152 (5.3780) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-04 21:54:02 root] (utils.py 283): INFO Epoch: [0] [1640/2502] eta: 0:11:18 lr: 0.000020 loss_cls: 4.2180 (4.2084) grad_norm: 4.5610 (5.3745) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-04 21:54:10 root] (utils.py 283): INFO Epoch: [0] [1650/2502] eta: 0:11:10 lr: 0.000020 loss_cls: 3.9171 (4.2045) grad_norm: 4.6699 (5.3719) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-04 21:54:17 root] (utils.py 283): INFO Epoch: [0] [1660/2502] eta: 0:11:02 lr: 0.000020 loss_cls: 3.5960 (4.2011) grad_norm: 4.4438 (5.3686) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-04 21:54:25 root] (utils.py 283): INFO Epoch: [0] [1670/2502] eta: 0:10:54 lr: 0.000020 loss_cls: 3.7488 (4.2004) grad_norm: 4.4402 (5.3647) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-04 21:54:33 root] (utils.py 283): INFO Epoch: [0] [1680/2502] eta: 0:10:46 lr: 0.000020 loss_cls: 4.2758 (4.1988) grad_norm: 4.4378 (5.3592) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-04 21:54:41 root] (utils.py 283): INFO Epoch: [0] [1690/2502] eta: 0:10:39 lr: 0.000020 loss_cls: 4.2166 (4.1980) grad_norm: 4.4146 (5.3538) time: 0.7935 data: 0.0003 max mem: 8421 +[2024-12-04 21:54:49 root] (utils.py 283): INFO Epoch: [0] [1700/2502] eta: 0:10:31 lr: 0.000020 loss_cls: 4.3667 (4.1998) grad_norm: 4.5254 (5.3531) time: 0.8036 data: 0.0003 max mem: 8421 +[2024-12-04 21:54:58 root] (utils.py 283): INFO Epoch: [0] [1710/2502] eta: 0:10:23 lr: 0.000020 loss_cls: 4.3273 (4.1987) grad_norm: 4.5727 (5.3516) time: 0.8291 data: 0.0003 max mem: 8421 +[2024-12-04 21:55:06 root] (utils.py 283): INFO Epoch: [0] [1720/2502] eta: 0:10:16 lr: 0.000020 loss_cls: 3.9400 (4.1971) grad_norm: 4.4786 (5.3469) time: 0.8469 data: 0.0003 max mem: 8421 +[2024-12-04 21:55:15 root] (utils.py 283): INFO Epoch: [0] [1730/2502] eta: 0:10:08 lr: 0.000020 loss_cls: 4.3031 (4.1978) grad_norm: 4.4786 (5.3451) time: 0.8671 data: 0.0003 max mem: 8421 +[2024-12-04 21:55:26 root] (utils.py 283): INFO Epoch: [0] [1740/2502] eta: 0:10:02 lr: 0.000020 loss_cls: 4.2496 (4.1963) grad_norm: 4.5431 (5.3439) time: 0.9914 data: 0.0003 max mem: 8421 +[2024-12-04 21:55:34 root] (utils.py 283): INFO Epoch: [0] [1750/2502] eta: 0:09:54 lr: 0.000020 loss_cls: 3.7359 (4.1948) grad_norm: 4.6150 (5.3403) time: 0.9354 data: 0.0003 max mem: 8421 +[2024-12-04 21:55:42 root] (utils.py 283): INFO Epoch: [0] [1760/2502] eta: 0:09:46 lr: 0.000020 loss_cls: 3.7045 (4.1936) grad_norm: 4.6575 (5.3366) time: 0.8017 data: 0.0003 max mem: 8421 +[2024-12-04 21:55:50 root] (utils.py 283): INFO Epoch: [0] [1770/2502] eta: 0:09:38 lr: 0.000020 loss_cls: 4.2316 (4.1936) grad_norm: 4.6411 (5.3330) time: 0.8273 data: 0.0003 max mem: 8421 +[2024-12-04 21:55:58 root] (utils.py 283): INFO Epoch: [0] [1780/2502] eta: 0:09:30 lr: 0.000020 loss_cls: 4.2316 (4.1924) grad_norm: 4.5842 (5.3321) time: 0.8063 data: 0.0003 max mem: 8421 +[2024-12-04 21:56:06 root] (utils.py 283): INFO Epoch: [0] [1790/2502] eta: 0:09:22 lr: 0.000020 loss_cls: 4.0979 (4.1908) grad_norm: 4.5409 (5.3270) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-04 21:56:14 root] (utils.py 283): INFO Epoch: [0] [1800/2502] eta: 0:09:14 lr: 0.000020 loss_cls: 4.1258 (4.1913) grad_norm: 4.5409 (5.3231) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-04 21:56:22 root] (utils.py 283): INFO Epoch: [0] [1810/2502] eta: 0:09:07 lr: 0.000020 loss_cls: 4.1016 (4.1899) grad_norm: 4.6076 (5.3206) time: 0.8173 data: 0.0003 max mem: 8421 +[2024-12-04 21:56:32 root] (utils.py 283): INFO Epoch: [0] [1820/2502] eta: 0:09:00 lr: 0.000020 loss_cls: 4.0895 (4.1896) grad_norm: 4.4379 (5.3162) time: 0.9137 data: 0.0003 max mem: 8421 +[2024-12-04 21:56:43 root] (utils.py 283): INFO Epoch: [0] [1830/2502] eta: 0:08:53 lr: 0.000020 loss_cls: 4.2300 (4.1910) grad_norm: 4.4591 (5.3128) time: 1.0610 data: 0.0003 max mem: 8421 +[2024-12-04 21:56:51 root] (utils.py 283): INFO Epoch: [0] [1840/2502] eta: 0:08:45 lr: 0.000020 loss_cls: 4.1973 (4.1894) grad_norm: 4.4591 (5.3083) time: 0.9665 data: 0.0003 max mem: 8421 +[2024-12-04 21:57:01 root] (utils.py 283): INFO Epoch: [0] [1850/2502] eta: 0:08:38 lr: 0.000020 loss_cls: 3.9931 (4.1879) grad_norm: 4.4213 (5.3050) time: 0.8725 data: 0.0003 max mem: 8421 +[2024-12-04 21:57:09 root] (utils.py 283): INFO Epoch: [0] [1860/2502] eta: 0:08:30 lr: 0.000020 loss_cls: 3.9954 (4.1866) grad_norm: 4.4999 (5.3009) time: 0.8657 data: 0.0003 max mem: 8421 +[2024-12-04 21:57:17 root] (utils.py 283): INFO Epoch: [0] [1870/2502] eta: 0:08:22 lr: 0.000020 loss_cls: 4.0100 (4.1848) grad_norm: 4.4817 (5.2965) time: 0.7977 data: 0.0003 max mem: 8421 +[2024-12-04 21:57:25 root] (utils.py 283): INFO Epoch: [0] [1880/2502] eta: 0:08:14 lr: 0.000020 loss_cls: 4.4618 (4.1859) grad_norm: 4.6585 (5.2958) time: 0.8005 data: 0.0003 max mem: 8421 +[2024-12-04 21:57:33 root] (utils.py 283): INFO Epoch: [0] [1890/2502] eta: 0:08:06 lr: 0.000020 loss_cls: 4.3837 (4.1843) grad_norm: 4.8192 (5.2934) time: 0.7920 data: 0.0002 max mem: 8421 +[2024-12-04 21:57:40 root] (utils.py 283): INFO Epoch: [0] [1900/2502] eta: 0:07:58 lr: 0.000020 loss_cls: 3.9348 (4.1828) grad_norm: 4.5393 (5.2887) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-04 21:57:48 root] (utils.py 283): INFO Epoch: [0] [1910/2502] eta: 0:07:50 lr: 0.000020 loss_cls: 4.0715 (4.1827) grad_norm: 4.4191 (5.2848) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-04 21:57:56 root] (utils.py 283): INFO Epoch: [0] [1920/2502] eta: 0:07:42 lr: 0.000020 loss_cls: 3.8526 (4.1795) grad_norm: 4.4240 (5.2802) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:04 root] (utils.py 283): INFO Epoch: [0] [1930/2502] eta: 0:07:34 lr: 0.000020 loss_cls: 3.7699 (4.1784) grad_norm: 4.4414 (5.2767) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:12 root] (utils.py 283): INFO Epoch: [0] [1940/2502] eta: 0:07:26 lr: 0.000020 loss_cls: 3.9962 (4.1770) grad_norm: 4.3849 (5.2719) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:20 root] (utils.py 283): INFO Epoch: [0] [1950/2502] eta: 0:07:18 lr: 0.000020 loss_cls: 3.9962 (4.1755) grad_norm: 4.3010 (5.2680) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:28 root] (utils.py 283): INFO Epoch: [0] [1960/2502] eta: 0:07:10 lr: 0.000020 loss_cls: 4.0235 (4.1741) grad_norm: 4.2664 (5.2634) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:35 root] (utils.py 283): INFO Epoch: [0] [1970/2502] eta: 0:07:02 lr: 0.000020 loss_cls: 3.9975 (4.1720) grad_norm: 4.3036 (5.2595) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:43 root] (utils.py 283): INFO Epoch: [0] [1980/2502] eta: 0:06:54 lr: 0.000020 loss_cls: 3.8008 (4.1710) grad_norm: 4.5291 (5.2563) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:51 root] (utils.py 283): INFO Epoch: [0] [1990/2502] eta: 0:06:46 lr: 0.000020 loss_cls: 4.2158 (4.1699) grad_norm: 4.5494 (5.2557) time: 0.7904 data: 0.0003 max mem: 8421 +[2024-12-04 21:58:59 root] (utils.py 283): INFO Epoch: [0] [2000/2502] eta: 0:06:38 lr: 0.000020 loss_cls: 4.0334 (4.1686) grad_norm: 4.6239 (5.2525) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-04 21:59:07 root] (utils.py 283): INFO Epoch: [0] [2010/2502] eta: 0:06:30 lr: 0.000020 loss_cls: 4.1888 (4.1694) grad_norm: 4.5374 (5.2482) time: 0.7978 data: 0.0003 max mem: 8421 +[2024-12-04 21:59:15 root] (utils.py 283): INFO Epoch: [0] [2020/2502] eta: 0:06:22 lr: 0.000020 loss_cls: 4.2071 (4.1684) grad_norm: 4.3315 (5.2435) time: 0.7972 data: 0.0002 max mem: 8421 +[2024-12-04 21:59:23 root] (utils.py 283): INFO Epoch: [0] [2030/2502] eta: 0:06:14 lr: 0.000020 loss_cls: 4.1934 (4.1686) grad_norm: 4.3045 (5.2389) time: 0.7908 data: 0.0003 max mem: 8421 +[2024-12-04 21:59:31 root] (utils.py 283): INFO Epoch: [0] [2040/2502] eta: 0:06:06 lr: 0.000020 loss_cls: 4.2990 (4.1688) grad_norm: 4.3136 (5.2363) time: 0.7917 data: 0.0003 max mem: 8421 +[2024-12-04 21:59:39 root] (utils.py 283): INFO Epoch: [0] [2050/2502] eta: 0:05:58 lr: 0.000020 loss_cls: 4.0891 (4.1672) grad_norm: 4.5546 (5.2328) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-04 21:59:47 root] (utils.py 283): INFO Epoch: [0] [2060/2502] eta: 0:05:50 lr: 0.000020 loss_cls: 3.8251 (4.1662) grad_norm: 4.5122 (5.2292) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-04 21:59:54 root] (utils.py 283): INFO Epoch: [0] [2070/2502] eta: 0:05:42 lr: 0.000020 loss_cls: 4.2290 (4.1662) grad_norm: 4.4795 (5.2252) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-04 22:00:02 root] (utils.py 283): INFO Epoch: [0] [2080/2502] eta: 0:05:35 lr: 0.000020 loss_cls: 4.1482 (4.1652) grad_norm: 4.2021 (5.2229) time: 0.7844 data: 0.0002 max mem: 8421 +[2024-12-04 22:00:10 root] (utils.py 283): INFO Epoch: [0] [2090/2502] eta: 0:05:27 lr: 0.000020 loss_cls: 4.1459 (4.1657) grad_norm: 4.4429 (5.2192) time: 0.7891 data: 0.0003 max mem: 8421 +[2024-12-04 22:00:18 root] (utils.py 283): INFO Epoch: [0] [2100/2502] eta: 0:05:19 lr: 0.000020 loss_cls: 4.3654 (4.1641) grad_norm: 4.5107 (5.2167) time: 0.7915 data: 0.0003 max mem: 8421 +[2024-12-04 22:00:26 root] (utils.py 283): INFO Epoch: [0] [2110/2502] eta: 0:05:11 lr: 0.000020 loss_cls: 4.1458 (4.1642) grad_norm: 4.3510 (5.2136) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-04 22:00:34 root] (utils.py 283): INFO Epoch: [0] [2120/2502] eta: 0:05:03 lr: 0.000020 loss_cls: 4.2468 (4.1632) grad_norm: 4.2957 (5.2099) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-04 22:00:42 root] (utils.py 283): INFO Epoch: [0] [2130/2502] eta: 0:04:55 lr: 0.000020 loss_cls: 4.2054 (4.1629) grad_norm: 4.2187 (5.2053) time: 0.8093 data: 0.0003 max mem: 8421 +[2024-12-04 22:00:51 root] (utils.py 283): INFO Epoch: [0] [2140/2502] eta: 0:04:47 lr: 0.000020 loss_cls: 4.2896 (4.1635) grad_norm: 4.2187 (5.2016) time: 0.8349 data: 0.0003 max mem: 8421 +[2024-12-04 22:00:59 root] (utils.py 283): INFO Epoch: [0] [2150/2502] eta: 0:04:39 lr: 0.000020 loss_cls: 4.2897 (4.1627) grad_norm: 4.3565 (5.1984) time: 0.8308 data: 0.0003 max mem: 8421 +[2024-12-04 22:01:07 root] (utils.py 283): INFO Epoch: [0] [2160/2502] eta: 0:04:31 lr: 0.000020 loss_cls: 4.2897 (4.1628) grad_norm: 4.4680 (5.1953) time: 0.8199 data: 0.0003 max mem: 8421 +[2024-12-04 22:01:15 root] (utils.py 283): INFO Epoch: [0] [2170/2502] eta: 0:04:23 lr: 0.000020 loss_cls: 4.3298 (4.1631) grad_norm: 4.5425 (5.1927) time: 0.8216 data: 0.0003 max mem: 8421 +[2024-12-04 22:01:24 root] (utils.py 283): INFO Epoch: [0] [2180/2502] eta: 0:04:15 lr: 0.000020 loss_cls: 4.0613 (4.1622) grad_norm: 4.4518 (5.1892) time: 0.8256 data: 0.0003 max mem: 8421 +[2024-12-04 22:01:32 root] (utils.py 283): INFO Epoch: [0] [2190/2502] eta: 0:04:08 lr: 0.000020 loss_cls: 4.3734 (4.1630) grad_norm: 4.4030 (5.1861) time: 0.8272 data: 0.0003 max mem: 8421 +[2024-12-04 22:01:40 root] (utils.py 283): INFO Epoch: [0] [2200/2502] eta: 0:04:00 lr: 0.000020 loss_cls: 4.1851 (4.1608) grad_norm: 4.3602 (5.1825) time: 0.8200 data: 0.0003 max mem: 8421 +[2024-12-04 22:01:50 root] (utils.py 283): INFO Epoch: [0] [2210/2502] eta: 0:03:52 lr: 0.000020 loss_cls: 3.7590 (4.1600) grad_norm: 4.3602 (5.1798) time: 0.8879 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:00 root] (utils.py 283): INFO Epoch: [0] [2220/2502] eta: 0:03:44 lr: 0.000020 loss_cls: 4.0134 (4.1587) grad_norm: 4.5033 (5.1772) time: 1.0230 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:12 root] (utils.py 283): INFO Epoch: [0] [2230/2502] eta: 0:03:37 lr: 0.000020 loss_cls: 4.0098 (4.1574) grad_norm: 4.5078 (5.1751) time: 1.1198 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:20 root] (utils.py 283): INFO Epoch: [0] [2240/2502] eta: 0:03:29 lr: 0.000020 loss_cls: 3.9225 (4.1562) grad_norm: 4.5459 (5.1724) time: 0.9817 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:28 root] (utils.py 283): INFO Epoch: [0] [2250/2502] eta: 0:03:21 lr: 0.000020 loss_cls: 3.7647 (4.1547) grad_norm: 4.2174 (5.1683) time: 0.7984 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:36 root] (utils.py 283): INFO Epoch: [0] [2260/2502] eta: 0:03:13 lr: 0.000020 loss_cls: 3.6714 (4.1536) grad_norm: 4.2051 (5.1658) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:44 root] (utils.py 283): INFO Epoch: [0] [2270/2502] eta: 0:03:05 lr: 0.000020 loss_cls: 4.0289 (4.1537) grad_norm: 4.5611 (5.1632) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:52 root] (utils.py 283): INFO Epoch: [0] [2280/2502] eta: 0:02:57 lr: 0.000020 loss_cls: 4.2155 (4.1541) grad_norm: 4.5411 (5.1615) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-04 22:02:59 root] (utils.py 283): INFO Epoch: [0] [2290/2502] eta: 0:02:49 lr: 0.000020 loss_cls: 4.2705 (4.1538) grad_norm: 4.4862 (5.1594) time: 0.7847 data: 0.0002 max mem: 8421 +[2024-12-04 22:03:07 root] (utils.py 283): INFO Epoch: [0] [2300/2502] eta: 0:02:41 lr: 0.000020 loss_cls: 3.9716 (4.1528) grad_norm: 4.4576 (5.1566) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-04 22:03:15 root] (utils.py 283): INFO Epoch: [0] [2310/2502] eta: 0:02:33 lr: 0.000020 loss_cls: 3.9450 (4.1513) grad_norm: 4.4494 (5.1537) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-04 22:03:23 root] (utils.py 283): INFO Epoch: [0] [2320/2502] eta: 0:02:25 lr: 0.000020 loss_cls: 4.0570 (4.1515) grad_norm: 4.3921 (5.1532) time: 0.7997 data: 0.0003 max mem: 8421 +[2024-12-04 22:03:35 root] (utils.py 283): INFO Epoch: [0] [2330/2502] eta: 0:02:17 lr: 0.000020 loss_cls: 4.2884 (4.1523) grad_norm: 4.4090 (5.1505) time: 0.9995 data: 0.0003 max mem: 8421 +[2024-12-04 22:03:44 root] (utils.py 283): INFO Epoch: [0] [2340/2502] eta: 0:02:09 lr: 0.000020 loss_cls: 4.2884 (4.1518) grad_norm: 4.4707 (5.1483) time: 1.0567 data: 0.0003 max mem: 8421 +[2024-12-04 22:03:53 root] (utils.py 283): INFO Epoch: [0] [2350/2502] eta: 0:02:01 lr: 0.000020 loss_cls: 4.1655 (4.1515) grad_norm: 4.4259 (5.1447) time: 0.8825 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:01 root] (utils.py 283): INFO Epoch: [0] [2360/2502] eta: 0:01:53 lr: 0.000020 loss_cls: 4.0153 (4.1504) grad_norm: 4.2463 (5.1410) time: 0.8322 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:09 root] (utils.py 283): INFO Epoch: [0] [2370/2502] eta: 0:01:45 lr: 0.000020 loss_cls: 3.9126 (4.1497) grad_norm: 4.3128 (5.1391) time: 0.8382 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:18 root] (utils.py 283): INFO Epoch: [0] [2380/2502] eta: 0:01:37 lr: 0.000020 loss_cls: 4.1731 (4.1499) grad_norm: 4.3579 (5.1356) time: 0.8338 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:26 root] (utils.py 283): INFO Epoch: [0] [2390/2502] eta: 0:01:29 lr: 0.000020 loss_cls: 4.1748 (4.1486) grad_norm: 4.3323 (5.1356) time: 0.8250 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:34 root] (utils.py 283): INFO Epoch: [0] [2400/2502] eta: 0:01:21 lr: 0.000020 loss_cls: 3.9423 (4.1479) grad_norm: 4.3993 (5.1324) time: 0.8228 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:42 root] (utils.py 283): INFO Epoch: [0] [2410/2502] eta: 0:01:13 lr: 0.000020 loss_cls: 4.3189 (4.1482) grad_norm: 4.3852 (5.1293) time: 0.8255 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:51 root] (utils.py 283): INFO Epoch: [0] [2420/2502] eta: 0:01:05 lr: 0.000020 loss_cls: 4.0910 (4.1471) grad_norm: 4.3852 (5.1269) time: 0.8299 data: 0.0003 max mem: 8421 +[2024-12-04 22:04:59 root] (utils.py 283): INFO Epoch: [0] [2430/2502] eta: 0:00:57 lr: 0.000020 loss_cls: 3.7131 (4.1467) grad_norm: 4.5217 (5.1245) time: 0.8358 data: 0.0003 max mem: 8421 +[2024-12-04 22:05:08 root] (utils.py 283): INFO Epoch: [0] [2440/2502] eta: 0:00:49 lr: 0.000020 loss_cls: 4.1872 (4.1465) grad_norm: 4.5564 (5.1224) time: 0.8419 data: 0.0003 max mem: 8421 +[2024-12-04 22:05:16 root] (utils.py 283): INFO Epoch: [0] [2450/2502] eta: 0:00:41 lr: 0.000020 loss_cls: 4.1872 (4.1468) grad_norm: 4.4639 (5.1200) time: 0.8431 data: 0.0003 max mem: 8421 +[2024-12-04 22:05:24 root] (utils.py 283): INFO Epoch: [0] [2460/2502] eta: 0:00:33 lr: 0.000020 loss_cls: 4.2749 (4.1466) grad_norm: 4.3700 (5.1174) time: 0.8349 data: 0.0003 max mem: 8421 +[2024-12-04 22:05:33 root] (utils.py 283): INFO Epoch: [0] [2470/2502] eta: 0:00:25 lr: 0.000020 loss_cls: 4.2275 (4.1455) grad_norm: 4.3700 (5.1157) time: 0.8325 data: 0.0003 max mem: 8421 +[2024-12-04 22:05:41 root] (utils.py 283): INFO Epoch: [0] [2480/2502] eta: 0:00:17 lr: 0.000020 loss_cls: 3.6594 (4.1443) grad_norm: 4.4974 (5.1136) time: 0.8335 data: 0.0003 max mem: 8421 +[2024-12-04 22:05:50 root] (utils.py 283): INFO Epoch: [0] [2490/2502] eta: 0:00:09 lr: 0.000020 loss_cls: 3.9377 (4.1447) grad_norm: 4.4488 (5.1125) time: 0.8568 data: 0.0306 max mem: 8421 +[2024-12-04 22:05:58 root] (utils.py 283): INFO Epoch: [0] [2500/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 3.9883 (4.1438) grad_norm: 4.4488 (5.1101) time: 0.8581 data: 0.0306 max mem: 8421 +[2024-12-04 22:05:59 root] (utils.py 283): INFO Epoch: [0] [2501/2502] eta: 0:00:00 lr: 0.000020 loss_cls: 3.9883 (4.1432) grad_norm: 4.4488 (5.1098) time: 0.8575 data: 0.0306 max mem: 8421 +[2024-12-04 22:05:59 root] (utils.py 297): INFO Epoch: [0] Total time: 0:33:28 (0.8029 s / it) +[2024-12-04 22:05:59 root] (engine.py 178): INFO Averaged stats:lr: 0.000020 loss_cls: 3.9883 (4.1443) grad_norm: 4.4488 (5.1098) +[2024-12-04 22:05:59 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7625 (0.7625) acc1: 81.2500 (81.2500) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1458 data: 0.0005 max mem: 8421 +[2024-12-04 22:06:01 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8700 (0.9132) acc1: 81.2500 (80.4688) acc3: 92.1875 (91.8324) acc5: 94.5312 (94.8153) time: 0.1327 data: 0.0003 max mem: 8421 +[2024-12-04 22:06:02 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9280 (0.9599) acc1: 77.3438 (79.3899) acc3: 92.1875 (91.8899) acc5: 94.5312 (94.4568) time: 0.1319 data: 0.0004 max mem: 8421 +[2024-12-04 22:06:03 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9857 (0.9544) acc1: 78.1250 (78.9062) acc3: 92.1875 (92.4143) acc5: 94.5312 (94.8589) time: 0.1328 data: 0.0005 max mem: 8421 +[2024-12-04 22:06:05 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8742 (0.9470) acc1: 81.2500 (79.6494) acc3: 94.5312 (92.6829) acc5: 95.3125 (94.9505) time: 0.1460 data: 0.0010 max mem: 8421 +[2024-12-04 22:06:07 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0732 (1.0447) acc1: 74.2188 (77.4816) acc3: 87.5000 (90.9926) acc5: 92.1875 (93.7347) time: 0.1673 data: 0.0086 max mem: 8421 +[2024-12-04 22:06:09 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3783 (1.0856) acc1: 69.5312 (76.7290) acc3: 85.1562 (90.1895) acc5: 89.0625 (93.0456) time: 0.1888 data: 0.0306 max mem: 8421 +[2024-12-04 22:06:11 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3295 (1.1322) acc1: 68.7500 (75.3851) acc3: 86.7188 (89.6017) acc5: 89.8438 (92.5286) time: 0.2059 data: 0.0488 max mem: 8421 +[2024-12-04 22:06:13 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3939 (1.1711) acc1: 69.5312 (74.6528) acc3: 85.1562 (88.9275) acc5: 88.2812 (92.0139) time: 0.1940 data: 0.0386 max mem: 8421 +[2024-12-04 22:06:15 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.4118 (1.2042) acc1: 70.3125 (73.7294) acc3: 84.3750 (88.3671) acc5: 88.2812 (91.5780) time: 0.1822 data: 0.0310 max mem: 8421 +[2024-12-04 22:06:16 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2250 (1.1928) acc1: 71.8750 (73.9360) acc3: 86.7188 (88.6400) acc5: 90.6250 (91.7840) time: 0.1865 data: 0.0189 max mem: 8421 +[2024-12-04 22:06:16 root] (utils.py 297): INFO Test: Total time: 0:00:16 (0.1699 s / it) +[2024-12-04 22:06:16 root] (engine.py 263): INFO * Acc@1 73.852 Acc@3 88.446 Acc@5 91.818 loss 1.193 flops 1.285 layer_flops 1.251 +[2024-12-04 22:06:16 root] (main.py 542): INFO Accuracy of the network on the 50000 test images: 73.9% +[2024-12-04 22:06:16 root] (main.py 546): INFO Max accuracy: 73.85% +[2024-12-04 22:06:17 root] (utils.py 283): INFO Epoch: [1] [ 0/2502] eta: 0:56:09 lr: 0.000020 loss_cls: 3.5119 (3.5119) grad_norm: 4.9805 (4.9805) time: 1.3469 data: 0.0003 max mem: 8421 +[2024-12-04 22:06:26 root] (utils.py 283): INFO Epoch: [1] [ 10/2502] eta: 0:35:59 lr: 0.000020 loss_cls: 3.7081 (3.9444) grad_norm: 4.4559 (4.5038) time: 0.8665 data: 0.0003 max mem: 8421 +[2024-12-04 22:06:34 root] (utils.py 283): INFO Epoch: [1] [ 20/2502] eta: 0:34:59 lr: 0.000020 loss_cls: 4.1124 (4.0467) grad_norm: 4.3886 (4.4207) time: 0.8210 data: 0.0003 max mem: 8421 +[2024-12-04 22:06:42 root] (utils.py 283): INFO Epoch: [1] [ 30/2502] eta: 0:34:15 lr: 0.000020 loss_cls: 4.3290 (4.1089) grad_norm: 4.3465 (4.4686) time: 0.8125 data: 0.0003 max mem: 8421 +[2024-12-04 22:06:50 root] (utils.py 283): INFO Epoch: [1] [ 40/2502] eta: 0:34:10 lr: 0.000020 loss_cls: 4.3290 (4.1065) grad_norm: 4.3233 (4.4617) time: 0.8192 data: 0.0003 max mem: 8421 +[2024-12-04 22:06:59 root] (utils.py 283): INFO Epoch: [1] [ 50/2502] eta: 0:34:11 lr: 0.000020 loss_cls: 4.2689 (4.1280) grad_norm: 4.4623 (4.4758) time: 0.8442 data: 0.0003 max mem: 8421 +[2024-12-04 22:07:07 root] (utils.py 283): INFO Epoch: [1] [ 60/2502] eta: 0:34:08 lr: 0.000020 loss_cls: 4.3759 (4.1698) grad_norm: 4.3999 (4.4457) time: 0.8511 data: 0.0003 max mem: 8421 +[2024-12-04 22:07:16 root] (utils.py 283): INFO Epoch: [1] [ 70/2502] eta: 0:33:56 lr: 0.000020 loss_cls: 4.3759 (4.1792) grad_norm: 4.3406 (4.4428) time: 0.8392 data: 0.0003 max mem: 8421 +[2024-12-04 22:07:24 root] (utils.py 283): INFO Epoch: [1] [ 80/2502] eta: 0:33:50 lr: 0.000020 loss_cls: 4.1193 (4.1651) grad_norm: 4.4019 (4.4416) time: 0.8360 data: 0.0003 max mem: 8421 +[2024-12-04 22:07:32 root] (utils.py 283): INFO Epoch: [1] [ 90/2502] eta: 0:33:27 lr: 0.000020 loss_cls: 4.1193 (4.1401) grad_norm: 4.3308 (4.4384) time: 0.8150 data: 0.0003 max mem: 8421 +[2024-12-04 22:07:40 root] (utils.py 283): INFO Epoch: [1] [ 100/2502] eta: 0:33:07 lr: 0.000020 loss_cls: 4.2274 (4.1412) grad_norm: 4.3710 (4.4700) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-04 22:07:47 root] (utils.py 283): INFO Epoch: [1] [ 110/2502] eta: 0:32:49 lr: 0.000020 loss_cls: 4.1496 (4.1059) grad_norm: 4.2809 (4.4398) time: 0.7832 data: 0.0002 max mem: 8421 +[2024-12-04 22:07:55 root] (utils.py 283): INFO Epoch: [1] [ 120/2502] eta: 0:32:33 lr: 0.000020 loss_cls: 3.8565 (4.1138) grad_norm: 4.2189 (4.4436) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-04 22:08:03 root] (utils.py 283): INFO Epoch: [1] [ 130/2502] eta: 0:32:19 lr: 0.000020 loss_cls: 4.1317 (4.1097) grad_norm: 4.3693 (4.4440) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-04 22:08:11 root] (utils.py 283): INFO Epoch: [1] [ 140/2502] eta: 0:32:05 lr: 0.000020 loss_cls: 4.1775 (4.1198) grad_norm: 4.4048 (4.5251) time: 0.7842 data: 0.0002 max mem: 8421 +[2024-12-04 22:08:19 root] (utils.py 283): INFO Epoch: [1] [ 150/2502] eta: 0:31:52 lr: 0.000020 loss_cls: 4.2901 (4.1047) grad_norm: 4.5023 (4.5266) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-04 22:08:27 root] (utils.py 283): INFO Epoch: [1] [ 160/2502] eta: 0:31:40 lr: 0.000020 loss_cls: 4.2043 (4.1081) grad_norm: 4.4830 (4.5205) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-04 22:08:35 root] (utils.py 283): INFO Epoch: [1] [ 170/2502] eta: 0:31:29 lr: 0.000020 loss_cls: 4.1927 (4.1061) grad_norm: 4.2638 (4.5160) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-04 22:08:42 root] (utils.py 283): INFO Epoch: [1] [ 180/2502] eta: 0:31:17 lr: 0.000020 loss_cls: 4.3667 (4.1148) grad_norm: 4.3245 (4.5096) time: 0.7845 data: 0.0002 max mem: 8421 +[2024-12-04 22:08:50 root] (utils.py 283): INFO Epoch: [1] [ 190/2502] eta: 0:31:07 lr: 0.000020 loss_cls: 4.4038 (4.1157) grad_norm: 4.3346 (4.5054) time: 0.7853 data: 0.0002 max mem: 8421 +[2024-12-04 22:08:58 root] (utils.py 283): INFO Epoch: [1] [ 200/2502] eta: 0:30:56 lr: 0.000020 loss_cls: 4.2873 (4.1260) grad_norm: 4.2024 (4.4906) time: 0.7873 data: 0.0002 max mem: 8421 +[2024-12-04 22:09:06 root] (utils.py 283): INFO Epoch: [1] [ 210/2502] eta: 0:30:45 lr: 0.000020 loss_cls: 4.2873 (4.1356) grad_norm: 4.1841 (4.4835) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-04 22:09:14 root] (utils.py 283): INFO Epoch: [1] [ 220/2502] eta: 0:30:35 lr: 0.000020 loss_cls: 4.1297 (4.1270) grad_norm: 4.3110 (4.4779) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-04 22:09:22 root] (utils.py 283): INFO Epoch: [1] [ 230/2502] eta: 0:30:25 lr: 0.000020 loss_cls: 4.3075 (4.1420) grad_norm: 4.4562 (4.4879) time: 0.7878 data: 0.0002 max mem: 8421 +[2024-12-04 22:09:30 root] (utils.py 283): INFO Epoch: [1] [ 240/2502] eta: 0:30:15 lr: 0.000020 loss_cls: 4.2043 (4.1218) grad_norm: 4.5415 (4.4917) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-04 22:09:37 root] (utils.py 283): INFO Epoch: [1] [ 250/2502] eta: 0:30:06 lr: 0.000020 loss_cls: 3.8243 (4.1108) grad_norm: 4.3292 (4.4862) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-04 22:09:45 root] (utils.py 283): INFO Epoch: [1] [ 260/2502] eta: 0:29:57 lr: 0.000020 loss_cls: 4.0940 (4.1044) grad_norm: 4.1879 (4.4760) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-04 22:09:53 root] (utils.py 283): INFO Epoch: [1] [ 270/2502] eta: 0:29:48 lr: 0.000020 loss_cls: 4.2409 (4.1099) grad_norm: 4.2186 (4.4699) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-04 22:10:01 root] (utils.py 283): INFO Epoch: [1] [ 280/2502] eta: 0:29:38 lr: 0.000020 loss_cls: 4.1103 (4.1044) grad_norm: 4.2649 (4.4640) time: 0.7883 data: 0.0002 max mem: 8421 +[2024-12-04 22:10:09 root] (utils.py 283): INFO Epoch: [1] [ 290/2502] eta: 0:29:29 lr: 0.000020 loss_cls: 3.7458 (4.0845) grad_norm: 4.1771 (4.4551) time: 0.7848 data: 0.0002 max mem: 8421 +[2024-12-04 22:10:17 root] (utils.py 283): INFO Epoch: [1] [ 300/2502] eta: 0:29:20 lr: 0.000020 loss_cls: 3.6791 (4.0824) grad_norm: 4.2735 (4.4528) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-04 22:10:25 root] (utils.py 283): INFO Epoch: [1] [ 310/2502] eta: 0:29:14 lr: 0.000020 loss_cls: 4.1525 (4.0830) grad_norm: 4.4055 (4.4515) time: 0.8025 data: 0.0002 max mem: 8421 +[2024-12-04 22:10:33 root] (utils.py 283): INFO Epoch: [1] [ 320/2502] eta: 0:29:09 lr: 0.000020 loss_cls: 4.1252 (4.0800) grad_norm: 4.4359 (4.4572) time: 0.8331 data: 0.0002 max mem: 8421 +[2024-12-04 22:10:42 root] (utils.py 283): INFO Epoch: [1] [ 330/2502] eta: 0:29:03 lr: 0.000020 loss_cls: 3.9587 (4.0764) grad_norm: 4.3381 (4.4560) time: 0.8420 data: 0.0002 max mem: 8421 +[2024-12-04 22:10:50 root] (utils.py 283): INFO Epoch: [1] [ 340/2502] eta: 0:28:55 lr: 0.000020 loss_cls: 4.0193 (4.0720) grad_norm: 4.2651 (4.4575) time: 0.8216 data: 0.0003 max mem: 8421 +[2024-12-04 22:10:58 root] (utils.py 283): INFO Epoch: [1] [ 350/2502] eta: 0:28:46 lr: 0.000020 loss_cls: 3.9210 (4.0687) grad_norm: 4.4079 (4.4602) time: 0.7968 data: 0.0003 max mem: 8421 +[2024-12-04 22:11:06 root] (utils.py 283): INFO Epoch: [1] [ 360/2502] eta: 0:28:38 lr: 0.000020 loss_cls: 3.5811 (4.0498) grad_norm: 4.5011 (4.4585) time: 0.7922 data: 0.0003 max mem: 8421 +[2024-12-04 22:11:14 root] (utils.py 283): INFO Epoch: [1] [ 370/2502] eta: 0:28:29 lr: 0.000020 loss_cls: 3.4383 (4.0454) grad_norm: 4.3643 (4.4550) time: 0.7957 data: 0.0002 max mem: 8421 +[2024-12-04 22:11:21 root] (utils.py 283): INFO Epoch: [1] [ 380/2502] eta: 0:28:20 lr: 0.000020 loss_cls: 4.1912 (4.0440) grad_norm: 4.3390 (4.4551) time: 0.7891 data: 0.0002 max mem: 8421 +[2024-12-04 22:11:30 root] (utils.py 283): INFO Epoch: [1] [ 390/2502] eta: 0:28:14 lr: 0.000020 loss_cls: 4.0744 (4.0399) grad_norm: 4.3826 (4.4551) time: 0.8113 data: 0.0003 max mem: 8421 +[2024-12-04 22:11:38 root] (utils.py 283): INFO Epoch: [1] [ 400/2502] eta: 0:28:08 lr: 0.000020 loss_cls: 4.0744 (4.0398) grad_norm: 4.3826 (4.4545) time: 0.8383 data: 0.0003 max mem: 8421 +[2024-12-04 22:11:47 root] (utils.py 283): INFO Epoch: [1] [ 410/2502] eta: 0:28:02 lr: 0.000020 loss_cls: 4.0861 (4.0374) grad_norm: 4.4565 (4.4804) time: 0.8410 data: 0.0003 max mem: 8421 +[2024-12-04 22:11:55 root] (utils.py 283): INFO Epoch: [1] [ 420/2502] eta: 0:27:56 lr: 0.000020 loss_cls: 4.0324 (4.0378) grad_norm: 4.4429 (4.4814) time: 0.8426 data: 0.0003 max mem: 8421 +[2024-12-04 22:12:04 root] (utils.py 283): INFO Epoch: [1] [ 430/2502] eta: 0:27:50 lr: 0.000020 loss_cls: 4.3459 (4.0439) grad_norm: 4.3998 (4.4916) time: 0.8468 data: 0.0003 max mem: 8421 +[2024-12-04 22:12:12 root] (utils.py 283): INFO Epoch: [1] [ 440/2502] eta: 0:27:43 lr: 0.000020 loss_cls: 4.4299 (4.0472) grad_norm: 4.3998 (4.4909) time: 0.8421 data: 0.0003 max mem: 8421 +[2024-12-04 22:12:21 root] (utils.py 283): INFO Epoch: [1] [ 450/2502] eta: 0:27:39 lr: 0.000020 loss_cls: 4.0369 (4.0401) grad_norm: 4.4478 (4.4901) time: 0.8573 data: 0.0003 max mem: 8421 +[2024-12-04 22:12:29 root] (utils.py 283): INFO Epoch: [1] [ 460/2502] eta: 0:27:33 lr: 0.000020 loss_cls: 3.8913 (4.0385) grad_norm: 4.4209 (4.4888) time: 0.8721 data: 0.0003 max mem: 8421 +[2024-12-04 22:12:40 root] (utils.py 283): INFO Epoch: [1] [ 470/2502] eta: 0:27:37 lr: 0.000020 loss_cls: 4.0680 (4.0405) grad_norm: 4.4556 (4.4907) time: 0.9758 data: 0.0003 max mem: 8421 +[2024-12-04 22:12:49 root] (utils.py 283): INFO Epoch: [1] [ 480/2502] eta: 0:27:29 lr: 0.000020 loss_cls: 4.1088 (4.0367) grad_norm: 4.5111 (4.4924) time: 0.9557 data: 0.0003 max mem: 8421 +[2024-12-04 22:12:56 root] (utils.py 283): INFO Epoch: [1] [ 490/2502] eta: 0:27:19 lr: 0.000020 loss_cls: 4.1088 (4.0399) grad_norm: 4.5111 (4.4943) time: 0.7998 data: 0.0003 max mem: 8421 +[2024-12-04 22:13:04 root] (utils.py 283): INFO Epoch: [1] [ 500/2502] eta: 0:27:10 lr: 0.000020 loss_cls: 4.2348 (4.0372) grad_norm: 4.3352 (4.4936) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-04 22:13:12 root] (utils.py 283): INFO Epoch: [1] [ 510/2502] eta: 0:27:01 lr: 0.000020 loss_cls: 3.9236 (4.0361) grad_norm: 4.3814 (4.4947) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-04 22:13:20 root] (utils.py 283): INFO Epoch: [1] [ 520/2502] eta: 0:26:51 lr: 0.000020 loss_cls: 3.9170 (4.0361) grad_norm: 4.4046 (4.4957) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-04 22:13:28 root] (utils.py 283): INFO Epoch: [1] [ 530/2502] eta: 0:26:42 lr: 0.000020 loss_cls: 3.9991 (4.0335) grad_norm: 4.3901 (4.4933) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-04 22:13:35 root] (utils.py 283): INFO Epoch: [1] [ 540/2502] eta: 0:26:32 lr: 0.000020 loss_cls: 4.0867 (4.0326) grad_norm: 4.3737 (4.4945) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-04 22:13:43 root] (utils.py 283): INFO Epoch: [1] [ 550/2502] eta: 0:26:23 lr: 0.000020 loss_cls: 4.1788 (4.0351) grad_norm: 4.3737 (4.4948) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-04 22:13:51 root] (utils.py 283): INFO Epoch: [1] [ 560/2502] eta: 0:26:14 lr: 0.000020 loss_cls: 4.1318 (4.0327) grad_norm: 4.3271 (4.4928) time: 0.7827 data: 0.0002 max mem: 8421 +[2024-12-04 22:13:59 root] (utils.py 283): INFO Epoch: [1] [ 570/2502] eta: 0:26:05 lr: 0.000020 loss_cls: 3.8822 (4.0305) grad_norm: 4.2757 (4.4893) time: 0.7844 data: 0.0002 max mem: 8421 +[2024-12-04 22:14:07 root] (utils.py 283): INFO Epoch: [1] [ 580/2502] eta: 0:25:57 lr: 0.000020 loss_cls: 3.7211 (4.0249) grad_norm: 4.2757 (4.4857) time: 0.7933 data: 0.0002 max mem: 8421 +[2024-12-04 22:14:15 root] (utils.py 283): INFO Epoch: [1] [ 590/2502] eta: 0:25:48 lr: 0.000020 loss_cls: 3.9831 (4.0245) grad_norm: 4.2642 (4.4839) time: 0.8002 data: 0.0003 max mem: 8421 +[2024-12-04 22:14:23 root] (utils.py 283): INFO Epoch: [1] [ 600/2502] eta: 0:25:40 lr: 0.000020 loss_cls: 4.2399 (4.0243) grad_norm: 4.2485 (4.4815) time: 0.8002 data: 0.0002 max mem: 8421 +[2024-12-04 22:14:31 root] (utils.py 283): INFO Epoch: [1] [ 610/2502] eta: 0:25:31 lr: 0.000020 loss_cls: 4.2235 (4.0236) grad_norm: 4.2694 (4.4800) time: 0.7913 data: 0.0003 max mem: 8421 +[2024-12-04 22:14:38 root] (utils.py 283): INFO Epoch: [1] [ 620/2502] eta: 0:25:22 lr: 0.000020 loss_cls: 4.0356 (4.0252) grad_norm: 4.3274 (4.4779) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-04 22:14:46 root] (utils.py 283): INFO Epoch: [1] [ 630/2502] eta: 0:25:13 lr: 0.000020 loss_cls: 4.0356 (4.0272) grad_norm: 4.3517 (4.4771) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-04 22:14:54 root] (utils.py 283): INFO Epoch: [1] [ 640/2502] eta: 0:25:04 lr: 0.000020 loss_cls: 3.9012 (4.0207) grad_norm: 4.4301 (4.4769) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-04 22:15:02 root] (utils.py 283): INFO Epoch: [1] [ 650/2502] eta: 0:24:55 lr: 0.000020 loss_cls: 3.9622 (4.0218) grad_norm: 4.5161 (4.5067) time: 0.7833 data: 0.0002 max mem: 8421 +[2024-12-04 22:15:10 root] (utils.py 283): INFO Epoch: [1] [ 660/2502] eta: 0:24:47 lr: 0.000020 loss_cls: 4.1396 (4.0201) grad_norm: 4.5486 (4.5075) time: 0.7924 data: 0.0002 max mem: 8421 +[2024-12-04 22:15:18 root] (utils.py 283): INFO Epoch: [1] [ 670/2502] eta: 0:24:38 lr: 0.000020 loss_cls: 4.1318 (4.0202) grad_norm: 4.5486 (4.5097) time: 0.7919 data: 0.0003 max mem: 8421 +[2024-12-04 22:15:26 root] (utils.py 283): INFO Epoch: [1] [ 680/2502] eta: 0:24:30 lr: 0.000020 loss_cls: 4.1318 (4.0196) grad_norm: 4.5183 (4.5091) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-04 22:15:34 root] (utils.py 283): INFO Epoch: [1] [ 690/2502] eta: 0:24:22 lr: 0.000020 loss_cls: 3.9766 (4.0168) grad_norm: 4.2952 (4.5082) time: 0.8092 data: 0.0003 max mem: 8421 +[2024-12-04 22:15:42 root] (utils.py 283): INFO Epoch: [1] [ 700/2502] eta: 0:24:15 lr: 0.000020 loss_cls: 4.0963 (4.0180) grad_norm: 4.2880 (4.5056) time: 0.8369 data: 0.0003 max mem: 8421 +[2024-12-04 22:15:51 root] (utils.py 283): INFO Epoch: [1] [ 710/2502] eta: 0:24:07 lr: 0.000020 loss_cls: 4.3201 (4.0203) grad_norm: 4.2880 (4.5050) time: 0.8348 data: 0.0003 max mem: 8421 +[2024-12-04 22:15:59 root] (utils.py 283): INFO Epoch: [1] [ 720/2502] eta: 0:24:00 lr: 0.000020 loss_cls: 4.2140 (4.0184) grad_norm: 4.5346 (4.5073) time: 0.8384 data: 0.0003 max mem: 8421 +[2024-12-04 22:16:07 root] (utils.py 283): INFO Epoch: [1] [ 730/2502] eta: 0:23:52 lr: 0.000020 loss_cls: 4.2330 (4.0226) grad_norm: 4.2860 (4.5034) time: 0.8230 data: 0.0003 max mem: 8421 +[2024-12-04 22:16:15 root] (utils.py 283): INFO Epoch: [1] [ 740/2502] eta: 0:23:44 lr: 0.000020 loss_cls: 4.3421 (4.0268) grad_norm: 4.2473 (4.5024) time: 0.8000 data: 0.0003 max mem: 8421 +[2024-12-04 22:16:24 root] (utils.py 283): INFO Epoch: [1] [ 750/2502] eta: 0:23:36 lr: 0.000020 loss_cls: 4.3305 (4.0261) grad_norm: 4.4023 (4.5002) time: 0.8208 data: 0.0003 max mem: 8421 +[2024-12-04 22:16:32 root] (utils.py 283): INFO Epoch: [1] [ 760/2502] eta: 0:23:29 lr: 0.000020 loss_cls: 3.9667 (4.0268) grad_norm: 4.4161 (4.5003) time: 0.8285 data: 0.0003 max mem: 8421 +[2024-12-04 22:16:40 root] (utils.py 283): INFO Epoch: [1] [ 770/2502] eta: 0:23:21 lr: 0.000020 loss_cls: 3.9180 (4.0233) grad_norm: 4.3644 (4.4984) time: 0.8170 data: 0.0003 max mem: 8421 +[2024-12-04 22:16:48 root] (utils.py 283): INFO Epoch: [1] [ 780/2502] eta: 0:23:13 lr: 0.000020 loss_cls: 3.8595 (4.0224) grad_norm: 4.3355 (4.4981) time: 0.8140 data: 0.0003 max mem: 8421 +[2024-12-04 22:16:56 root] (utils.py 283): INFO Epoch: [1] [ 790/2502] eta: 0:23:05 lr: 0.000020 loss_cls: 3.9152 (4.0219) grad_norm: 4.4321 (4.4977) time: 0.8205 data: 0.0003 max mem: 8421 +[2024-12-04 22:17:04 root] (utils.py 283): INFO Epoch: [1] [ 800/2502] eta: 0:22:57 lr: 0.000020 loss_cls: 4.0780 (4.0232) grad_norm: 4.5120 (4.4994) time: 0.8221 data: 0.0003 max mem: 8421 +[2024-12-04 22:17:13 root] (utils.py 283): INFO Epoch: [1] [ 810/2502] eta: 0:22:49 lr: 0.000020 loss_cls: 4.1782 (4.0233) grad_norm: 4.4952 (4.5024) time: 0.8146 data: 0.0003 max mem: 8421 +[2024-12-04 22:17:21 root] (utils.py 283): INFO Epoch: [1] [ 820/2502] eta: 0:22:41 lr: 0.000020 loss_cls: 4.1993 (4.0232) grad_norm: 4.3827 (4.5017) time: 0.8229 data: 0.0003 max mem: 8421 +[2024-12-04 22:17:29 root] (utils.py 283): INFO Epoch: [1] [ 830/2502] eta: 0:22:33 lr: 0.000020 loss_cls: 4.0820 (4.0215) grad_norm: 4.3247 (4.5008) time: 0.8235 data: 0.0003 max mem: 8421 +[2024-12-04 22:17:37 root] (utils.py 283): INFO Epoch: [1] [ 840/2502] eta: 0:22:26 lr: 0.000020 loss_cls: 3.7084 (4.0179) grad_norm: 4.1385 (4.4969) time: 0.8209 data: 0.0003 max mem: 8421 +[2024-12-04 22:17:46 root] (utils.py 283): INFO Epoch: [1] [ 850/2502] eta: 0:22:18 lr: 0.000020 loss_cls: 4.1134 (4.0195) grad_norm: 4.1988 (4.4946) time: 0.8283 data: 0.0003 max mem: 8421 +[2024-12-04 22:17:54 root] (utils.py 283): INFO Epoch: [1] [ 860/2502] eta: 0:22:11 lr: 0.000020 loss_cls: 4.1947 (4.0186) grad_norm: 4.2769 (4.4957) time: 0.8552 data: 0.0003 max mem: 8421 +[2024-12-04 22:18:03 root] (utils.py 283): INFO Epoch: [1] [ 870/2502] eta: 0:22:05 lr: 0.000020 loss_cls: 3.9383 (4.0162) grad_norm: 4.3424 (4.4953) time: 0.8853 data: 0.0003 max mem: 8421 +[2024-12-04 22:18:11 root] (utils.py 283): INFO Epoch: [1] [ 880/2502] eta: 0:21:56 lr: 0.000020 loss_cls: 4.1372 (4.0184) grad_norm: 4.3424 (4.4944) time: 0.8530 data: 0.0003 max mem: 8421 +[2024-12-04 22:18:20 root] (utils.py 283): INFO Epoch: [1] [ 890/2502] eta: 0:21:49 lr: 0.000020 loss_cls: 4.2660 (4.0198) grad_norm: 4.4567 (4.4947) time: 0.8279 data: 0.0003 max mem: 8421 +[2024-12-04 22:18:28 root] (utils.py 283): INFO Epoch: [1] [ 900/2502] eta: 0:21:41 lr: 0.000020 loss_cls: 4.0899 (4.0182) grad_norm: 4.4043 (4.4929) time: 0.8307 data: 0.0003 max mem: 8421 +[2024-12-04 22:18:36 root] (utils.py 283): INFO Epoch: [1] [ 910/2502] eta: 0:21:33 lr: 0.000020 loss_cls: 4.1567 (4.0190) grad_norm: 4.1922 (4.4891) time: 0.8312 data: 0.0003 max mem: 8421 +[2024-12-04 22:18:45 root] (utils.py 283): INFO Epoch: [1] [ 920/2502] eta: 0:21:25 lr: 0.000020 loss_cls: 4.3015 (4.0210) grad_norm: 4.1552 (4.4871) time: 0.8370 data: 0.0003 max mem: 8421 +[2024-12-04 22:18:53 root] (utils.py 283): INFO Epoch: [1] [ 930/2502] eta: 0:21:18 lr: 0.000020 loss_cls: 4.1394 (4.0199) grad_norm: 4.2409 (4.4887) time: 0.8275 data: 0.0003 max mem: 8421 +[2024-12-04 22:19:02 root] (utils.py 283): INFO Epoch: [1] [ 940/2502] eta: 0:21:10 lr: 0.000020 loss_cls: 3.7722 (4.0185) grad_norm: 4.2409 (4.4863) time: 0.8406 data: 0.0003 max mem: 8421 +[2024-12-04 22:19:11 root] (utils.py 283): INFO Epoch: [1] [ 950/2502] eta: 0:21:05 lr: 0.000020 loss_cls: 3.8847 (4.0178) grad_norm: 4.1759 (4.4825) time: 0.9169 data: 0.0004 max mem: 8421 +[2024-12-04 22:19:20 root] (utils.py 283): INFO Epoch: [1] [ 960/2502] eta: 0:20:58 lr: 0.000020 loss_cls: 4.0461 (4.0184) grad_norm: 4.1808 (4.4803) time: 0.9376 data: 0.0004 max mem: 8421 +[2024-12-04 22:19:28 root] (utils.py 283): INFO Epoch: [1] [ 970/2502] eta: 0:20:49 lr: 0.000020 loss_cls: 3.7295 (4.0142) grad_norm: 4.3700 (4.4796) time: 0.8484 data: 0.0003 max mem: 8421 +[2024-12-04 22:19:36 root] (utils.py 283): INFO Epoch: [1] [ 980/2502] eta: 0:20:41 lr: 0.000020 loss_cls: 3.7708 (4.0136) grad_norm: 4.3700 (4.4795) time: 0.8014 data: 0.0003 max mem: 8421 +[2024-12-04 22:19:44 root] (utils.py 283): INFO Epoch: [1] [ 990/2502] eta: 0:20:33 lr: 0.000020 loss_cls: 3.9099 (4.0143) grad_norm: 4.4271 (4.4806) time: 0.8040 data: 0.0003 max mem: 8421 +[2024-12-04 22:19:52 root] (utils.py 283): INFO Epoch: [1] [1000/2502] eta: 0:20:24 lr: 0.000020 loss_cls: 3.9099 (4.0129) grad_norm: 4.4271 (4.4808) time: 0.7994 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:00 root] (utils.py 283): INFO Epoch: [1] [1010/2502] eta: 0:20:16 lr: 0.000020 loss_cls: 4.0954 (4.0129) grad_norm: 4.4366 (4.4823) time: 0.7995 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:08 root] (utils.py 283): INFO Epoch: [1] [1020/2502] eta: 0:20:08 lr: 0.000020 loss_cls: 3.9615 (4.0114) grad_norm: 4.3696 (4.4811) time: 0.8017 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:16 root] (utils.py 283): INFO Epoch: [1] [1030/2502] eta: 0:19:59 lr: 0.000020 loss_cls: 4.0052 (4.0121) grad_norm: 4.2975 (4.4827) time: 0.8026 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:25 root] (utils.py 283): INFO Epoch: [1] [1040/2502] eta: 0:19:51 lr: 0.000020 loss_cls: 3.8968 (4.0089) grad_norm: 4.2013 (4.4792) time: 0.8028 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:33 root] (utils.py 283): INFO Epoch: [1] [1050/2502] eta: 0:19:43 lr: 0.000020 loss_cls: 3.8557 (4.0085) grad_norm: 4.2718 (4.4793) time: 0.8017 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:40 root] (utils.py 283): INFO Epoch: [1] [1060/2502] eta: 0:19:34 lr: 0.000020 loss_cls: 4.1471 (4.0108) grad_norm: 4.3261 (4.4781) time: 0.7975 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:48 root] (utils.py 283): INFO Epoch: [1] [1070/2502] eta: 0:19:26 lr: 0.000020 loss_cls: 4.3457 (4.0111) grad_norm: 4.4632 (4.4792) time: 0.7978 data: 0.0003 max mem: 8421 +[2024-12-04 22:20:57 root] (utils.py 283): INFO Epoch: [1] [1080/2502] eta: 0:19:18 lr: 0.000020 loss_cls: 4.2548 (4.0129) grad_norm: 4.5767 (4.4805) time: 0.8081 data: 0.0003 max mem: 8421 +[2024-12-04 22:21:05 root] (utils.py 283): INFO Epoch: [1] [1090/2502] eta: 0:19:09 lr: 0.000020 loss_cls: 4.2409 (4.0135) grad_norm: 4.2406 (4.4776) time: 0.8088 data: 0.0003 max mem: 8421 +[2024-12-04 22:21:13 root] (utils.py 283): INFO Epoch: [1] [1100/2502] eta: 0:19:01 lr: 0.000020 loss_cls: 4.1531 (4.0119) grad_norm: 4.2329 (4.4761) time: 0.8020 data: 0.0003 max mem: 8421 +[2024-12-04 22:21:21 root] (utils.py 283): INFO Epoch: [1] [1110/2502] eta: 0:18:53 lr: 0.000020 loss_cls: 4.1859 (4.0138) grad_norm: 4.2831 (4.4753) time: 0.8007 data: 0.0003 max mem: 8421 +[2024-12-04 22:21:29 root] (utils.py 283): INFO Epoch: [1] [1120/2502] eta: 0:18:44 lr: 0.000020 loss_cls: 4.2526 (4.0148) grad_norm: 4.3369 (4.4744) time: 0.7995 data: 0.0002 max mem: 8421 +[2024-12-04 22:21:37 root] (utils.py 283): INFO Epoch: [1] [1130/2502] eta: 0:18:36 lr: 0.000020 loss_cls: 4.1966 (4.0167) grad_norm: 4.3988 (4.4733) time: 0.7989 data: 0.0003 max mem: 8421 +[2024-12-04 22:21:45 root] (utils.py 283): INFO Epoch: [1] [1140/2502] eta: 0:18:28 lr: 0.000020 loss_cls: 4.2401 (4.0173) grad_norm: 4.3855 (4.4723) time: 0.8024 data: 0.0003 max mem: 8421 +[2024-12-04 22:21:53 root] (utils.py 283): INFO Epoch: [1] [1150/2502] eta: 0:18:20 lr: 0.000020 loss_cls: 4.1776 (4.0188) grad_norm: 4.2828 (4.4725) time: 0.8049 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:01 root] (utils.py 283): INFO Epoch: [1] [1160/2502] eta: 0:18:11 lr: 0.000020 loss_cls: 4.2242 (4.0187) grad_norm: 4.1252 (4.4697) time: 0.8013 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:09 root] (utils.py 283): INFO Epoch: [1] [1170/2502] eta: 0:18:03 lr: 0.000020 loss_cls: 4.2272 (4.0195) grad_norm: 4.1252 (4.4674) time: 0.8056 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:17 root] (utils.py 283): INFO Epoch: [1] [1180/2502] eta: 0:17:55 lr: 0.000020 loss_cls: 4.2091 (4.0201) grad_norm: 4.2533 (4.4699) time: 0.8075 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:25 root] (utils.py 283): INFO Epoch: [1] [1190/2502] eta: 0:17:47 lr: 0.000020 loss_cls: 4.0728 (4.0189) grad_norm: 4.1947 (4.4673) time: 0.7989 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:33 root] (utils.py 283): INFO Epoch: [1] [1200/2502] eta: 0:17:38 lr: 0.000020 loss_cls: 4.0052 (4.0183) grad_norm: 4.1416 (4.4656) time: 0.7976 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:41 root] (utils.py 283): INFO Epoch: [1] [1210/2502] eta: 0:17:30 lr: 0.000020 loss_cls: 4.0052 (4.0173) grad_norm: 4.3191 (4.4672) time: 0.7966 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:49 root] (utils.py 283): INFO Epoch: [1] [1220/2502] eta: 0:17:22 lr: 0.000020 loss_cls: 4.2911 (4.0191) grad_norm: 4.4435 (4.4684) time: 0.8011 data: 0.0003 max mem: 8421 +[2024-12-04 22:22:57 root] (utils.py 283): INFO Epoch: [1] [1230/2502] eta: 0:17:14 lr: 0.000020 loss_cls: 4.2347 (4.0198) grad_norm: 4.5132 (4.4687) time: 0.8036 data: 0.0003 max mem: 8421 +[2024-12-04 22:23:05 root] (utils.py 283): INFO Epoch: [1] [1240/2502] eta: 0:17:05 lr: 0.000020 loss_cls: 4.2347 (4.0214) grad_norm: 4.5063 (4.4721) time: 0.7989 data: 0.0003 max mem: 8421 +[2024-12-04 22:23:13 root] (utils.py 283): INFO Epoch: [1] [1250/2502] eta: 0:16:57 lr: 0.000020 loss_cls: 4.1996 (4.0196) grad_norm: 4.4591 (4.4714) time: 0.7979 data: 0.0003 max mem: 8421 +[2024-12-04 22:23:21 root] (utils.py 283): INFO Epoch: [1] [1260/2502] eta: 0:16:49 lr: 0.000020 loss_cls: 4.1439 (4.0210) grad_norm: 4.1575 (4.4693) time: 0.7943 data: 0.0003 max mem: 8421 +[2024-12-04 22:23:29 root] (utils.py 283): INFO Epoch: [1] [1270/2502] eta: 0:16:40 lr: 0.000020 loss_cls: 4.2249 (4.0202) grad_norm: 4.0638 (4.4664) time: 0.7971 data: 0.0003 max mem: 8421 +[2024-12-04 22:23:37 root] (utils.py 283): INFO Epoch: [1] [1280/2502] eta: 0:16:32 lr: 0.000020 loss_cls: 4.1259 (4.0195) grad_norm: 4.1909 (4.4651) time: 0.8025 data: 0.0003 max mem: 8421 +[2024-12-04 22:23:45 root] (utils.py 283): INFO Epoch: [1] [1290/2502] eta: 0:16:24 lr: 0.000020 loss_cls: 4.1357 (4.0204) grad_norm: 4.2462 (4.4648) time: 0.8031 data: 0.0003 max mem: 8421 +[2024-12-04 22:23:53 root] (utils.py 283): INFO Epoch: [1] [1300/2502] eta: 0:16:16 lr: 0.000020 loss_cls: 4.1255 (4.0206) grad_norm: 4.2133 (4.4666) time: 0.8003 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:01 root] (utils.py 283): INFO Epoch: [1] [1310/2502] eta: 0:16:07 lr: 0.000020 loss_cls: 3.9580 (4.0192) grad_norm: 4.1834 (4.4639) time: 0.7991 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:09 root] (utils.py 283): INFO Epoch: [1] [1320/2502] eta: 0:15:59 lr: 0.000020 loss_cls: 3.9935 (4.0192) grad_norm: 4.2157 (4.4629) time: 0.8075 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:17 root] (utils.py 283): INFO Epoch: [1] [1330/2502] eta: 0:15:51 lr: 0.000020 loss_cls: 4.3340 (4.0202) grad_norm: 4.2568 (4.4617) time: 0.8057 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:25 root] (utils.py 283): INFO Epoch: [1] [1340/2502] eta: 0:15:43 lr: 0.000020 loss_cls: 4.3279 (4.0183) grad_norm: 4.2987 (4.4612) time: 0.8002 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:33 root] (utils.py 283): INFO Epoch: [1] [1350/2502] eta: 0:15:35 lr: 0.000020 loss_cls: 4.1704 (4.0203) grad_norm: 4.2871 (4.4630) time: 0.8010 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:41 root] (utils.py 283): INFO Epoch: [1] [1360/2502] eta: 0:15:26 lr: 0.000020 loss_cls: 4.1589 (4.0181) grad_norm: 4.2647 (4.4611) time: 0.8008 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:49 root] (utils.py 283): INFO Epoch: [1] [1370/2502] eta: 0:15:18 lr: 0.000020 loss_cls: 3.9302 (4.0169) grad_norm: 4.1883 (4.4593) time: 0.8004 data: 0.0003 max mem: 8421 +[2024-12-04 22:24:57 root] (utils.py 283): INFO Epoch: [1] [1380/2502] eta: 0:15:10 lr: 0.000020 loss_cls: 3.8977 (4.0151) grad_norm: 4.2845 (4.4602) time: 0.8003 data: 0.0003 max mem: 8421 +[2024-12-04 22:25:05 root] (utils.py 283): INFO Epoch: [1] [1390/2502] eta: 0:15:02 lr: 0.000020 loss_cls: 3.8977 (4.0149) grad_norm: 4.3415 (4.4596) time: 0.7986 data: 0.0003 max mem: 8421 +[2024-12-04 22:25:13 root] (utils.py 283): INFO Epoch: [1] [1400/2502] eta: 0:14:54 lr: 0.000020 loss_cls: 4.2151 (4.0138) grad_norm: 4.2657 (4.4576) time: 0.8009 data: 0.0003 max mem: 8421 +[2024-12-04 22:25:21 root] (utils.py 283): INFO Epoch: [1] [1410/2502] eta: 0:14:46 lr: 0.000020 loss_cls: 4.0452 (4.0136) grad_norm: 4.1700 (4.4555) time: 0.8047 data: 0.0003 max mem: 8421 +[2024-12-04 22:25:29 root] (utils.py 283): INFO Epoch: [1] [1420/2502] eta: 0:14:37 lr: 0.000020 loss_cls: 3.8441 (4.0135) grad_norm: 4.1394 (4.4541) time: 0.8037 data: 0.0003 max mem: 8421 +[2024-12-04 22:25:37 root] (utils.py 283): INFO Epoch: [1] [1430/2502] eta: 0:14:29 lr: 0.000020 loss_cls: 4.1002 (4.0128) grad_norm: 4.3087 (4.4531) time: 0.8016 data: 0.0003 max mem: 8421 +[2024-12-04 22:25:45 root] (utils.py 283): INFO Epoch: [1] [1440/2502] eta: 0:14:21 lr: 0.000020 loss_cls: 3.6871 (4.0111) grad_norm: 4.2623 (4.4528) time: 0.8033 data: 0.0003 max mem: 8421 +[2024-12-04 22:25:53 root] (utils.py 283): INFO Epoch: [1] [1450/2502] eta: 0:14:13 lr: 0.000020 loss_cls: 3.6871 (4.0094) grad_norm: 4.1851 (4.4523) time: 0.8021 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:01 root] (utils.py 283): INFO Epoch: [1] [1460/2502] eta: 0:14:05 lr: 0.000020 loss_cls: 4.0266 (4.0097) grad_norm: 4.1851 (4.4514) time: 0.7968 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:09 root] (utils.py 283): INFO Epoch: [1] [1470/2502] eta: 0:13:57 lr: 0.000020 loss_cls: 4.1975 (4.0116) grad_norm: 4.3481 (4.4512) time: 0.8097 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:17 root] (utils.py 283): INFO Epoch: [1] [1480/2502] eta: 0:13:48 lr: 0.000020 loss_cls: 4.1975 (4.0111) grad_norm: 4.2898 (4.4507) time: 0.8128 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:25 root] (utils.py 283): INFO Epoch: [1] [1490/2502] eta: 0:13:40 lr: 0.000020 loss_cls: 4.0160 (4.0106) grad_norm: 4.2732 (4.4499) time: 0.8020 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:33 root] (utils.py 283): INFO Epoch: [1] [1500/2502] eta: 0:13:32 lr: 0.000020 loss_cls: 3.9870 (4.0090) grad_norm: 4.2732 (4.4519) time: 0.7992 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:41 root] (utils.py 283): INFO Epoch: [1] [1510/2502] eta: 0:13:24 lr: 0.000020 loss_cls: 3.7883 (4.0079) grad_norm: 4.1518 (4.4504) time: 0.8031 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:50 root] (utils.py 283): INFO Epoch: [1] [1520/2502] eta: 0:13:16 lr: 0.000020 loss_cls: 4.1203 (4.0083) grad_norm: 4.1546 (4.4564) time: 0.8091 data: 0.0003 max mem: 8421 +[2024-12-04 22:26:57 root] (utils.py 283): INFO Epoch: [1] [1530/2502] eta: 0:13:08 lr: 0.000020 loss_cls: 4.0832 (4.0079) grad_norm: 4.3044 (4.4556) time: 0.8024 data: 0.0003 max mem: 8421 +[2024-12-04 22:27:05 root] (utils.py 283): INFO Epoch: [1] [1540/2502] eta: 0:12:59 lr: 0.000020 loss_cls: 4.0506 (4.0093) grad_norm: 4.3530 (4.4552) time: 0.7982 data: 0.0003 max mem: 8421 +[2024-12-04 22:27:13 root] (utils.py 283): INFO Epoch: [1] [1550/2502] eta: 0:12:51 lr: 0.000020 loss_cls: 4.2655 (4.0111) grad_norm: 4.3530 (4.4539) time: 0.7991 data: 0.0003 max mem: 8421 +[2024-12-04 22:27:21 root] (utils.py 283): INFO Epoch: [1] [1560/2502] eta: 0:12:43 lr: 0.000020 loss_cls: 4.2204 (4.0120) grad_norm: 4.2259 (4.4548) time: 0.7973 data: 0.0004 max mem: 8421 +[2024-12-04 22:27:29 root] (utils.py 283): INFO Epoch: [1] [1570/2502] eta: 0:12:35 lr: 0.000020 loss_cls: 3.7207 (4.0087) grad_norm: 4.1547 (4.4527) time: 0.7997 data: 0.0004 max mem: 8421 +[2024-12-04 22:27:37 root] (utils.py 283): INFO Epoch: [1] [1580/2502] eta: 0:12:27 lr: 0.000020 loss_cls: 3.5103 (4.0069) grad_norm: 4.1476 (4.4525) time: 0.7997 data: 0.0003 max mem: 8421 +[2024-12-04 22:27:45 root] (utils.py 283): INFO Epoch: [1] [1590/2502] eta: 0:12:18 lr: 0.000020 loss_cls: 3.7234 (4.0058) grad_norm: 4.3009 (4.4523) time: 0.7956 data: 0.0003 max mem: 8421 +[2024-12-04 22:27:53 root] (utils.py 283): INFO Epoch: [1] [1600/2502] eta: 0:12:10 lr: 0.000020 loss_cls: 3.8287 (4.0035) grad_norm: 4.2242 (4.4511) time: 0.7925 data: 0.0003 max mem: 8421 +[2024-12-04 22:28:01 root] (utils.py 283): INFO Epoch: [1] [1610/2502] eta: 0:12:02 lr: 0.000020 loss_cls: 3.8631 (4.0035) grad_norm: 4.0625 (4.4492) time: 0.7972 data: 0.0003 max mem: 8421 +[2024-12-04 22:28:09 root] (utils.py 283): INFO Epoch: [1] [1620/2502] eta: 0:11:54 lr: 0.000020 loss_cls: 3.9882 (4.0032) grad_norm: 4.2066 (4.4485) time: 0.7995 data: 0.0002 max mem: 8421 +[2024-12-04 22:28:17 root] (utils.py 283): INFO Epoch: [1] [1630/2502] eta: 0:11:46 lr: 0.000020 loss_cls: 4.0231 (4.0033) grad_norm: 4.2548 (4.4484) time: 0.7999 data: 0.0003 max mem: 8421 +[2024-12-04 22:28:25 root] (utils.py 283): INFO Epoch: [1] [1640/2502] eta: 0:11:38 lr: 0.000020 loss_cls: 4.0763 (4.0033) grad_norm: 4.4026 (4.4496) time: 0.7997 data: 0.0002 max mem: 8421 +[2024-12-04 22:28:33 root] (utils.py 283): INFO Epoch: [1] [1650/2502] eta: 0:11:29 lr: 0.000020 loss_cls: 4.0474 (4.0030) grad_norm: 4.5426 (4.4504) time: 0.7952 data: 0.0002 max mem: 8421 +[2024-12-04 22:28:41 root] (utils.py 283): INFO Epoch: [1] [1660/2502] eta: 0:11:21 lr: 0.000020 loss_cls: 3.9007 (4.0017) grad_norm: 4.2725 (4.4500) time: 0.7974 data: 0.0003 max mem: 8421 +[2024-12-04 22:28:49 root] (utils.py 283): INFO Epoch: [1] [1670/2502] eta: 0:11:13 lr: 0.000020 loss_cls: 4.0524 (4.0029) grad_norm: 4.2695 (4.4496) time: 0.8019 data: 0.0003 max mem: 8421 +[2024-12-04 22:28:57 root] (utils.py 283): INFO Epoch: [1] [1680/2502] eta: 0:11:05 lr: 0.000020 loss_cls: 4.0708 (4.0036) grad_norm: 4.2594 (4.4482) time: 0.8008 data: 0.0003 max mem: 8421 +[2024-12-04 22:29:05 root] (utils.py 283): INFO Epoch: [1] [1690/2502] eta: 0:10:57 lr: 0.000020 loss_cls: 3.9381 (4.0018) grad_norm: 4.2116 (4.4468) time: 0.7974 data: 0.0003 max mem: 8421 +[2024-12-04 22:29:13 root] (utils.py 283): INFO Epoch: [1] [1700/2502] eta: 0:10:49 lr: 0.000020 loss_cls: 3.9608 (4.0023) grad_norm: 4.1509 (4.4484) time: 0.7961 data: 0.0003 max mem: 8421 +[2024-12-04 22:29:21 root] (utils.py 283): INFO Epoch: [1] [1710/2502] eta: 0:10:41 lr: 0.000020 loss_cls: 4.1121 (4.0022) grad_norm: 4.1796 (4.4473) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-04 22:29:29 root] (utils.py 283): INFO Epoch: [1] [1720/2502] eta: 0:10:32 lr: 0.000020 loss_cls: 4.1956 (4.0033) grad_norm: 4.1796 (4.4462) time: 0.7974 data: 0.0003 max mem: 8421 +[2024-12-04 22:29:37 root] (utils.py 283): INFO Epoch: [1] [1730/2502] eta: 0:10:24 lr: 0.000020 loss_cls: 4.2903 (4.0015) grad_norm: 4.3029 (4.4476) time: 0.7987 data: 0.0003 max mem: 8421 +[2024-12-04 22:29:45 root] (utils.py 283): INFO Epoch: [1] [1740/2502] eta: 0:10:16 lr: 0.000020 loss_cls: 3.8395 (4.0017) grad_norm: 4.3854 (4.4487) time: 0.7964 data: 0.0003 max mem: 8421 +[2024-12-04 22:29:53 root] (utils.py 283): INFO Epoch: [1] [1750/2502] eta: 0:10:08 lr: 0.000020 loss_cls: 4.1303 (4.0024) grad_norm: 4.2289 (4.4489) time: 0.7987 data: 0.0003 max mem: 8421 +[2024-12-04 22:30:01 root] (utils.py 283): INFO Epoch: [1] [1760/2502] eta: 0:10:00 lr: 0.000020 loss_cls: 4.1303 (4.0023) grad_norm: 4.2707 (4.4491) time: 0.7983 data: 0.0003 max mem: 8421 +[2024-12-04 22:30:09 root] (utils.py 283): INFO Epoch: [1] [1770/2502] eta: 0:09:52 lr: 0.000020 loss_cls: 3.8511 (4.0014) grad_norm: 4.2141 (4.4482) time: 0.7963 data: 0.0003 max mem: 8421 +[2024-12-04 22:30:17 root] (utils.py 283): INFO Epoch: [1] [1780/2502] eta: 0:09:44 lr: 0.000020 loss_cls: 3.9333 (4.0008) grad_norm: 4.2013 (4.4486) time: 0.7957 data: 0.0003 max mem: 8421 +[2024-12-04 22:30:25 root] (utils.py 283): INFO Epoch: [1] [1790/2502] eta: 0:09:35 lr: 0.000020 loss_cls: 3.9805 (4.0005) grad_norm: 4.3386 (4.4489) time: 0.7921 data: 0.0002 max mem: 8421 +[2024-12-04 22:30:33 root] (utils.py 283): INFO Epoch: [1] [1800/2502] eta: 0:09:27 lr: 0.000020 loss_cls: 3.9631 (3.9997) grad_norm: 4.3184 (4.4487) time: 0.8002 data: 0.0003 max mem: 8421 +[2024-12-04 22:30:41 root] (utils.py 283): INFO Epoch: [1] [1810/2502] eta: 0:09:19 lr: 0.000020 loss_cls: 4.1669 (4.0009) grad_norm: 4.2541 (4.4476) time: 0.8064 data: 0.0003 max mem: 8421 +[2024-12-04 22:30:49 root] (utils.py 283): INFO Epoch: [1] [1820/2502] eta: 0:09:11 lr: 0.000020 loss_cls: 4.2187 (4.0011) grad_norm: 4.1999 (4.4468) time: 0.8010 data: 0.0003 max mem: 8421 +[2024-12-04 22:30:57 root] (utils.py 283): INFO Epoch: [1] [1830/2502] eta: 0:09:03 lr: 0.000020 loss_cls: 4.0266 (4.0009) grad_norm: 4.2858 (4.4482) time: 0.8005 data: 0.0003 max mem: 8421 +[2024-12-04 22:31:05 root] (utils.py 283): INFO Epoch: [1] [1840/2502] eta: 0:08:55 lr: 0.000020 loss_cls: 4.0266 (3.9994) grad_norm: 4.2850 (4.4474) time: 0.7962 data: 0.0003 max mem: 8421 +[2024-12-04 22:31:13 root] (utils.py 283): INFO Epoch: [1] [1850/2502] eta: 0:08:47 lr: 0.000020 loss_cls: 4.2382 (3.9990) grad_norm: 4.3348 (4.4477) time: 0.7940 data: 0.0002 max mem: 8421 +[2024-12-04 22:31:21 root] (utils.py 283): INFO Epoch: [1] [1860/2502] eta: 0:08:39 lr: 0.000020 loss_cls: 4.1716 (3.9987) grad_norm: 4.3738 (4.4469) time: 0.7983 data: 0.0003 max mem: 8421 +[2024-12-04 22:31:29 root] (utils.py 283): INFO Epoch: [1] [1870/2502] eta: 0:08:30 lr: 0.000020 loss_cls: 4.2230 (3.9996) grad_norm: 4.1556 (4.4454) time: 0.8007 data: 0.0003 max mem: 8421 +[2024-12-04 22:31:37 root] (utils.py 283): INFO Epoch: [1] [1880/2502] eta: 0:08:22 lr: 0.000020 loss_cls: 4.2670 (3.9990) grad_norm: 4.3422 (4.4473) time: 0.8004 data: 0.0003 max mem: 8421 +[2024-12-04 22:31:45 root] (utils.py 283): INFO Epoch: [1] [1890/2502] eta: 0:08:14 lr: 0.000020 loss_cls: 4.0888 (3.9993) grad_norm: 4.3879 (4.4466) time: 0.8033 data: 0.0003 max mem: 8421 +[2024-12-04 22:31:53 root] (utils.py 283): INFO Epoch: [1] [1900/2502] eta: 0:08:06 lr: 0.000020 loss_cls: 4.2649 (4.0002) grad_norm: 4.1003 (4.4449) time: 0.7955 data: 0.0003 max mem: 8421 +[2024-12-04 22:32:01 root] (utils.py 283): INFO Epoch: [1] [1910/2502] eta: 0:07:58 lr: 0.000020 loss_cls: 4.2649 (4.0002) grad_norm: 4.1003 (4.4439) time: 0.7919 data: 0.0003 max mem: 8421 +[2024-12-04 22:32:09 root] (utils.py 283): INFO Epoch: [1] [1920/2502] eta: 0:07:50 lr: 0.000020 loss_cls: 4.1262 (4.0003) grad_norm: 4.2791 (4.4435) time: 0.7966 data: 0.0003 max mem: 8421 +[2024-12-04 22:32:17 root] (utils.py 283): INFO Epoch: [1] [1930/2502] eta: 0:07:42 lr: 0.000020 loss_cls: 4.1262 (3.9987) grad_norm: 4.3944 (4.4466) time: 0.7948 data: 0.0003 max mem: 8421 +[2024-12-04 22:32:25 root] (utils.py 283): INFO Epoch: [1] [1940/2502] eta: 0:07:34 lr: 0.000020 loss_cls: 4.1811 (3.9993) grad_norm: 4.4773 (4.4469) time: 0.7986 data: 0.0003 max mem: 8421 +[2024-12-04 22:32:33 root] (utils.py 283): INFO Epoch: [1] [1950/2502] eta: 0:07:26 lr: 0.000020 loss_cls: 4.2627 (3.9994) grad_norm: 4.3970 (4.4470) time: 0.8027 data: 0.0003 max mem: 8421 +[2024-12-04 22:32:41 root] (utils.py 283): INFO Epoch: [1] [1960/2502] eta: 0:07:17 lr: 0.000020 loss_cls: 4.1273 (3.9982) grad_norm: 4.3497 (4.4478) time: 0.8003 data: 0.0003 max mem: 8421 +[2024-12-04 22:32:49 root] (utils.py 283): INFO Epoch: [1] [1970/2502] eta: 0:07:09 lr: 0.000020 loss_cls: 4.2093 (4.0004) grad_norm: 4.3231 (4.4482) time: 0.8008 data: 0.0003 max mem: 8421 +[2024-12-04 22:35:01 root] (main.py 223): INFO Namespace(batch_size=128, epochs=30, model='RMeeTo_tiny', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', output_dir='check/tiny/30', device='cuda', seed=0, autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='5', metric='X', distance='cosine', if_order=True, if_random=False, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-04 22:35:05 root] (main.py 286): INFO Creating model: RMeeTo_tiny +[2024-12-04 22:35:10 root] (main.py 365): INFO number of params: 7148008 +[2024-12-04 22:35:13 root] (main.py 480): INFO Start training for 29 epochs +[2024-12-04 22:35:18 root] (utils.py 283): INFO Epoch: [1] [ 0/2502] eta: 3:51:04 lr: 0.000020 loss_cls: 4.5484 (4.5484) grad_norm: 4.3719 (4.3719) time: 5.5416 data: 0.0015 max mem: 8394 +[2024-12-04 22:35:26 root] (utils.py 283): INFO Epoch: [1] [ 10/2502] eta: 0:50:51 lr: 0.000020 loss_cls: 4.5484 (4.4423) grad_norm: 4.3719 (4.4606) time: 1.2244 data: 0.0004 max mem: 8421 +[2024-12-04 22:35:38 root] (utils.py 283): INFO Epoch: [1] [ 20/2502] eta: 0:50:25 lr: 0.000020 loss_cls: 4.3698 (4.2959) grad_norm: 4.2890 (4.5303) time: 1.0029 data: 0.0003 max mem: 8421 +[2024-12-04 22:35:58 root] (utils.py 283): INFO Epoch: [1] [ 30/2502] eta: 0:59:52 lr: 0.000020 loss_cls: 4.2555 (4.3115) grad_norm: 4.3077 (4.4968) time: 1.5792 data: 0.0003 max mem: 8421 +[2024-12-04 22:36:17 root] (utils.py 283): INFO Epoch: [1] [ 40/2502] eta: 1:04:38 lr: 0.000020 loss_cls: 4.1994 (4.2417) grad_norm: 4.3637 (4.4692) time: 1.9494 data: 0.0003 max mem: 8421 +[2024-12-04 22:36:31 root] (utils.py 283): INFO Epoch: [1] [ 50/2502] eta: 1:02:32 lr: 0.000020 loss_cls: 4.0927 (4.2169) grad_norm: 4.3397 (4.4683) time: 1.6494 data: 0.0003 max mem: 8421 +[2024-12-04 22:36:45 root] (utils.py 283): INFO Epoch: [1] [ 60/2502] eta: 1:01:16 lr: 0.000020 loss_cls: 4.0927 (4.1816) grad_norm: 4.3182 (4.4459) time: 1.3622 data: 0.0003 max mem: 8421 +[2024-12-04 22:37:02 root] (utils.py 283): INFO Epoch: [1] [ 70/2502] eta: 1:02:33 lr: 0.000020 loss_cls: 3.9860 (4.1409) grad_norm: 4.3446 (4.4756) time: 1.5771 data: 0.0003 max mem: 8421 +[2024-12-04 22:37:20 root] (utils.py 283): INFO Epoch: [1] [ 80/2502] eta: 1:03:26 lr: 0.000020 loss_cls: 4.0377 (4.1204) grad_norm: 4.3278 (4.4603) time: 1.7732 data: 0.0003 max mem: 8421 +[2024-12-04 22:37:38 root] (utils.py 283): INFO Epoch: [1] [ 90/2502] eta: 1:04:03 lr: 0.000020 loss_cls: 4.1343 (4.1055) grad_norm: 4.3709 (4.4847) time: 1.7704 data: 0.0003 max mem: 8421 +[2024-12-04 22:38:00 root] (utils.py 283): INFO Epoch: [1] [ 100/2502] eta: 1:06:19 lr: 0.000020 loss_cls: 4.2999 (4.1160) grad_norm: 4.4368 (4.4999) time: 2.0009 data: 0.0003 max mem: 8421 +[2024-12-04 22:38:22 root] (utils.py 283): INFO Epoch: [1] [ 110/2502] eta: 1:08:04 lr: 0.000020 loss_cls: 4.3175 (4.1105) grad_norm: 4.4322 (4.4921) time: 2.2275 data: 0.0002 max mem: 8421 +[2024-12-04 22:38:33 root] (utils.py 283): INFO Epoch: [1] [ 120/2502] eta: 1:05:43 lr: 0.000020 loss_cls: 4.1295 (4.1082) grad_norm: 4.2629 (4.4768) time: 1.6499 data: 0.0003 max mem: 8421 +[2024-12-04 22:38:54 root] (utils.py 283): INFO Epoch: [1] [ 130/2502] eta: 1:06:53 lr: 0.000020 loss_cls: 4.1302 (4.1005) grad_norm: 4.2729 (4.4647) time: 1.6055 data: 0.0003 max mem: 8421 +[2024-12-04 22:39:25 root] (utils.py 283): INFO Epoch: [1] [ 140/2502] eta: 1:10:33 lr: 0.000020 loss_cls: 4.1302 (4.0866) grad_norm: 4.2729 (4.4783) time: 2.6197 data: 0.0003 max mem: 8421 +[2024-12-04 22:39:55 root] (utils.py 283): INFO Epoch: [1] [ 150/2502] eta: 1:13:11 lr: 0.000020 loss_cls: 4.0226 (4.0784) grad_norm: 4.3293 (4.4904) time: 3.0132 data: 0.0003 max mem: 8421 +[2024-12-04 22:40:23 root] (utils.py 283): INFO Epoch: [1] [ 160/2502] eta: 1:15:06 lr: 0.000020 loss_cls: 4.0226 (4.0766) grad_norm: 4.3293 (4.4857) time: 2.8553 data: 0.0004 max mem: 8421 +[2024-12-04 22:40:39 root] (utils.py 283): INFO Epoch: [1] [ 170/2502] eta: 1:14:12 lr: 0.000020 loss_cls: 4.0915 (4.0725) grad_norm: 4.1727 (4.4783) time: 2.2270 data: 0.0003 max mem: 8421 +[2024-12-04 22:41:06 root] (utils.py 283): INFO Epoch: [1] [ 180/2502] eta: 1:15:34 lr: 0.000020 loss_cls: 4.0455 (4.0536) grad_norm: 4.1765 (4.4903) time: 2.1818 data: 0.0002 max mem: 8421 +[2024-12-04 22:41:34 root] (utils.py 283): INFO Epoch: [1] [ 190/2502] eta: 1:17:00 lr: 0.000020 loss_cls: 3.8328 (4.0477) grad_norm: 4.3733 (4.4840) time: 2.7632 data: 0.0002 max mem: 8421 +[2024-12-04 22:41:56 root] (utils.py 283): INFO Epoch: [1] [ 200/2502] eta: 1:17:00 lr: 0.000020 loss_cls: 3.7959 (4.0332) grad_norm: 4.3090 (4.4727) time: 2.4977 data: 0.0003 max mem: 8421 +[2024-12-04 22:42:18 root] (utils.py 283): INFO Epoch: [1] [ 210/2502] eta: 1:16:55 lr: 0.000020 loss_cls: 3.8978 (4.0261) grad_norm: 4.1936 (4.4568) time: 2.1583 data: 0.0002 max mem: 8421 +[2024-12-04 22:42:35 root] (utils.py 283): INFO Epoch: [1] [ 220/2502] eta: 1:16:06 lr: 0.000020 loss_cls: 3.9765 (4.0168) grad_norm: 4.2041 (4.4471) time: 1.9400 data: 0.0003 max mem: 8421 +[2024-12-04 22:42:47 root] (utils.py 283): INFO Epoch: [1] [ 230/2502] eta: 1:14:26 lr: 0.000020 loss_cls: 3.9875 (4.0142) grad_norm: 4.3107 (4.4681) time: 1.4630 data: 0.0003 max mem: 8421 +[2024-12-04 22:43:08 root] (utils.py 283): INFO Epoch: [1] [ 240/2502] eta: 1:14:21 lr: 0.000020 loss_cls: 4.0538 (4.0103) grad_norm: 4.4137 (4.4670) time: 1.6566 data: 0.0003 max mem: 8421 +[2024-12-04 22:43:30 root] (utils.py 283): INFO Epoch: [1] [ 250/2502] eta: 1:14:20 lr: 0.000020 loss_cls: 3.8936 (3.9968) grad_norm: 4.3864 (4.4766) time: 2.1512 data: 0.0003 max mem: 8421 +[2024-12-04 22:43:51 root] (utils.py 283): INFO Epoch: [1] [ 260/2502] eta: 1:14:09 lr: 0.000020 loss_cls: 3.8184 (3.9854) grad_norm: 4.3671 (4.4890) time: 2.1348 data: 0.0002 max mem: 8421 +[2024-12-04 22:44:13 root] (utils.py 283): INFO Epoch: [1] [ 270/2502] eta: 1:14:09 lr: 0.000020 loss_cls: 3.8634 (3.9802) grad_norm: 4.2870 (4.4833) time: 2.1551 data: 0.0002 max mem: 8421 +[2024-12-04 22:44:28 root] (utils.py 283): INFO Epoch: [1] [ 280/2502] eta: 1:13:10 lr: 0.000020 loss_cls: 4.0684 (3.9862) grad_norm: 4.2152 (4.4866) time: 1.8631 data: 0.0003 max mem: 8421 +[2024-12-04 22:44:43 root] (utils.py 283): INFO Epoch: [1] [ 290/2502] eta: 1:12:10 lr: 0.000020 loss_cls: 4.0684 (3.9774) grad_norm: 4.3315 (4.4948) time: 1.4746 data: 0.0003 max mem: 8421 +[2024-12-04 22:45:05 root] (utils.py 283): INFO Epoch: [1] [ 300/2502] eta: 1:12:11 lr: 0.000020 loss_cls: 3.5968 (3.9678) grad_norm: 4.3938 (4.5071) time: 1.8412 data: 0.0003 max mem: 8421 +[2024-12-04 22:45:26 root] (utils.py 283): INFO Epoch: [1] [ 310/2502] eta: 1:12:00 lr: 0.000020 loss_cls: 3.7444 (3.9651) grad_norm: 4.3938 (4.5109) time: 2.1583 data: 0.0003 max mem: 8421 +[2024-12-04 22:45:48 root] (utils.py 283): INFO Epoch: [1] [ 320/2502] eta: 1:11:59 lr: 0.000020 loss_cls: 4.2529 (3.9774) grad_norm: 4.4731 (4.5165) time: 2.1687 data: 0.0003 max mem: 8421 +[2024-12-04 22:46:10 root] (utils.py 283): INFO Epoch: [1] [ 330/2502] eta: 1:11:53 lr: 0.000020 loss_cls: 4.3396 (3.9748) grad_norm: 4.3465 (4.5107) time: 2.2224 data: 0.0003 max mem: 8421 +[2024-12-04 22:46:22 root] (utils.py 283): INFO Epoch: [1] [ 340/2502] eta: 1:10:40 lr: 0.000020 loss_cls: 4.1405 (3.9793) grad_norm: 4.3465 (4.5085) time: 1.6656 data: 0.0003 max mem: 8421 +[2024-12-04 22:46:39 root] (utils.py 283): INFO Epoch: [1] [ 350/2502] eta: 1:10:09 lr: 0.000020 loss_cls: 4.1249 (3.9790) grad_norm: 4.4512 (4.5141) time: 1.4613 data: 0.0003 max mem: 8421 +[2024-12-04 22:47:01 root] (utils.py 283): INFO Epoch: [1] [ 360/2502] eta: 1:10:04 lr: 0.000020 loss_cls: 4.0302 (3.9764) grad_norm: 4.4522 (4.5192) time: 1.9909 data: 0.0003 max mem: 8421 +[2024-12-04 22:47:21 root] (utils.py 283): INFO Epoch: [1] [ 370/2502] eta: 1:09:44 lr: 0.000020 loss_cls: 3.9283 (3.9742) grad_norm: 4.4522 (4.5229) time: 2.0740 data: 0.0003 max mem: 8421 +[2024-12-04 22:47:39 root] (utils.py 283): INFO Epoch: [1] [ 380/2502] eta: 1:09:15 lr: 0.000020 loss_cls: 4.1204 (3.9717) grad_norm: 4.4259 (4.5221) time: 1.8766 data: 0.0003 max mem: 8421 +[2024-12-04 22:47:57 root] (utils.py 283): INFO Epoch: [1] [ 390/2502] eta: 1:08:46 lr: 0.000020 loss_cls: 4.1204 (3.9694) grad_norm: 4.3285 (4.5218) time: 1.7937 data: 0.0003 max mem: 8421 +[2024-12-04 22:48:13 root] (utils.py 283): INFO Epoch: [1] [ 400/2502] eta: 1:08:12 lr: 0.000020 loss_cls: 4.0711 (3.9734) grad_norm: 4.3472 (4.5199) time: 1.7276 data: 0.0002 max mem: 8421 +[2024-12-04 22:48:23 root] (utils.py 283): INFO Epoch: [1] [ 410/2502] eta: 1:07:02 lr: 0.000020 loss_cls: 4.1172 (3.9797) grad_norm: 4.4106 (4.5194) time: 1.3181 data: 0.0002 max mem: 8421 +[2024-12-04 22:48:45 root] (utils.py 283): INFO Epoch: [1] [ 420/2502] eta: 1:06:57 lr: 0.000020 loss_cls: 4.2037 (3.9811) grad_norm: 4.4020 (4.5168) time: 1.5850 data: 0.0003 max mem: 8421 +[2024-12-04 22:49:08 root] (utils.py 283): INFO Epoch: [1] [ 430/2502] eta: 1:06:54 lr: 0.000020 loss_cls: 4.3070 (3.9868) grad_norm: 4.3696 (4.5243) time: 2.2340 data: 0.0003 max mem: 8421 +[2024-12-04 22:49:28 root] (utils.py 283): INFO Epoch: [1] [ 440/2502] eta: 1:06:39 lr: 0.000020 loss_cls: 4.3198 (3.9924) grad_norm: 4.4960 (4.5246) time: 2.1486 data: 0.0003 max mem: 8421 +[2024-12-04 22:49:49 root] (utils.py 283): INFO Epoch: [1] [ 450/2502] eta: 1:06:27 lr: 0.000020 loss_cls: 4.0464 (3.9916) grad_norm: 4.2890 (4.5207) time: 2.0721 data: 0.0003 max mem: 8421 +[2024-12-04 22:50:07 root] (utils.py 283): INFO Epoch: [1] [ 460/2502] eta: 1:05:58 lr: 0.000020 loss_cls: 3.9675 (3.9855) grad_norm: 4.3517 (4.5206) time: 1.9186 data: 0.0003 max mem: 8421 +[2024-12-04 22:50:18 root] (utils.py 283): INFO Epoch: [1] [ 470/2502] eta: 1:05:04 lr: 0.000020 loss_cls: 3.6333 (3.9752) grad_norm: 4.2728 (4.5137) time: 1.4240 data: 0.0002 max mem: 8421 +[2024-12-04 22:50:39 root] (utils.py 283): INFO Epoch: [1] [ 480/2502] eta: 1:04:54 lr: 0.000020 loss_cls: 3.6955 (3.9723) grad_norm: 4.1802 (4.5073) time: 1.6395 data: 0.0002 max mem: 8421 +[2024-12-04 22:51:01 root] (utils.py 283): INFO Epoch: [1] [ 490/2502] eta: 1:04:45 lr: 0.000020 loss_cls: 3.9968 (3.9775) grad_norm: 4.2077 (4.5034) time: 2.1689 data: 0.0002 max mem: 8421 +[2024-12-04 22:51:21 root] (utils.py 283): INFO Epoch: [1] [ 500/2502] eta: 1:04:29 lr: 0.000020 loss_cls: 4.0748 (3.9739) grad_norm: 4.3376 (4.5027) time: 2.0957 data: 0.0002 max mem: 8421 +[2024-12-04 22:51:43 root] (utils.py 283): INFO Epoch: [1] [ 510/2502] eta: 1:04:18 lr: 0.000020 loss_cls: 4.0246 (3.9779) grad_norm: 4.4370 (4.5013) time: 2.0798 data: 0.0002 max mem: 8421 +[2024-12-04 22:51:59 root] (utils.py 283): INFO Epoch: [1] [ 520/2502] eta: 1:03:47 lr: 0.000020 loss_cls: 4.1494 (3.9798) grad_norm: 4.3168 (4.4983) time: 1.8882 data: 0.0002 max mem: 8421 +[2024-12-04 22:52:11 root] (utils.py 283): INFO Epoch: [1] [ 530/2502] eta: 1:03:02 lr: 0.000020 loss_cls: 4.1494 (3.9740) grad_norm: 4.2386 (4.4939) time: 1.4319 data: 0.0002 max mem: 8421 +[2024-12-04 22:52:34 root] (utils.py 283): INFO Epoch: [1] [ 540/2502] eta: 1:02:54 lr: 0.000020 loss_cls: 4.0743 (3.9714) grad_norm: 4.1977 (4.4881) time: 1.7336 data: 0.0002 max mem: 8421 +[2024-12-04 22:52:56 root] (utils.py 283): INFO Epoch: [1] [ 550/2502] eta: 1:02:45 lr: 0.000020 loss_cls: 4.0743 (3.9705) grad_norm: 4.3082 (4.4900) time: 2.2210 data: 0.0002 max mem: 8421 +[2024-12-04 22:53:16 root] (utils.py 283): INFO Epoch: [1] [ 560/2502] eta: 1:02:30 lr: 0.000020 loss_cls: 4.3021 (3.9749) grad_norm: 4.3323 (4.4951) time: 2.1202 data: 0.0002 max mem: 8421 +[2024-12-04 22:53:38 root] (utils.py 283): INFO Epoch: [1] [ 570/2502] eta: 1:02:18 lr: 0.000020 loss_cls: 4.2815 (3.9780) grad_norm: 4.2986 (4.4958) time: 2.1038 data: 0.0002 max mem: 8421 +[2024-12-04 22:53:52 root] (utils.py 283): INFO Epoch: [1] [ 580/2502] eta: 1:01:42 lr: 0.000020 loss_cls: 4.1626 (3.9794) grad_norm: 4.1686 (4.4929) time: 1.7971 data: 0.0002 max mem: 8421 +[2024-12-04 22:54:07 root] (utils.py 283): INFO Epoch: [1] [ 590/2502] eta: 1:01:10 lr: 0.000020 loss_cls: 4.1673 (3.9797) grad_norm: 4.1652 (4.4880) time: 1.4686 data: 0.0002 max mem: 8421 +[2024-12-04 22:54:29 root] (utils.py 283): INFO Epoch: [1] [ 600/2502] eta: 1:00:59 lr: 0.000020 loss_cls: 4.0944 (3.9780) grad_norm: 4.1852 (4.4866) time: 1.8477 data: 0.0003 max mem: 8421 +[2024-12-04 22:54:49 root] (utils.py 283): INFO Epoch: [1] [ 610/2502] eta: 1:00:41 lr: 0.000020 loss_cls: 3.9117 (3.9775) grad_norm: 4.3815 (4.4877) time: 2.0735 data: 0.0003 max mem: 8421 +[2024-12-04 22:55:10 root] (utils.py 283): INFO Epoch: [1] [ 620/2502] eta: 1:00:29 lr: 0.000020 loss_cls: 3.9138 (3.9795) grad_norm: 4.3971 (4.4834) time: 2.0677 data: 0.0002 max mem: 8421 +[2024-12-04 22:55:32 root] (utils.py 283): INFO Epoch: [1] [ 630/2502] eta: 1:00:18 lr: 0.000020 loss_cls: 4.1313 (3.9809) grad_norm: 4.3816 (4.4853) time: 2.1847 data: 0.0002 max mem: 8421 +[2024-12-04 22:55:45 root] (utils.py 283): INFO Epoch: [1] [ 640/2502] eta: 0:59:38 lr: 0.000020 loss_cls: 4.0510 (3.9794) grad_norm: 4.3920 (4.4837) time: 1.7263 data: 0.0002 max mem: 8421 +[2024-12-04 22:56:02 root] (utils.py 283): INFO Epoch: [1] [ 650/2502] eta: 0:59:13 lr: 0.000020 loss_cls: 3.9677 (3.9791) grad_norm: 4.3791 (4.4860) time: 1.4783 data: 0.0003 max mem: 8421 +[2024-12-04 22:56:23 root] (utils.py 283): INFO Epoch: [1] [ 660/2502] eta: 0:59:00 lr: 0.000020 loss_cls: 3.7069 (3.9724) grad_norm: 4.4358 (4.4865) time: 1.9303 data: 0.0002 max mem: 8421 +[2024-12-04 22:56:43 root] (utils.py 283): INFO Epoch: [1] [ 670/2502] eta: 0:58:43 lr: 0.000020 loss_cls: 3.9007 (3.9762) grad_norm: 4.4835 (4.4897) time: 2.0711 data: 0.0002 max mem: 8421 +[2024-12-04 22:57:05 root] (utils.py 283): INFO Epoch: [1] [ 680/2502] eta: 0:58:31 lr: 0.000020 loss_cls: 4.2968 (3.9773) grad_norm: 4.4450 (4.4886) time: 2.0959 data: 0.0002 max mem: 8421 +[2024-12-04 22:57:27 root] (utils.py 283): INFO Epoch: [1] [ 690/2502] eta: 0:58:19 lr: 0.000020 loss_cls: 3.6930 (3.9726) grad_norm: 4.2509 (4.4857) time: 2.2057 data: 0.0002 max mem: 8421 +[2024-12-04 22:57:38 root] (utils.py 283): INFO Epoch: [1] [ 700/2502] eta: 0:57:36 lr: 0.000020 loss_cls: 3.5879 (3.9702) grad_norm: 4.3225 (4.4934) time: 1.6072 data: 0.0002 max mem: 8421 +[2024-12-04 22:57:57 root] (utils.py 283): INFO Epoch: [1] [ 710/2502] eta: 0:57:17 lr: 0.000020 loss_cls: 3.8827 (3.9690) grad_norm: 4.3744 (4.4912) time: 1.4519 data: 0.0002 max mem: 8421 +[2024-12-04 22:58:18 root] (utils.py 283): INFO Epoch: [1] [ 720/2502] eta: 0:57:03 lr: 0.000020 loss_cls: 3.8827 (3.9658) grad_norm: 4.3424 (4.4894) time: 2.0180 data: 0.0003 max mem: 8421 +[2024-12-04 22:58:37 root] (utils.py 283): INFO Epoch: [1] [ 730/2502] eta: 0:56:44 lr: 0.000020 loss_cls: 4.0235 (3.9658) grad_norm: 4.3424 (4.4879) time: 2.0432 data: 0.0003 max mem: 8421 +[2024-12-04 22:58:59 root] (utils.py 283): INFO Epoch: [1] [ 740/2502] eta: 0:56:30 lr: 0.000020 loss_cls: 4.1607 (3.9707) grad_norm: 4.2673 (4.4840) time: 2.0367 data: 0.0002 max mem: 8421 +[2024-12-04 22:59:21 root] (utils.py 283): INFO Epoch: [1] [ 750/2502] eta: 0:56:17 lr: 0.000020 loss_cls: 4.2250 (3.9688) grad_norm: 4.2673 (4.4820) time: 2.1660 data: 0.0002 max mem: 8421 +[2024-12-04 22:59:31 root] (utils.py 283): INFO Epoch: [1] [ 760/2502] eta: 0:55:36 lr: 0.000020 loss_cls: 4.1198 (3.9687) grad_norm: 4.3582 (4.4809) time: 1.5957 data: 0.0003 max mem: 8421 +[2024-12-04 22:59:50 root] (utils.py 283): INFO Epoch: [1] [ 770/2502] eta: 0:55:18 lr: 0.000020 loss_cls: 4.1198 (3.9693) grad_norm: 4.4725 (4.4906) time: 1.4667 data: 0.0002 max mem: 8421 +[2024-12-04 23:00:12 root] (utils.py 283): INFO Epoch: [1] [ 780/2502] eta: 0:55:04 lr: 0.000020 loss_cls: 3.9742 (3.9703) grad_norm: 4.4725 (4.4888) time: 2.0591 data: 0.0003 max mem: 8421 +[2024-12-04 23:00:30 root] (utils.py 283): INFO Epoch: [1] [ 790/2502] eta: 0:54:43 lr: 0.000020 loss_cls: 3.8879 (3.9678) grad_norm: 4.1695 (4.4853) time: 1.9903 data: 0.0003 max mem: 8421 +[2024-12-04 23:00:47 root] (utils.py 283): INFO Epoch: [1] [ 800/2502] eta: 0:54:19 lr: 0.000020 loss_cls: 3.8179 (3.9675) grad_norm: 4.3447 (4.4888) time: 1.7620 data: 0.0003 max mem: 8421 +[2024-12-04 23:01:05 root] (utils.py 283): INFO Epoch: [1] [ 810/2502] eta: 0:53:57 lr: 0.000020 loss_cls: 3.6737 (3.9597) grad_norm: 4.3048 (4.4855) time: 1.7354 data: 0.0003 max mem: 8421 +[2024-12-04 23:01:22 root] (utils.py 283): INFO Epoch: [1] [ 820/2502] eta: 0:53:34 lr: 0.000020 loss_cls: 3.6804 (3.9607) grad_norm: 4.2943 (4.4852) time: 1.7327 data: 0.0003 max mem: 8421 +[2024-12-04 23:01:31 root] (utils.py 283): INFO Epoch: [1] [ 830/2502] eta: 0:52:56 lr: 0.000020 loss_cls: 4.1495 (3.9564) grad_norm: 4.2168 (4.4835) time: 1.3385 data: 0.0002 max mem: 8421 +[2024-12-04 23:01:51 root] (utils.py 283): INFO Epoch: [1] [ 840/2502] eta: 0:52:38 lr: 0.000020 loss_cls: 3.6885 (3.9557) grad_norm: 4.2397 (4.4845) time: 1.4723 data: 0.0003 max mem: 8421 +[2024-12-04 23:02:13 root] (utils.py 283): INFO Epoch: [1] [ 850/2502] eta: 0:52:25 lr: 0.000020 loss_cls: 4.0603 (3.9533) grad_norm: 4.3339 (4.4862) time: 2.0890 data: 0.0003 max mem: 8421 +[2024-12-04 23:02:33 root] (utils.py 283): INFO Epoch: [1] [ 860/2502] eta: 0:52:07 lr: 0.000020 loss_cls: 3.9872 (3.9536) grad_norm: 4.4479 (4.4845) time: 2.0874 data: 0.0002 max mem: 8421 +[2024-12-04 23:02:55 root] (utils.py 283): INFO Epoch: [1] [ 870/2502] eta: 0:51:54 lr: 0.000020 loss_cls: 3.9508 (3.9534) grad_norm: 4.3798 (4.4846) time: 2.0935 data: 0.0003 max mem: 8421 +[2024-12-04 23:03:15 root] (utils.py 283): INFO Epoch: [1] [ 880/2502] eta: 0:51:36 lr: 0.000020 loss_cls: 3.9508 (3.9511) grad_norm: 4.3798 (4.4846) time: 2.0871 data: 0.0003 max mem: 8421 +[2024-12-04 23:03:25 root] (utils.py 283): INFO Epoch: [1] [ 890/2502] eta: 0:51:00 lr: 0.000020 loss_cls: 3.8383 (3.9487) grad_norm: 4.4212 (4.4860) time: 1.4753 data: 0.0003 max mem: 8421 +[2024-12-04 23:03:46 root] (utils.py 283): INFO Epoch: [1] [ 900/2502] eta: 0:50:45 lr: 0.000020 loss_cls: 3.9705 (3.9488) grad_norm: 4.4996 (4.4864) time: 1.5587 data: 0.0003 max mem: 8421 +[2024-12-04 23:04:07 root] (utils.py 283): INFO Epoch: [1] [ 910/2502] eta: 0:50:30 lr: 0.000020 loss_cls: 4.1456 (3.9488) grad_norm: 4.3356 (4.4831) time: 2.1381 data: 0.0002 max mem: 8421 +[2024-12-04 23:04:27 root] (utils.py 283): INFO Epoch: [1] [ 920/2502] eta: 0:50:13 lr: 0.000020 loss_cls: 4.2968 (3.9504) grad_norm: 4.1734 (4.4810) time: 2.0796 data: 0.0002 max mem: 8421 +[2024-12-04 23:04:49 root] (utils.py 283): INFO Epoch: [1] [ 930/2502] eta: 0:49:58 lr: 0.000020 loss_cls: 4.2870 (3.9532) grad_norm: 4.1878 (4.4788) time: 2.0800 data: 0.0002 max mem: 8421 +[2024-12-04 23:05:07 root] (utils.py 283): INFO Epoch: [1] [ 940/2502] eta: 0:49:38 lr: 0.000020 loss_cls: 4.1885 (3.9521) grad_norm: 4.2894 (4.4786) time: 1.9951 data: 0.0003 max mem: 8421 +[2024-12-04 23:05:18 root] (utils.py 283): INFO Epoch: [1] [ 950/2502] eta: 0:49:06 lr: 0.000020 loss_cls: 3.3639 (3.9468) grad_norm: 4.3018 (4.4793) time: 1.4739 data: 0.0003 max mem: 8421 +[2024-12-04 23:05:40 root] (utils.py 283): INFO Epoch: [1] [ 960/2502] eta: 0:48:51 lr: 0.000020 loss_cls: 3.8848 (3.9477) grad_norm: 4.2989 (4.4779) time: 1.6237 data: 0.0002 max mem: 8421 +[2024-12-04 23:06:02 root] (utils.py 283): INFO Epoch: [1] [ 970/2502] eta: 0:48:36 lr: 0.000020 loss_cls: 4.1026 (3.9483) grad_norm: 4.2787 (4.4749) time: 2.1570 data: 0.0002 max mem: 8421 +[2024-12-04 23:06:21 root] (utils.py 283): INFO Epoch: [1] [ 980/2502] eta: 0:48:18 lr: 0.000020 loss_cls: 4.0775 (3.9473) grad_norm: 4.2849 (4.4779) time: 2.0670 data: 0.0003 max mem: 8421 +[2024-12-04 23:06:42 root] (utils.py 283): INFO Epoch: [1] [ 990/2502] eta: 0:48:02 lr: 0.000020 loss_cls: 4.2055 (3.9473) grad_norm: 4.3902 (4.4791) time: 2.0449 data: 0.0003 max mem: 8421 +[2024-12-04 23:07:00 root] (utils.py 283): INFO Epoch: [1] [1000/2502] eta: 0:47:41 lr: 0.000020 loss_cls: 4.0985 (3.9465) grad_norm: 4.3305 (4.4779) time: 1.9384 data: 0.0003 max mem: 8421 +[2024-12-04 23:07:11 root] (utils.py 283): INFO Epoch: [1] [1010/2502] eta: 0:47:10 lr: 0.000020 loss_cls: 4.0526 (3.9477) grad_norm: 4.2092 (4.4757) time: 1.4226 data: 0.0003 max mem: 8421 +[2024-12-04 23:07:33 root] (utils.py 283): INFO Epoch: [1] [1020/2502] eta: 0:46:56 lr: 0.000020 loss_cls: 4.0925 (3.9471) grad_norm: 4.2632 (4.4764) time: 1.6565 data: 0.0002 max mem: 8421 +[2024-12-04 23:07:55 root] (utils.py 283): INFO Epoch: [1] [1030/2502] eta: 0:46:42 lr: 0.000020 loss_cls: 3.7954 (3.9437) grad_norm: 4.3764 (4.4771) time: 2.2282 data: 0.0003 max mem: 8421 +[2024-12-04 23:08:16 root] (utils.py 283): INFO Epoch: [1] [1040/2502] eta: 0:46:24 lr: 0.000020 loss_cls: 4.0512 (3.9455) grad_norm: 4.2881 (4.4781) time: 2.1252 data: 0.0003 max mem: 8421 +[2024-12-04 23:08:38 root] (utils.py 283): INFO Epoch: [1] [1050/2502] eta: 0:46:10 lr: 0.000020 loss_cls: 4.0589 (3.9447) grad_norm: 4.3264 (4.4797) time: 2.1321 data: 0.0003 max mem: 8421 +[2024-12-04 23:08:53 root] (utils.py 283): INFO Epoch: [1] [1060/2502] eta: 0:45:45 lr: 0.000020 loss_cls: 4.0508 (3.9461) grad_norm: 4.3264 (4.4798) time: 1.8775 data: 0.0003 max mem: 8421 +[2024-12-04 23:09:07 root] (utils.py 283): INFO Epoch: [1] [1070/2502] eta: 0:45:20 lr: 0.000020 loss_cls: 4.0624 (3.9457) grad_norm: 4.2436 (4.4781) time: 1.4676 data: 0.0002 max mem: 8421 +[2024-12-04 23:09:29 root] (utils.py 283): INFO Epoch: [1] [1080/2502] eta: 0:45:05 lr: 0.000020 loss_cls: 4.0124 (3.9452) grad_norm: 4.3473 (4.4825) time: 1.8112 data: 0.0003 max mem: 8421 +[2024-12-04 23:09:49 root] (utils.py 283): INFO Epoch: [1] [1090/2502] eta: 0:44:47 lr: 0.000020 loss_cls: 3.8940 (3.9440) grad_norm: 4.2973 (4.4810) time: 2.0932 data: 0.0002 max mem: 8421 +[2024-12-04 23:10:11 root] (utils.py 283): INFO Epoch: [1] [1100/2502] eta: 0:44:31 lr: 0.000020 loss_cls: 3.8940 (3.9435) grad_norm: 4.3480 (4.4823) time: 2.0881 data: 0.0002 max mem: 8421 +[2024-12-04 23:10:33 root] (utils.py 283): INFO Epoch: [1] [1110/2502] eta: 0:44:16 lr: 0.000020 loss_cls: 3.9953 (3.9433) grad_norm: 4.4565 (4.4842) time: 2.1831 data: 0.0002 max mem: 8421 +[2024-12-04 23:10:46 root] (utils.py 283): INFO Epoch: [1] [1120/2502] eta: 0:43:49 lr: 0.000020 loss_cls: 4.0944 (3.9449) grad_norm: 4.3209 (4.4836) time: 1.7231 data: 0.0002 max mem: 8421 +[2024-12-04 23:10:59 root] (utils.py 283): INFO Epoch: [1] [1130/2502] eta: 0:43:23 lr: 0.000020 loss_cls: 4.1814 (3.9465) grad_norm: 4.3209 (4.4839) time: 1.3006 data: 0.0002 max mem: 8421 +[2024-12-04 23:11:21 root] (utils.py 283): INFO Epoch: [1] [1140/2502] eta: 0:43:08 lr: 0.000020 loss_cls: 4.0476 (3.9450) grad_norm: 4.3375 (4.4852) time: 1.7868 data: 0.0002 max mem: 8421 +[2024-12-04 23:11:42 root] (utils.py 283): INFO Epoch: [1] [1150/2502] eta: 0:42:51 lr: 0.000020 loss_cls: 3.8931 (3.9439) grad_norm: 4.2623 (4.4836) time: 2.1380 data: 0.0002 max mem: 8421 +[2024-12-04 23:12:03 root] (utils.py 283): INFO Epoch: [1] [1160/2502] eta: 0:42:35 lr: 0.000020 loss_cls: 3.9739 (3.9440) grad_norm: 4.1311 (4.4836) time: 2.1122 data: 0.0002 max mem: 8421 +[2024-12-04 23:12:26 root] (utils.py 283): INFO Epoch: [1] [1170/2502] eta: 0:42:19 lr: 0.000020 loss_cls: 3.8916 (3.9429) grad_norm: 4.3097 (4.4854) time: 2.1904 data: 0.0002 max mem: 8421 +[2024-12-04 23:12:38 root] (utils.py 283): INFO Epoch: [1] [1180/2502] eta: 0:41:53 lr: 0.000020 loss_cls: 4.1829 (3.9455) grad_norm: 4.5477 (4.4859) time: 1.7381 data: 0.0002 max mem: 8421 +[2024-12-04 23:12:55 root] (utils.py 283): INFO Epoch: [1] [1190/2502] eta: 0:41:31 lr: 0.000020 loss_cls: 4.2207 (3.9475) grad_norm: 4.3726 (4.4842) time: 1.4715 data: 0.0002 max mem: 8421 +[2024-12-04 23:13:16 root] (utils.py 283): INFO Epoch: [1] [1200/2502] eta: 0:41:15 lr: 0.000020 loss_cls: 4.1877 (3.9484) grad_norm: 4.2291 (4.4830) time: 1.8992 data: 0.0002 max mem: 8421 +[2024-12-04 23:13:36 root] (utils.py 283): INFO Epoch: [1] [1210/2502] eta: 0:40:57 lr: 0.000020 loss_cls: 3.9354 (3.9476) grad_norm: 4.2291 (4.4816) time: 2.0482 data: 0.0002 max mem: 8421 +[2024-12-04 23:13:58 root] (utils.py 283): INFO Epoch: [1] [1220/2502] eta: 0:40:41 lr: 0.000020 loss_cls: 3.9227 (3.9473) grad_norm: 4.3670 (4.4825) time: 2.0949 data: 0.0002 max mem: 8421 +[2024-12-04 23:14:20 root] (utils.py 283): INFO Epoch: [1] [1230/2502] eta: 0:40:24 lr: 0.000020 loss_cls: 3.9602 (3.9472) grad_norm: 4.3853 (4.4864) time: 2.1835 data: 0.0002 max mem: 8421 +[2024-12-04 23:14:31 root] (utils.py 283): INFO Epoch: [1] [1240/2502] eta: 0:39:57 lr: 0.000020 loss_cls: 4.1131 (3.9473) grad_norm: 4.3819 (4.4857) time: 1.6228 data: 0.0002 max mem: 8421 +[2024-12-04 23:14:48 root] (utils.py 283): INFO Epoch: [1] [1250/2502] eta: 0:39:37 lr: 0.000020 loss_cls: 3.8747 (3.9457) grad_norm: 4.2680 (4.4837) time: 1.4238 data: 0.0002 max mem: 8421 +[2024-12-04 23:15:10 root] (utils.py 283): INFO Epoch: [1] [1260/2502] eta: 0:39:20 lr: 0.000020 loss_cls: 4.0747 (3.9466) grad_norm: 4.2472 (4.4858) time: 1.9474 data: 0.0002 max mem: 8421 +[2024-12-04 23:15:30 root] (utils.py 283): INFO Epoch: [1] [1270/2502] eta: 0:39:02 lr: 0.000020 loss_cls: 4.0747 (3.9476) grad_norm: 4.2472 (4.4842) time: 2.0719 data: 0.0002 max mem: 8421 +[2024-12-04 23:15:52 root] (utils.py 283): INFO Epoch: [1] [1280/2502] eta: 0:38:46 lr: 0.000020 loss_cls: 4.2176 (3.9496) grad_norm: 4.2460 (4.4842) time: 2.1248 data: 0.0002 max mem: 8421 +[2024-12-04 23:16:14 root] (utils.py 283): INFO Epoch: [1] [1290/2502] eta: 0:38:30 lr: 0.000020 loss_cls: 4.2176 (3.9511) grad_norm: 4.2907 (4.4840) time: 2.2279 data: 0.0002 max mem: 8421 +[2024-12-04 23:16:24 root] (utils.py 283): INFO Epoch: [1] [1300/2502] eta: 0:38:02 lr: 0.000020 loss_cls: 4.1840 (3.9514) grad_norm: 4.3597 (4.4842) time: 1.5798 data: 0.0002 max mem: 8421 +[2024-12-04 23:16:42 root] (utils.py 283): INFO Epoch: [1] [1310/2502] eta: 0:37:42 lr: 0.000020 loss_cls: 3.8447 (3.9496) grad_norm: 4.3740 (4.4882) time: 1.3743 data: 0.0002 max mem: 8421 +[2024-12-04 23:17:04 root] (utils.py 283): INFO Epoch: [1] [1320/2502] eta: 0:37:26 lr: 0.000020 loss_cls: 3.6736 (3.9471) grad_norm: 4.4587 (4.4882) time: 2.0057 data: 0.0002 max mem: 8421 +[2024-12-04 23:17:24 root] (utils.py 283): INFO Epoch: [1] [1330/2502] eta: 0:37:08 lr: 0.000020 loss_cls: 3.9899 (3.9482) grad_norm: 4.4587 (4.4909) time: 2.1136 data: 0.0002 max mem: 8421 +[2024-12-04 23:17:46 root] (utils.py 283): INFO Epoch: [1] [1340/2502] eta: 0:36:52 lr: 0.000020 loss_cls: 4.2777 (3.9490) grad_norm: 4.3995 (4.4903) time: 2.1165 data: 0.0002 max mem: 8421 +[2024-12-04 23:18:07 root] (utils.py 283): INFO Epoch: [1] [1350/2502] eta: 0:36:34 lr: 0.000020 loss_cls: 3.9796 (3.9490) grad_norm: 4.3092 (4.4892) time: 2.1552 data: 0.0002 max mem: 8421 +[2024-12-04 23:18:16 root] (utils.py 283): INFO Epoch: [1] [1360/2502] eta: 0:36:07 lr: 0.000020 loss_cls: 4.0454 (3.9507) grad_norm: 4.3099 (4.4893) time: 1.5137 data: 0.0002 max mem: 8421 +[2024-12-04 23:18:37 root] (utils.py 283): INFO Epoch: [1] [1370/2502] eta: 0:35:50 lr: 0.000020 loss_cls: 4.2147 (3.9518) grad_norm: 4.3813 (4.4883) time: 1.5007 data: 0.0002 max mem: 8421 +[2024-12-04 23:18:59 root] (utils.py 283): INFO Epoch: [1] [1380/2502] eta: 0:35:33 lr: 0.000020 loss_cls: 4.1640 (3.9527) grad_norm: 4.3547 (4.4869) time: 2.1508 data: 0.0002 max mem: 8421 +[2024-12-04 23:19:20 root] (utils.py 283): INFO Epoch: [1] [1390/2502] eta: 0:35:16 lr: 0.000020 loss_cls: 4.1640 (3.9534) grad_norm: 4.4386 (4.4903) time: 2.1428 data: 0.0002 max mem: 8421 +[2024-12-04 23:19:42 root] (utils.py 283): INFO Epoch: [1] [1400/2502] eta: 0:34:59 lr: 0.000020 loss_cls: 4.1511 (3.9533) grad_norm: 4.4386 (4.4902) time: 2.1347 data: 0.0002 max mem: 8421 +[2024-12-04 23:20:00 root] (utils.py 283): INFO Epoch: [1] [1410/2502] eta: 0:34:39 lr: 0.000020 loss_cls: 4.2169 (3.9557) grad_norm: 4.2876 (4.4898) time: 2.0036 data: 0.0002 max mem: 8421 +[2024-12-04 23:20:12 root] (utils.py 283): INFO Epoch: [1] [1420/2502] eta: 0:34:15 lr: 0.000020 loss_cls: 4.0045 (3.9547) grad_norm: 4.2456 (4.4874) time: 1.4884 data: 0.0002 max mem: 8421 +[2024-12-04 23:20:34 root] (utils.py 283): INFO Epoch: [1] [1430/2502] eta: 0:33:58 lr: 0.000020 loss_cls: 3.9657 (3.9548) grad_norm: 4.1402 (4.4855) time: 1.6866 data: 0.0002 max mem: 8421 +[2024-12-04 23:20:56 root] (utils.py 283): INFO Epoch: [1] [1440/2502] eta: 0:33:41 lr: 0.000020 loss_cls: 4.0084 (3.9539) grad_norm: 4.0721 (4.4831) time: 2.1996 data: 0.0002 max mem: 8421 +[2024-12-04 23:21:12 root] (utils.py 283): INFO Epoch: [1] [1450/2502] eta: 0:33:20 lr: 0.000020 loss_cls: 4.1427 (3.9543) grad_norm: 4.1494 (4.4824) time: 1.9360 data: 0.0002 max mem: 8421 +[2024-12-04 23:21:31 root] (utils.py 283): INFO Epoch: [1] [1460/2502] eta: 0:33:01 lr: 0.000020 loss_cls: 4.2547 (3.9555) grad_norm: 4.2394 (4.4818) time: 1.7418 data: 0.0002 max mem: 8421 +[2024-12-04 23:21:48 root] (utils.py 283): INFO Epoch: [1] [1470/2502] eta: 0:32:40 lr: 0.000020 loss_cls: 4.2516 (3.9547) grad_norm: 4.2394 (4.4810) time: 1.7855 data: 0.0002 max mem: 8421 +[2024-12-04 23:22:01 root] (utils.py 283): INFO Epoch: [1] [1480/2502] eta: 0:32:17 lr: 0.000020 loss_cls: 3.6902 (3.9519) grad_norm: 4.2560 (4.4802) time: 1.5206 data: 0.0002 max mem: 8421 +[2024-12-04 23:22:15 root] (utils.py 283): INFO Epoch: [1] [1490/2502] eta: 0:31:55 lr: 0.000020 loss_cls: 3.5207 (3.9479) grad_norm: 4.3688 (4.4816) time: 1.3471 data: 0.0002 max mem: 8421 +[2024-12-04 23:22:38 root] (utils.py 283): INFO Epoch: [1] [1500/2502] eta: 0:31:38 lr: 0.000020 loss_cls: 3.5952 (3.9476) grad_norm: 4.3688 (4.4810) time: 1.8198 data: 0.0002 max mem: 8421 +[2024-12-04 23:22:58 root] (utils.py 283): INFO Epoch: [1] [1510/2502] eta: 0:31:20 lr: 0.000020 loss_cls: 4.0552 (3.9479) grad_norm: 4.2713 (4.4802) time: 2.1280 data: 0.0002 max mem: 8421 +[2024-12-04 23:23:15 root] (utils.py 283): INFO Epoch: [1] [1520/2502] eta: 0:31:01 lr: 0.000020 loss_cls: 4.0699 (3.9489) grad_norm: 4.3863 (4.4806) time: 1.8939 data: 0.0002 max mem: 8421 +[2024-12-04 23:23:33 root] (utils.py 283): INFO Epoch: [1] [1530/2502] eta: 0:30:41 lr: 0.000020 loss_cls: 4.0506 (3.9484) grad_norm: 4.4084 (4.4817) time: 1.7717 data: 0.0002 max mem: 8421 +[2024-12-04 23:23:51 root] (utils.py 283): INFO Epoch: [1] [1540/2502] eta: 0:30:21 lr: 0.000020 loss_cls: 3.9937 (3.9483) grad_norm: 4.3920 (4.4812) time: 1.7668 data: 0.0002 max mem: 8421 +[2024-12-04 23:24:02 root] (utils.py 283): INFO Epoch: [1] [1550/2502] eta: 0:29:57 lr: 0.000020 loss_cls: 3.9302 (3.9469) grad_norm: 4.3283 (4.4806) time: 1.4466 data: 0.0002 max mem: 8421 +[2024-12-04 23:24:19 root] (utils.py 283): INFO Epoch: [1] [1560/2502] eta: 0:29:38 lr: 0.000020 loss_cls: 3.7896 (3.9459) grad_norm: 4.3152 (4.4835) time: 1.4303 data: 0.0002 max mem: 8421 +[2024-12-04 23:24:41 root] (utils.py 283): INFO Epoch: [1] [1570/2502] eta: 0:29:20 lr: 0.000020 loss_cls: 3.7753 (3.9433) grad_norm: 4.4311 (4.4836) time: 1.9503 data: 0.0002 max mem: 8421 +[2024-12-04 23:25:00 root] (utils.py 283): INFO Epoch: [1] [1580/2502] eta: 0:29:02 lr: 0.000020 loss_cls: 3.7753 (3.9429) grad_norm: 4.4258 (4.4826) time: 2.0576 data: 0.0002 max mem: 8421 +[2024-12-04 23:25:22 root] (utils.py 283): INFO Epoch: [1] [1590/2502] eta: 0:28:45 lr: 0.000020 loss_cls: 4.1109 (3.9443) grad_norm: 4.4258 (4.4872) time: 2.0705 data: 0.0003 max mem: 8421 +[2024-12-04 23:25:45 root] (utils.py 283): INFO Epoch: [1] [1600/2502] eta: 0:28:28 lr: 0.000020 loss_cls: 4.1424 (3.9461) grad_norm: 4.6086 (4.4884) time: 2.2047 data: 0.0003 max mem: 8421 +[2024-12-04 23:25:54 root] (utils.py 283): INFO Epoch: [1] [1610/2502] eta: 0:28:04 lr: 0.000020 loss_cls: 4.1475 (3.9465) grad_norm: 4.4168 (4.4880) time: 1.6047 data: 0.0003 max mem: 8421 +[2024-12-04 23:26:12 root] (utils.py 283): INFO Epoch: [1] [1620/2502] eta: 0:27:44 lr: 0.000020 loss_cls: 3.8965 (3.9468) grad_norm: 4.4168 (4.4909) time: 1.3957 data: 0.0003 max mem: 8421 +[2024-12-04 23:26:34 root] (utils.py 283): INFO Epoch: [1] [1630/2502] eta: 0:27:27 lr: 0.000020 loss_cls: 4.0676 (3.9473) grad_norm: 4.3112 (4.4913) time: 2.0011 data: 0.0003 max mem: 8421 +[2024-12-04 23:26:54 root] (utils.py 283): INFO Epoch: [1] [1640/2502] eta: 0:27:09 lr: 0.000020 loss_cls: 4.1334 (3.9470) grad_norm: 4.3457 (4.4912) time: 2.0872 data: 0.0002 max mem: 8421 +[2024-12-04 23:27:17 root] (utils.py 283): INFO Epoch: [1] [1650/2502] eta: 0:26:51 lr: 0.000020 loss_cls: 3.8043 (3.9439) grad_norm: 4.3457 (4.4904) time: 2.1037 data: 0.0002 max mem: 8421 +[2024-12-04 23:27:38 root] (utils.py 283): INFO Epoch: [1] [1660/2502] eta: 0:26:34 lr: 0.000020 loss_cls: 3.4376 (3.9411) grad_norm: 4.2221 (4.4892) time: 2.1645 data: 0.0003 max mem: 8421 +[2024-12-04 23:27:47 root] (utils.py 283): INFO Epoch: [1] [1670/2502] eta: 0:26:10 lr: 0.000020 loss_cls: 3.5987 (3.9411) grad_norm: 4.1484 (4.4885) time: 1.5246 data: 0.0003 max mem: 8421 +[2024-12-04 23:28:08 root] (utils.py 283): INFO Epoch: [1] [1680/2502] eta: 0:25:52 lr: 0.000020 loss_cls: 4.0586 (3.9402) grad_norm: 4.3075 (4.4876) time: 1.5150 data: 0.0003 max mem: 8421 +[2024-12-04 23:28:30 root] (utils.py 283): INFO Epoch: [1] [1690/2502] eta: 0:25:35 lr: 0.000020 loss_cls: 4.0331 (3.9402) grad_norm: 4.2323 (4.4858) time: 2.1623 data: 0.0002 max mem: 8421 +[2024-12-04 23:28:51 root] (utils.py 283): INFO Epoch: [1] [1700/2502] eta: 0:25:17 lr: 0.000020 loss_cls: 4.2578 (3.9426) grad_norm: 4.2229 (4.4854) time: 2.1449 data: 0.0002 max mem: 8421 +[2024-12-04 23:29:13 root] (utils.py 283): INFO Epoch: [1] [1710/2502] eta: 0:24:59 lr: 0.000020 loss_cls: 4.2122 (3.9423) grad_norm: 4.2511 (4.4850) time: 2.1402 data: 0.0002 max mem: 8421 +[2024-12-04 23:29:31 root] (utils.py 283): INFO Epoch: [1] [1720/2502] eta: 0:24:40 lr: 0.000020 loss_cls: 3.7794 (3.9413) grad_norm: 4.2341 (4.4836) time: 1.9982 data: 0.0002 max mem: 8421 +[2024-12-04 23:29:42 root] (utils.py 283): INFO Epoch: [1] [1730/2502] eta: 0:24:17 lr: 0.000020 loss_cls: 4.2350 (3.9426) grad_norm: 4.1853 (4.4843) time: 1.4277 data: 0.0002 max mem: 8421 +[2024-12-04 23:30:04 root] (utils.py 283): INFO Epoch: [1] [1740/2502] eta: 0:24:00 lr: 0.000020 loss_cls: 4.0850 (3.9418) grad_norm: 4.2288 (4.4837) time: 1.6470 data: 0.0002 max mem: 8421 +[2024-12-04 23:30:25 root] (utils.py 283): INFO Epoch: [1] [1750/2502] eta: 0:23:42 lr: 0.000020 loss_cls: 3.6356 (3.9408) grad_norm: 4.3220 (4.4828) time: 2.1859 data: 0.0003 max mem: 8421 +[2024-12-04 23:30:46 root] (utils.py 283): INFO Epoch: [1] [1760/2502] eta: 0:23:24 lr: 0.000020 loss_cls: 3.6356 (3.9402) grad_norm: 4.3220 (4.4816) time: 2.0927 data: 0.0002 max mem: 8421 +[2024-12-04 23:31:08 root] (utils.py 283): INFO Epoch: [1] [1770/2502] eta: 0:23:06 lr: 0.000020 loss_cls: 4.1019 (3.9408) grad_norm: 4.3824 (4.4815) time: 2.1061 data: 0.0002 max mem: 8421 +[2024-12-04 23:31:24 root] (utils.py 283): INFO Epoch: [1] [1780/2502] eta: 0:22:46 lr: 0.000020 loss_cls: 4.1046 (3.9403) grad_norm: 4.3433 (4.4803) time: 1.9009 data: 0.0002 max mem: 8421 +[2024-12-04 23:31:37 root] (utils.py 283): INFO Epoch: [1] [1790/2502] eta: 0:22:25 lr: 0.000020 loss_cls: 3.8723 (3.9392) grad_norm: 4.3083 (4.4803) time: 1.4877 data: 0.0002 max mem: 8421 +[2024-12-04 23:31:59 root] (utils.py 283): INFO Epoch: [1] [1800/2502] eta: 0:22:07 lr: 0.000020 loss_cls: 3.9306 (3.9403) grad_norm: 4.5329 (4.4838) time: 1.7977 data: 0.0002 max mem: 8421 +[2024-12-04 23:32:20 root] (utils.py 283): INFO Epoch: [1] [1810/2502] eta: 0:21:49 lr: 0.000020 loss_cls: 3.9396 (3.9396) grad_norm: 4.4309 (4.4838) time: 2.1529 data: 0.0002 max mem: 8421 +[2024-12-04 23:32:43 root] (utils.py 283): INFO Epoch: [1] [1820/2502] eta: 0:21:31 lr: 0.000020 loss_cls: 3.9396 (3.9399) grad_norm: 4.3930 (4.4835) time: 2.1601 data: 0.0002 max mem: 8421 +[2024-12-04 23:33:05 root] (utils.py 283): INFO Epoch: [1] [1830/2502] eta: 0:21:14 lr: 0.000020 loss_cls: 4.1342 (3.9419) grad_norm: 4.3439 (4.4831) time: 2.2520 data: 0.0002 max mem: 8421 +[2024-12-04 23:33:17 root] (utils.py 283): INFO Epoch: [1] [1840/2502] eta: 0:20:52 lr: 0.000020 loss_cls: 4.0763 (3.9409) grad_norm: 4.3300 (4.4819) time: 1.7079 data: 0.0002 max mem: 8421 +[2024-12-04 23:33:32 root] (utils.py 283): INFO Epoch: [1] [1850/2502] eta: 0:20:32 lr: 0.000020 loss_cls: 3.8478 (3.9398) grad_norm: 4.2362 (4.4816) time: 1.3288 data: 0.0002 max mem: 8421 +[2024-12-04 23:33:54 root] (utils.py 283): INFO Epoch: [1] [1860/2502] eta: 0:20:14 lr: 0.000020 loss_cls: 3.8402 (3.9391) grad_norm: 4.4170 (4.4821) time: 1.8737 data: 0.0002 max mem: 8421 +[2024-12-04 23:34:14 root] (utils.py 283): INFO Epoch: [1] [1870/2502] eta: 0:19:56 lr: 0.000020 loss_cls: 3.8402 (3.9379) grad_norm: 4.3331 (4.4840) time: 2.1187 data: 0.0002 max mem: 8421 +[2024-12-04 23:34:37 root] (utils.py 283): INFO Epoch: [1] [1880/2502] eta: 0:19:38 lr: 0.000020 loss_cls: 4.2521 (3.9395) grad_norm: 4.4012 (4.4844) time: 2.1106 data: 0.0002 max mem: 8421 +[2024-12-04 23:34:59 root] (utils.py 283): INFO Epoch: [1] [1890/2502] eta: 0:19:20 lr: 0.000020 loss_cls: 4.2521 (3.9386) grad_norm: 4.4012 (4.4854) time: 2.2380 data: 0.0002 max mem: 8421 +[2024-12-04 23:35:10 root] (utils.py 283): INFO Epoch: [1] [1900/2502] eta: 0:18:59 lr: 0.000020 loss_cls: 3.7571 (3.9375) grad_norm: 4.3442 (4.4855) time: 1.6694 data: 0.0002 max mem: 8421 +[2024-12-04 23:35:29 root] (utils.py 283): INFO Epoch: [1] [1910/2502] eta: 0:18:40 lr: 0.000020 loss_cls: 3.9233 (3.9380) grad_norm: 4.2098 (4.4838) time: 1.5063 data: 0.0002 max mem: 8421 +[2024-12-04 23:35:51 root] (utils.py 283): INFO Epoch: [1] [1920/2502] eta: 0:18:22 lr: 0.000020 loss_cls: 3.7133 (3.9354) grad_norm: 4.2629 (4.4862) time: 2.0627 data: 0.0002 max mem: 8421 +[2024-12-04 23:36:12 root] (utils.py 283): INFO Epoch: [1] [1930/2502] eta: 0:18:03 lr: 0.000020 loss_cls: 3.7009 (3.9350) grad_norm: 4.3610 (4.4858) time: 2.1185 data: 0.0002 max mem: 8421 +[2024-12-04 23:36:34 root] (utils.py 283): INFO Epoch: [1] [1940/2502] eta: 0:17:45 lr: 0.000020 loss_cls: 3.8632 (3.9343) grad_norm: 4.4715 (4.4856) time: 2.1229 data: 0.0002 max mem: 8421 +[2024-12-04 23:36:53 root] (utils.py 283): INFO Epoch: [1] [1950/2502] eta: 0:17:26 lr: 0.000020 loss_cls: 3.8632 (3.9333) grad_norm: 4.2610 (4.4844) time: 2.0947 data: 0.0002 max mem: 8421 +[2024-12-04 23:37:03 root] (utils.py 283): INFO Epoch: [1] [1960/2502] eta: 0:17:05 lr: 0.000020 loss_cls: 3.9360 (3.9323) grad_norm: 4.1097 (4.4819) time: 1.4599 data: 0.0002 max mem: 8421 +[2024-12-04 23:37:22 root] (utils.py 283): INFO Epoch: [1] [1970/2502] eta: 0:16:46 lr: 0.000020 loss_cls: 3.8929 (3.9308) grad_norm: 4.0216 (4.4804) time: 1.4101 data: 0.0003 max mem: 8421 +[2024-12-04 23:37:44 root] (utils.py 283): INFO Epoch: [1] [1980/2502] eta: 0:16:28 lr: 0.000020 loss_cls: 3.7152 (3.9303) grad_norm: 4.2618 (4.4808) time: 2.0375 data: 0.0002 max mem: 8421 +[2024-12-04 23:38:04 root] (utils.py 283): INFO Epoch: [1] [1990/2502] eta: 0:16:09 lr: 0.000020 loss_cls: 4.0374 (3.9297) grad_norm: 4.4239 (4.4816) time: 2.1147 data: 0.0002 max mem: 8421 +[2024-12-04 23:38:26 root] (utils.py 283): INFO Epoch: [1] [2000/2502] eta: 0:15:51 lr: 0.000020 loss_cls: 3.9079 (3.9290) grad_norm: 4.2284 (4.4801) time: 2.1235 data: 0.0002 max mem: 8421 +[2024-12-04 23:38:46 root] (utils.py 283): INFO Epoch: [1] [2010/2502] eta: 0:15:32 lr: 0.000020 loss_cls: 4.1007 (3.9303) grad_norm: 4.2284 (4.4797) time: 2.1003 data: 0.0002 max mem: 8421 +[2024-12-04 23:38:56 root] (utils.py 283): INFO Epoch: [1] [2020/2502] eta: 0:15:11 lr: 0.000020 loss_cls: 4.0483 (3.9298) grad_norm: 4.3258 (4.4789) time: 1.5048 data: 0.0002 max mem: 8421 +[2024-12-04 23:39:18 root] (utils.py 283): INFO Epoch: [1] [2030/2502] eta: 0:14:53 lr: 0.000020 loss_cls: 4.0483 (3.9306) grad_norm: 4.2552 (4.4777) time: 1.5978 data: 0.0002 max mem: 8421 +[2024-12-04 23:39:40 root] (utils.py 283): INFO Epoch: [1] [2040/2502] eta: 0:14:35 lr: 0.000020 loss_cls: 4.1719 (3.9312) grad_norm: 4.2749 (4.4772) time: 2.1958 data: 0.0002 max mem: 8421 +[2024-12-04 23:40:00 root] (utils.py 283): INFO Epoch: [1] [2050/2502] eta: 0:14:16 lr: 0.000020 loss_cls: 3.9404 (3.9302) grad_norm: 4.4131 (4.4773) time: 2.0970 data: 0.0002 max mem: 8421 +[2024-12-04 23:40:21 root] (utils.py 283): INFO Epoch: [1] [2060/2502] eta: 0:13:58 lr: 0.000020 loss_cls: 3.7396 (3.9297) grad_norm: 4.5268 (4.4793) time: 2.0420 data: 0.0002 max mem: 8421 +[2024-12-04 23:40:39 root] (utils.py 283): INFO Epoch: [1] [2070/2502] eta: 0:13:38 lr: 0.000020 loss_cls: 4.0820 (3.9302) grad_norm: 4.2319 (4.4776) time: 1.9324 data: 0.0002 max mem: 8421 +[2024-12-04 23:40:48 root] (utils.py 283): INFO Epoch: [1] [2080/2502] eta: 0:13:17 lr: 0.000020 loss_cls: 4.0969 (3.9296) grad_norm: 4.0872 (4.4773) time: 1.3455 data: 0.0002 max mem: 8421 +[2024-12-04 23:41:10 root] (utils.py 283): INFO Epoch: [1] [2090/2502] eta: 0:12:59 lr: 0.000020 loss_cls: 4.0217 (3.9307) grad_norm: 4.1227 (4.4767) time: 1.5650 data: 0.0002 max mem: 8421 +[2024-12-04 23:41:32 root] (utils.py 283): INFO Epoch: [1] [2100/2502] eta: 0:12:41 lr: 0.000020 loss_cls: 4.2544 (3.9295) grad_norm: 4.2733 (4.4789) time: 2.2194 data: 0.0010 max mem: 8421 +[2024-12-04 23:41:53 root] (utils.py 283): INFO Epoch: [1] [2110/2502] eta: 0:12:22 lr: 0.000020 loss_cls: 3.9944 (3.9301) grad_norm: 4.0931 (4.4783) time: 2.1368 data: 0.0010 max mem: 8421 +[2024-12-04 23:42:11 root] (utils.py 283): INFO Epoch: [1] [2120/2502] eta: 0:12:03 lr: 0.000020 loss_cls: 4.1513 (3.9296) grad_norm: 4.2723 (4.4776) time: 1.9199 data: 0.0003 max mem: 8421 +[2024-12-04 23:42:33 root] (utils.py 283): INFO Epoch: [1] [2130/2502] eta: 0:11:45 lr: 0.000020 loss_cls: 4.1498 (3.9298) grad_norm: 4.1472 (4.4767) time: 2.0061 data: 0.0003 max mem: 8421 +[2024-12-04 23:42:51 root] (utils.py 283): INFO Epoch: [1] [2140/2502] eta: 0:11:26 lr: 0.000020 loss_cls: 4.1547 (3.9309) grad_norm: 4.2401 (4.4766) time: 2.0379 data: 0.0002 max mem: 8421 +[2024-12-04 23:43:14 root] (utils.py 283): INFO Epoch: [1] [2150/2502] eta: 0:11:07 lr: 0.000020 loss_cls: 4.1692 (3.9306) grad_norm: 4.2450 (4.4759) time: 2.0670 data: 0.0002 max mem: 8421 +[2024-12-04 23:43:36 root] (utils.py 283): INFO Epoch: [1] [2160/2502] eta: 0:10:49 lr: 0.000020 loss_cls: 4.1692 (3.9311) grad_norm: 4.2444 (4.4765) time: 2.2187 data: 0.0002 max mem: 8421 +[2024-12-04 23:43:57 root] (utils.py 283): INFO Epoch: [1] [2170/2502] eta: 0:10:30 lr: 0.000020 loss_cls: 4.1966 (3.9320) grad_norm: 4.4625 (4.4783) time: 2.1525 data: 0.0002 max mem: 8421 +[2024-12-04 23:44:19 root] (utils.py 283): INFO Epoch: [1] [2180/2502] eta: 0:10:12 lr: 0.000020 loss_cls: 3.9470 (3.9316) grad_norm: 4.4356 (4.4774) time: 2.1701 data: 0.0002 max mem: 8421 +[2024-12-04 23:44:37 root] (utils.py 283): INFO Epoch: [1] [2190/2502] eta: 0:09:53 lr: 0.000020 loss_cls: 4.2495 (3.9328) grad_norm: 4.4356 (4.4809) time: 2.0152 data: 0.0002 max mem: 8421 +[2024-12-04 23:44:59 root] (utils.py 283): INFO Epoch: [1] [2200/2502] eta: 0:09:34 lr: 0.000020 loss_cls: 4.0587 (3.9311) grad_norm: 4.4396 (4.4806) time: 1.9979 data: 0.0002 max mem: 8421 +[2024-12-04 23:45:22 root] (utils.py 283): INFO Epoch: [1] [2210/2502] eta: 0:09:15 lr: 0.000020 loss_cls: 3.6575 (3.9308) grad_norm: 4.1852 (4.4821) time: 2.2261 data: 0.0002 max mem: 8421 +[2024-12-04 23:45:43 root] (utils.py 283): INFO Epoch: [1] [2220/2502] eta: 0:08:57 lr: 0.000020 loss_cls: 3.8669 (3.9300) grad_norm: 4.3999 (4.4822) time: 2.1806 data: 0.0002 max mem: 8421 +[2024-12-04 23:46:04 root] (utils.py 283): INFO Epoch: [1] [2230/2502] eta: 0:08:38 lr: 0.000020 loss_cls: 3.8604 (3.9291) grad_norm: 4.3999 (4.4921) time: 2.0926 data: 0.0003 max mem: 8421 +[2024-12-04 23:46:24 root] (utils.py 283): INFO Epoch: [1] [2240/2502] eta: 0:08:19 lr: 0.000020 loss_cls: 3.9220 (3.9285) grad_norm: 4.8519 (4.4939) time: 2.0498 data: 0.0003 max mem: 8421 +[2024-12-04 23:46:44 root] (utils.py 283): INFO Epoch: [1] [2250/2502] eta: 0:08:00 lr: 0.000020 loss_cls: 3.7382 (3.9275) grad_norm: 4.4856 (4.4965) time: 2.0218 data: 0.0003 max mem: 8421 +[2024-12-04 23:47:07 root] (utils.py 283): INFO Epoch: [1] [2260/2502] eta: 0:07:41 lr: 0.000020 loss_cls: 3.5760 (3.9270) grad_norm: 4.1849 (4.4969) time: 2.1736 data: 0.0003 max mem: 8421 +[2024-12-04 23:47:28 root] (utils.py 283): INFO Epoch: [1] [2270/2502] eta: 0:07:22 lr: 0.000020 loss_cls: 3.9227 (3.9276) grad_norm: 4.3435 (4.4972) time: 2.1872 data: 0.0003 max mem: 8421 +[2024-12-04 23:47:50 root] (utils.py 283): INFO Epoch: [1] [2280/2502] eta: 0:07:04 lr: 0.000020 loss_cls: 4.1121 (3.9284) grad_norm: 4.4154 (4.4971) time: 2.1280 data: 0.0002 max mem: 8421 +[2024-12-04 23:48:11 root] (utils.py 283): INFO Epoch: [1] [2290/2502] eta: 0:06:45 lr: 0.000020 loss_cls: 4.2039 (3.9286) grad_norm: 4.3771 (4.4965) time: 2.1647 data: 0.0002 max mem: 8421 +[2024-12-04 23:48:30 root] (utils.py 283): INFO Epoch: [1] [2300/2502] eta: 0:06:26 lr: 0.000020 loss_cls: 3.8179 (3.9280) grad_norm: 4.3450 (4.4960) time: 2.0253 data: 0.0002 max mem: 8421 +[2024-12-04 23:48:50 root] (utils.py 283): INFO Epoch: [1] [2310/2502] eta: 0:06:07 lr: 0.000020 loss_cls: 3.7849 (3.9269) grad_norm: 4.3169 (4.4952) time: 1.9601 data: 0.0002 max mem: 8421 +[2024-12-04 23:49:12 root] (utils.py 283): INFO Epoch: [1] [2320/2502] eta: 0:05:48 lr: 0.000020 loss_cls: 3.9338 (3.9275) grad_norm: 4.2783 (4.4942) time: 2.0608 data: 0.0002 max mem: 8421 +[2024-12-04 23:49:33 root] (utils.py 283): INFO Epoch: [1] [2330/2502] eta: 0:05:29 lr: 0.000020 loss_cls: 4.1457 (3.9287) grad_norm: 4.2783 (4.4964) time: 2.1172 data: 0.0003 max mem: 8421 +[2024-12-04 23:49:54 root] (utils.py 283): INFO Epoch: [1] [2340/2502] eta: 0:05:10 lr: 0.000020 loss_cls: 4.1457 (3.9285) grad_norm: 4.3170 (4.4974) time: 2.1397 data: 0.0003 max mem: 8421 +[2024-12-04 23:50:13 root] (utils.py 283): INFO Epoch: [1] [2350/2502] eta: 0:04:50 lr: 0.000020 loss_cls: 4.0076 (3.9287) grad_norm: 4.2957 (4.4984) time: 1.9921 data: 0.0003 max mem: 8421 +[2024-12-04 23:50:33 root] (utils.py 283): INFO Epoch: [1] [2360/2502] eta: 0:04:31 lr: 0.000020 loss_cls: 3.8899 (3.9281) grad_norm: 4.2877 (4.4978) time: 1.9487 data: 0.0003 max mem: 8421 +[2024-12-04 23:50:56 root] (utils.py 283): INFO Epoch: [1] [2370/2502] eta: 0:04:12 lr: 0.000020 loss_cls: 3.8179 (3.9277) grad_norm: 4.3606 (4.4974) time: 2.1596 data: 0.0002 max mem: 8421 +[2024-12-04 23:51:16 root] (utils.py 283): INFO Epoch: [1] [2380/2502] eta: 0:03:53 lr: 0.000020 loss_cls: 4.0382 (3.9284) grad_norm: 4.1917 (4.4960) time: 2.1482 data: 0.0002 max mem: 8421 +[2024-12-04 23:51:38 root] (utils.py 283): INFO Epoch: [1] [2390/2502] eta: 0:03:34 lr: 0.000020 loss_cls: 4.0374 (3.9275) grad_norm: 4.1917 (4.4965) time: 2.1047 data: 0.0002 max mem: 8421 +[2024-12-04 23:51:58 root] (utils.py 283): INFO Epoch: [1] [2400/2502] eta: 0:03:15 lr: 0.000020 loss_cls: 3.8219 (3.9272) grad_norm: 4.2929 (4.4968) time: 2.1064 data: 0.0002 max mem: 8421 +[2024-12-04 23:52:17 root] (utils.py 283): INFO Epoch: [1] [2410/2502] eta: 0:02:56 lr: 0.000020 loss_cls: 4.2252 (3.9279) grad_norm: 4.4262 (4.4962) time: 1.9381 data: 0.0002 max mem: 8421 +[2024-12-04 23:52:39 root] (utils.py 283): INFO Epoch: [1] [2420/2502] eta: 0:02:37 lr: 0.000020 loss_cls: 4.0065 (3.9271) grad_norm: 4.4135 (4.4960) time: 2.0252 data: 0.0002 max mem: 8421 +[2024-12-04 23:53:01 root] (utils.py 283): INFO Epoch: [1] [2430/2502] eta: 0:02:18 lr: 0.000020 loss_cls: 3.5730 (3.9271) grad_norm: 4.4126 (4.4966) time: 2.1965 data: 0.0002 max mem: 8421 +[2024-12-04 23:53:23 root] (utils.py 283): INFO Epoch: [1] [2440/2502] eta: 0:01:59 lr: 0.000020 loss_cls: 4.0437 (3.9274) grad_norm: 4.5675 (4.4996) time: 2.1837 data: 0.0002 max mem: 8421 +[2024-12-04 23:53:45 root] (utils.py 283): INFO Epoch: [1] [2450/2502] eta: 0:01:39 lr: 0.000020 loss_cls: 4.0437 (3.9281) grad_norm: 4.5113 (4.4992) time: 2.2235 data: 0.0003 max mem: 8421 +[2024-12-04 23:54:03 root] (utils.py 283): INFO Epoch: [1] [2460/2502] eta: 0:01:20 lr: 0.000020 loss_cls: 4.1448 (3.9283) grad_norm: 4.3884 (4.4987) time: 2.0270 data: 0.0003 max mem: 8421 +[2024-12-04 23:54:25 root] (utils.py 283): INFO Epoch: [1] [2470/2502] eta: 0:01:01 lr: 0.000020 loss_cls: 4.1499 (3.9276) grad_norm: 4.4122 (4.4986) time: 1.9839 data: 0.0002 max mem: 8421 +[2024-12-04 23:54:47 root] (utils.py 283): INFO Epoch: [1] [2480/2502] eta: 0:00:42 lr: 0.000020 loss_cls: 3.5286 (3.9268) grad_norm: 4.2620 (4.4981) time: 2.1929 data: 0.0002 max mem: 8421 +[2024-12-04 23:55:06 root] (utils.py 283): INFO Epoch: [1] [2490/2502] eta: 0:00:23 lr: 0.000020 loss_cls: 3.8626 (3.9275) grad_norm: 4.2620 (4.4985) time: 2.0465 data: 0.0253 max mem: 8421 +[2024-12-04 23:55:24 root] (utils.py 283): INFO Epoch: [1] [2500/2502] eta: 0:00:03 lr: 0.000020 loss_cls: 3.9242 (3.9270) grad_norm: 4.2793 (4.4987) time: 1.8405 data: 0.0253 max mem: 8421 +[2024-12-04 23:55:26 root] (utils.py 283): INFO Epoch: [1] [2501/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 3.9242 (3.9265) grad_norm: 4.2793 (4.4986) time: 1.8432 data: 0.0253 max mem: 8421 +[2024-12-04 23:55:26 root] (utils.py 297): INFO Epoch: [1] Total time: 1:20:12 (1.9236 s / it) +[2024-12-04 23:55:26 root] (engine.py 178): INFO Averaged stats:lr: 0.000020 loss_cls: 3.9242 (3.9274) grad_norm: 4.2793 (4.4986) +[2024-12-04 23:55:26 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:19 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7745 (0.7745) acc1: 83.5938 (83.5938) acc3: 95.3125 (95.3125) acc5: 96.0938 (96.0938) time: 0.1939 data: 0.0004 max mem: 8421 +[2024-12-04 23:55:28 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:15 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8785 (0.9084) acc1: 83.5938 (81.1080) acc3: 92.1875 (91.8324) acc5: 95.3125 (94.9574) time: 0.1780 data: 0.0004 max mem: 8421 +[2024-12-04 23:55:30 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9358 (0.9527) acc1: 77.3438 (79.5759) acc3: 91.4062 (91.8155) acc5: 93.7500 (94.6429) time: 0.1894 data: 0.0004 max mem: 8421 +[2024-12-04 23:55:32 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0049 (0.9499) acc1: 78.1250 (79.1079) acc3: 92.1875 (92.3135) acc5: 94.5312 (95.1361) time: 0.1894 data: 0.0004 max mem: 8421 +[2024-12-04 23:55:34 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8984 (0.9460) acc1: 79.6875 (79.6494) acc3: 93.7500 (92.4733) acc5: 95.3125 (95.1791) time: 0.1890 data: 0.0004 max mem: 8421 +[2024-12-04 23:55:35 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0756 (1.0421) acc1: 75.7812 (77.5276) acc3: 87.5000 (90.6403) acc5: 91.4062 (93.8266) time: 0.1896 data: 0.0005 max mem: 8421 +[2024-12-04 23:55:38 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3692 (1.0831) acc1: 71.0938 (76.8186) acc3: 85.1562 (89.9462) acc5: 89.8438 (93.1737) time: 0.1895 data: 0.0005 max mem: 8421 +[2024-12-04 23:55:39 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3041 (1.1277) acc1: 71.0938 (75.5392) acc3: 86.7188 (89.3266) acc5: 89.8438 (92.7157) time: 0.1884 data: 0.0004 max mem: 8421 +[2024-12-04 23:55:41 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3530 (1.1672) acc1: 67.1875 (74.7589) acc3: 84.3750 (88.6960) acc5: 89.0625 (92.2261) time: 0.1806 data: 0.0006 max mem: 8421 +[2024-12-04 23:55:43 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.4010 (1.2010) acc1: 67.1875 (73.8753) acc3: 83.5938 (88.1439) acc5: 89.0625 (91.6981) time: 0.1875 data: 0.0006 max mem: 8421 +[2024-12-04 23:55:44 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2251 (1.1895) acc1: 72.6562 (74.0800) acc3: 86.7188 (88.3840) acc5: 91.4062 (91.9120) time: 0.1909 data: 0.0006 max mem: 8421 +[2024-12-04 23:55:44 root] (utils.py 297): INFO Test: Total time: 0:00:18 (0.1876 s / it) +[2024-12-04 23:55:51 root] (engine.py 263): INFO * Acc@1 73.872 Acc@3 88.348 Acc@5 91.886 loss 1.189 flops 1.285 layer_flops 1.251 +[2024-12-04 23:55:51 root] (main.py 542): INFO Accuracy of the network on the 50000 test images: 73.9% +[2024-12-04 23:55:51 root] (main.py 546): INFO Max accuracy: 73.87% +[2024-12-04 23:55:53 root] (utils.py 283): INFO Epoch: [2] [ 0/2502] eta: 1:17:12 lr: 0.000020 loss_cls: 3.5279 (3.5279) grad_norm: 4.3606 (4.3606) time: 1.8514 data: 0.0009 max mem: 8421 +[2024-12-04 23:56:11 root] (utils.py 283): INFO Epoch: [2] [ 10/2502] eta: 1:16:48 lr: 0.000020 loss_cls: 3.6572 (3.8806) grad_norm: 4.3657 (4.4297) time: 1.8492 data: 0.0003 max mem: 8421 +[2024-12-04 23:56:34 root] (utils.py 283): INFO Epoch: [2] [ 20/2502] eta: 1:24:28 lr: 0.000020 loss_cls: 4.0250 (3.9925) grad_norm: 4.3974 (4.4846) time: 2.0515 data: 0.0002 max mem: 8421 +[2024-12-04 23:56:55 root] (utils.py 283): INFO Epoch: [2] [ 30/2502] eta: 1:25:06 lr: 0.000020 loss_cls: 4.2358 (4.0614) grad_norm: 4.4086 (4.5418) time: 2.1848 data: 0.0002 max mem: 8421 +[2024-12-04 23:57:17 root] (utils.py 283): INFO Epoch: [2] [ 40/2502] eta: 1:25:59 lr: 0.000020 loss_cls: 4.1446 (4.0491) grad_norm: 4.3044 (4.5548) time: 2.1519 data: 0.0002 max mem: 8421 +[2024-12-04 23:57:39 root] (utils.py 283): INFO Epoch: [2] [ 50/2502] eta: 1:26:55 lr: 0.000020 loss_cls: 4.1446 (4.0836) grad_norm: 4.2526 (4.4902) time: 2.2226 data: 0.0002 max mem: 8421 +[2024-12-04 23:57:57 root] (utils.py 283): INFO Epoch: [2] [ 60/2502] eta: 1:24:10 lr: 0.000020 loss_cls: 4.2730 (4.1245) grad_norm: 4.3086 (4.4906) time: 2.0125 data: 0.0002 max mem: 8421 +[2024-12-04 23:58:19 root] (utils.py 283): INFO Epoch: [2] [ 70/2502] eta: 1:24:20 lr: 0.000020 loss_cls: 4.2730 (4.1280) grad_norm: 4.4725 (4.4904) time: 1.9629 data: 0.0002 max mem: 8421 +[2024-12-04 23:58:41 root] (utils.py 283): INFO Epoch: [2] [ 80/2502] eta: 1:24:44 lr: 0.000020 loss_cls: 4.1435 (4.1280) grad_norm: 4.4527 (4.4924) time: 2.1944 data: 0.0003 max mem: 8421 +[2024-12-04 23:59:00 root] (utils.py 283): INFO Epoch: [2] [ 90/2502] eta: 1:23:21 lr: 0.000020 loss_cls: 4.1416 (4.1020) grad_norm: 4.2965 (4.4865) time: 2.0484 data: 0.0002 max mem: 8421 +[2024-12-04 23:59:17 root] (utils.py 283): INFO Epoch: [2] [ 100/2502] eta: 1:21:52 lr: 0.000020 loss_cls: 4.1416 (4.1121) grad_norm: 4.3723 (4.4800) time: 1.8255 data: 0.0003 max mem: 8421 +[2024-12-04 23:59:35 root] (utils.py 283): INFO Epoch: [2] [ 110/2502] eta: 1:20:40 lr: 0.000020 loss_cls: 4.1123 (4.0827) grad_norm: 4.3892 (4.4609) time: 1.7953 data: 0.0003 max mem: 8421 +[2024-12-04 23:59:53 root] (utils.py 283): INFO Epoch: [2] [ 120/2502] eta: 1:19:32 lr: 0.000020 loss_cls: 3.8229 (4.0844) grad_norm: 4.3291 (4.4430) time: 1.7922 data: 0.0002 max mem: 8421 +[2024-12-05 00:00:14 root] (utils.py 283): INFO Epoch: [2] [ 130/2502] eta: 1:19:23 lr: 0.000020 loss_cls: 4.2132 (4.0782) grad_norm: 4.3291 (4.4461) time: 1.9227 data: 0.0002 max mem: 8421 +[2024-12-05 00:00:37 root] (utils.py 283): INFO Epoch: [2] [ 140/2502] eta: 1:19:47 lr: 0.000020 loss_cls: 4.2132 (4.0880) grad_norm: 4.4245 (4.4599) time: 2.1682 data: 0.0003 max mem: 8421 +[2024-12-05 00:00:58 root] (utils.py 283): INFO Epoch: [2] [ 150/2502] eta: 1:19:44 lr: 0.000020 loss_cls: 4.3463 (4.0604) grad_norm: 4.5781 (4.4795) time: 2.2058 data: 0.0003 max mem: 8421 +[2024-12-05 00:01:20 root] (utils.py 283): INFO Epoch: [2] [ 160/2502] eta: 1:19:53 lr: 0.000020 loss_cls: 4.2160 (4.0690) grad_norm: 4.4588 (4.4745) time: 2.1887 data: 0.0003 max mem: 8421 +[2024-12-05 00:01:42 root] (utils.py 283): INFO Epoch: [2] [ 170/2502] eta: 1:19:42 lr: 0.000020 loss_cls: 4.1556 (4.0677) grad_norm: 4.4045 (4.4617) time: 2.1755 data: 0.0003 max mem: 8421 +[2024-12-05 00:02:01 root] (utils.py 283): INFO Epoch: [2] [ 180/2502] eta: 1:19:06 lr: 0.000020 loss_cls: 4.2594 (4.0739) grad_norm: 4.2069 (4.4524) time: 2.0206 data: 0.0003 max mem: 8421 +[2024-12-05 00:02:23 root] (utils.py 283): INFO Epoch: [2] [ 190/2502] eta: 1:19:06 lr: 0.000020 loss_cls: 4.2958 (4.0778) grad_norm: 4.2887 (4.4476) time: 2.0698 data: 0.0003 max mem: 8421 +[2024-12-05 00:02:45 root] (utils.py 283): INFO Epoch: [2] [ 200/2502] eta: 1:19:00 lr: 0.000020 loss_cls: 4.2655 (4.0872) grad_norm: 4.3553 (4.4470) time: 2.1992 data: 0.0002 max mem: 8421 +[2024-12-05 00:03:07 root] (utils.py 283): INFO Epoch: [2] [ 210/2502] eta: 1:18:58 lr: 0.000020 loss_cls: 4.1958 (4.0972) grad_norm: 4.3447 (4.4540) time: 2.2068 data: 0.0003 max mem: 8421 +[2024-12-05 00:03:29 root] (utils.py 283): INFO Epoch: [2] [ 220/2502] eta: 1:18:54 lr: 0.000020 loss_cls: 4.1661 (4.0883) grad_norm: 4.3447 (4.4516) time: 2.2281 data: 0.0003 max mem: 8421 +[2024-12-05 00:03:48 root] (utils.py 283): INFO Epoch: [2] [ 230/2502] eta: 1:18:11 lr: 0.000020 loss_cls: 4.1661 (4.0974) grad_norm: 4.3496 (4.4491) time: 2.0363 data: 0.0003 max mem: 8421 +[2024-12-05 00:04:10 root] (utils.py 283): INFO Epoch: [2] [ 240/2502] eta: 1:18:07 lr: 0.000020 loss_cls: 3.9668 (4.0778) grad_norm: 4.4166 (4.4518) time: 2.0434 data: 0.0002 max mem: 8421 +[2024-12-05 00:04:32 root] (utils.py 283): INFO Epoch: [2] [ 250/2502] eta: 1:17:57 lr: 0.000020 loss_cls: 3.6472 (4.0720) grad_norm: 4.3144 (4.4493) time: 2.2167 data: 0.0003 max mem: 8421 +[2024-12-05 00:04:53 root] (utils.py 283): INFO Epoch: [2] [ 260/2502] eta: 1:17:32 lr: 0.000020 loss_cls: 4.1190 (4.0688) grad_norm: 4.2874 (4.4651) time: 2.1144 data: 0.0002 max mem: 8421 +[2024-12-05 00:05:15 root] (utils.py 283): INFO Epoch: [2] [ 270/2502] eta: 1:17:24 lr: 0.000020 loss_cls: 4.1937 (4.0725) grad_norm: 4.2803 (4.4551) time: 2.1305 data: 0.0002 max mem: 8421 +[2024-12-05 00:05:33 root] (utils.py 283): INFO Epoch: [2] [ 280/2502] eta: 1:16:42 lr: 0.000020 loss_cls: 4.1300 (4.0664) grad_norm: 4.3267 (4.4584) time: 2.0185 data: 0.0002 max mem: 8421 +[2024-12-05 00:05:54 root] (utils.py 283): INFO Epoch: [2] [ 290/2502] eta: 1:16:26 lr: 0.000020 loss_cls: 3.6904 (4.0500) grad_norm: 4.3787 (4.4553) time: 1.9763 data: 0.0002 max mem: 8421 +[2024-12-05 00:06:17 root] (utils.py 283): INFO Epoch: [2] [ 300/2502] eta: 1:16:17 lr: 0.000020 loss_cls: 3.6938 (4.0481) grad_norm: 4.3528 (4.4564) time: 2.1860 data: 0.0003 max mem: 8421 +[2024-12-05 00:06:38 root] (utils.py 283): INFO Epoch: [2] [ 310/2502] eta: 1:15:59 lr: 0.000020 loss_cls: 4.1403 (4.0492) grad_norm: 4.3497 (4.4506) time: 2.1747 data: 0.0002 max mem: 8421 +[2024-12-05 00:07:00 root] (utils.py 283): INFO Epoch: [2] [ 320/2502] eta: 1:15:49 lr: 0.000020 loss_cls: 4.1100 (4.0501) grad_norm: 4.3497 (4.4487) time: 2.1792 data: 0.0002 max mem: 8421 +[2024-12-05 00:07:20 root] (utils.py 283): INFO Epoch: [2] [ 330/2502] eta: 1:15:20 lr: 0.000020 loss_cls: 4.0362 (4.0480) grad_norm: 4.2947 (4.4409) time: 2.0999 data: 0.0003 max mem: 8421 +[2024-12-05 00:07:41 root] (utils.py 283): INFO Epoch: [2] [ 340/2502] eta: 1:14:59 lr: 0.000020 loss_cls: 3.8355 (4.0434) grad_norm: 4.1153 (4.4332) time: 2.0173 data: 0.0003 max mem: 8421 +[2024-12-05 00:08:03 root] (utils.py 283): INFO Epoch: [2] [ 350/2502] eta: 1:14:49 lr: 0.000020 loss_cls: 3.8355 (4.0413) grad_norm: 4.2790 (4.4413) time: 2.1651 data: 0.0003 max mem: 8421 +[2024-12-05 00:08:24 root] (utils.py 283): INFO Epoch: [2] [ 360/2502] eta: 1:14:30 lr: 0.000020 loss_cls: 3.4931 (4.0217) grad_norm: 4.3559 (4.4630) time: 2.1892 data: 0.0003 max mem: 8421 +[2024-12-05 00:08:47 root] (utils.py 283): INFO Epoch: [2] [ 370/2502] eta: 1:14:20 lr: 0.000020 loss_cls: 3.4200 (4.0195) grad_norm: 4.2586 (4.4593) time: 2.2005 data: 0.0003 max mem: 8421 +[2024-12-05 00:09:08 root] (utils.py 283): INFO Epoch: [2] [ 380/2502] eta: 1:14:01 lr: 0.000020 loss_cls: 4.1523 (4.0175) grad_norm: 4.2977 (4.4537) time: 2.2012 data: 0.0003 max mem: 8421 +[2024-12-05 00:09:28 root] (utils.py 283): INFO Epoch: [2] [ 390/2502] eta: 1:13:33 lr: 0.000020 loss_cls: 4.1107 (4.0132) grad_norm: 4.3025 (4.4552) time: 2.0410 data: 0.0002 max mem: 8421 +[2024-12-05 00:09:50 root] (utils.py 283): INFO Epoch: [2] [ 400/2502] eta: 1:13:20 lr: 0.000020 loss_cls: 4.0910 (4.0140) grad_norm: 4.3808 (4.4568) time: 2.1001 data: 0.0003 max mem: 8421 +[2024-12-05 00:10:11 root] (utils.py 283): INFO Epoch: [2] [ 410/2502] eta: 1:12:58 lr: 0.000020 loss_cls: 4.1101 (4.0122) grad_norm: 4.3808 (4.4556) time: 2.1538 data: 0.0003 max mem: 8421 +[2024-12-05 00:10:29 root] (utils.py 283): INFO Epoch: [2] [ 420/2502] eta: 1:12:21 lr: 0.000020 loss_cls: 3.9703 (4.0105) grad_norm: 4.2640 (4.4502) time: 1.9164 data: 0.0003 max mem: 8421 +[2024-12-05 00:10:47 root] (utils.py 283): INFO Epoch: [2] [ 430/2502] eta: 1:11:45 lr: 0.000020 loss_cls: 4.1273 (4.0140) grad_norm: 4.2640 (4.4475) time: 1.7763 data: 0.0003 max mem: 8421 +[2024-12-05 00:11:04 root] (utils.py 283): INFO Epoch: [2] [ 440/2502] eta: 1:11:11 lr: 0.000020 loss_cls: 4.3257 (4.0191) grad_norm: 4.2169 (4.4477) time: 1.7810 data: 0.0003 max mem: 8421 +[2024-12-05 00:11:23 root] (utils.py 283): INFO Epoch: [2] [ 450/2502] eta: 1:10:40 lr: 0.000020 loss_cls: 3.9890 (4.0131) grad_norm: 4.2394 (4.4436) time: 1.8138 data: 0.0003 max mem: 8421 +[2024-12-05 00:11:44 root] (utils.py 283): INFO Epoch: [2] [ 460/2502] eta: 1:10:22 lr: 0.000020 loss_cls: 3.8821 (4.0103) grad_norm: 4.2470 (4.4402) time: 1.9912 data: 0.0003 max mem: 8421 +[2024-12-05 00:12:06 root] (utils.py 283): INFO Epoch: [2] [ 470/2502] eta: 1:10:06 lr: 0.000020 loss_cls: 3.9529 (4.0122) grad_norm: 4.2846 (4.4369) time: 2.1586 data: 0.0002 max mem: 8421 +[2024-12-05 00:12:27 root] (utils.py 283): INFO Epoch: [2] [ 480/2502] eta: 1:09:47 lr: 0.000020 loss_cls: 4.1952 (4.0085) grad_norm: 4.2846 (4.4393) time: 2.1488 data: 0.0002 max mem: 8421 +[2024-12-05 00:12:49 root] (utils.py 283): INFO Epoch: [2] [ 490/2502] eta: 1:09:33 lr: 0.000020 loss_cls: 4.1507 (4.0130) grad_norm: 4.3030 (4.4423) time: 2.1735 data: 0.0002 max mem: 8421 +[2024-12-05 00:13:08 root] (utils.py 283): INFO Epoch: [2] [ 500/2502] eta: 1:09:04 lr: 0.000020 loss_cls: 4.2835 (4.0131) grad_norm: 4.3824 (4.4418) time: 2.0475 data: 0.0002 max mem: 8421 +[2024-12-05 00:13:30 root] (utils.py 283): INFO Epoch: [2] [ 510/2502] eta: 1:08:47 lr: 0.000020 loss_cls: 3.9874 (4.0119) grad_norm: 4.2423 (4.4436) time: 2.0158 data: 0.0002 max mem: 8421 +[2024-12-05 00:13:53 root] (utils.py 283): INFO Epoch: [2] [ 520/2502] eta: 1:08:34 lr: 0.000020 loss_cls: 3.9588 (4.0120) grad_norm: 4.3458 (4.4433) time: 2.2244 data: 0.0002 max mem: 8421 +[2024-12-05 00:14:13 root] (utils.py 283): INFO Epoch: [2] [ 530/2502] eta: 1:08:12 lr: 0.000020 loss_cls: 4.0319 (4.0087) grad_norm: 4.2283 (4.4379) time: 2.1484 data: 0.0002 max mem: 8421 +[2024-12-05 00:14:35 root] (utils.py 283): INFO Epoch: [2] [ 540/2502] eta: 1:07:55 lr: 0.000020 loss_cls: 4.0647 (4.0082) grad_norm: 4.2295 (4.4420) time: 2.1015 data: 0.0002 max mem: 8421 +[2024-12-05 00:14:55 root] (utils.py 283): INFO Epoch: [2] [ 550/2502] eta: 1:07:32 lr: 0.000020 loss_cls: 4.1557 (4.0089) grad_norm: 4.6800 (4.4444) time: 2.1097 data: 0.0002 max mem: 8421 +[2024-12-05 00:15:15 root] (utils.py 283): INFO Epoch: [2] [ 560/2502] eta: 1:07:08 lr: 0.000020 loss_cls: 4.1262 (4.0077) grad_norm: 4.4326 (4.4433) time: 2.0003 data: 0.0003 max mem: 8421 +[2024-12-05 00:15:37 root] (utils.py 283): INFO Epoch: [2] [ 570/2502] eta: 1:06:52 lr: 0.000020 loss_cls: 4.0263 (4.0073) grad_norm: 4.2577 (4.4386) time: 2.0975 data: 0.0003 max mem: 8421 +[2024-12-05 00:15:58 root] (utils.py 283): INFO Epoch: [2] [ 580/2502] eta: 1:06:34 lr: 0.000020 loss_cls: 3.8619 (4.0017) grad_norm: 4.1758 (4.4342) time: 2.1830 data: 0.0002 max mem: 8421 +[2024-12-05 00:16:20 root] (utils.py 283): INFO Epoch: [2] [ 590/2502] eta: 1:06:16 lr: 0.000020 loss_cls: 3.8820 (4.0026) grad_norm: 4.1759 (4.4331) time: 2.1575 data: 0.0002 max mem: 8421 +[2024-12-05 00:16:42 root] (utils.py 283): INFO Epoch: [2] [ 600/2502] eta: 1:05:59 lr: 0.000020 loss_cls: 4.1440 (4.0021) grad_norm: 4.3124 (4.4360) time: 2.1809 data: 0.0002 max mem: 8421 +[2024-12-05 00:17:01 root] (utils.py 283): INFO Epoch: [2] [ 610/2502] eta: 1:05:31 lr: 0.000020 loss_cls: 4.0233 (3.9999) grad_norm: 4.2970 (4.4328) time: 2.0237 data: 0.0003 max mem: 8421 +[2024-12-05 00:17:22 root] (utils.py 283): INFO Epoch: [2] [ 620/2502] eta: 1:05:12 lr: 0.000020 loss_cls: 3.9837 (4.0015) grad_norm: 4.3583 (4.4322) time: 1.9994 data: 0.0003 max mem: 8421 +[2024-12-05 00:17:43 root] (utils.py 283): INFO Epoch: [2] [ 630/2502] eta: 1:04:51 lr: 0.000020 loss_cls: 3.9837 (4.0020) grad_norm: 4.4557 (4.4331) time: 2.1000 data: 0.0002 max mem: 8421 +[2024-12-05 00:18:04 root] (utils.py 283): INFO Epoch: [2] [ 640/2502] eta: 1:04:31 lr: 0.000020 loss_cls: 3.6684 (3.9958) grad_norm: 4.4926 (4.4368) time: 2.0937 data: 0.0002 max mem: 8421 +[2024-12-05 00:18:26 root] (utils.py 283): INFO Epoch: [2] [ 650/2502] eta: 1:04:15 lr: 0.000020 loss_cls: 3.9607 (3.9960) grad_norm: 4.1924 (4.4345) time: 2.1850 data: 0.0003 max mem: 8421 +[2024-12-05 00:18:45 root] (utils.py 283): INFO Epoch: [2] [ 660/2502] eta: 1:03:48 lr: 0.000020 loss_cls: 4.1614 (3.9949) grad_norm: 4.1878 (4.4328) time: 2.0464 data: 0.0003 max mem: 8421 +[2024-12-05 00:19:07 root] (utils.py 283): INFO Epoch: [2] [ 670/2502] eta: 1:03:30 lr: 0.000020 loss_cls: 4.0118 (3.9947) grad_norm: 4.2644 (4.4510) time: 2.0207 data: 0.0003 max mem: 8421 +[2024-12-05 00:19:29 root] (utils.py 283): INFO Epoch: [2] [ 680/2502] eta: 1:03:14 lr: 0.000020 loss_cls: 4.0118 (3.9943) grad_norm: 4.2492 (4.4501) time: 2.2290 data: 0.0002 max mem: 8421 +[2024-12-05 00:19:51 root] (utils.py 283): INFO Epoch: [2] [ 690/2502] eta: 1:02:56 lr: 0.000020 loss_cls: 4.0575 (3.9912) grad_norm: 4.3045 (4.4509) time: 2.2175 data: 0.0003 max mem: 8421 +[2024-12-05 00:20:13 root] (utils.py 283): INFO Epoch: [2] [ 700/2502] eta: 1:02:39 lr: 0.000020 loss_cls: 4.1869 (3.9944) grad_norm: 4.3928 (4.4503) time: 2.1972 data: 0.0003 max mem: 8421 +[2024-12-05 00:20:33 root] (utils.py 283): INFO Epoch: [2] [ 710/2502] eta: 1:02:15 lr: 0.000020 loss_cls: 4.3290 (3.9958) grad_norm: 4.3631 (4.4508) time: 2.1036 data: 0.0003 max mem: 8421 +[2024-12-05 00:20:53 root] (utils.py 283): INFO Epoch: [2] [ 720/2502] eta: 1:01:52 lr: 0.000020 loss_cls: 4.1668 (3.9948) grad_norm: 4.1899 (4.4487) time: 1.9916 data: 0.0003 max mem: 8421 +[2024-12-05 00:21:16 root] (utils.py 283): INFO Epoch: [2] [ 730/2502] eta: 1:01:35 lr: 0.000020 loss_cls: 4.1779 (3.9991) grad_norm: 4.2587 (4.4486) time: 2.1222 data: 0.0003 max mem: 8421 +[2024-12-05 00:21:37 root] (utils.py 283): INFO Epoch: [2] [ 740/2502] eta: 1:01:15 lr: 0.000020 loss_cls: 4.4161 (4.0043) grad_norm: 4.3028 (4.4493) time: 2.1762 data: 0.0002 max mem: 8421 +[2024-12-05 00:21:59 root] (utils.py 283): INFO Epoch: [2] [ 750/2502] eta: 1:00:58 lr: 0.000020 loss_cls: 4.3158 (4.0047) grad_norm: 4.3303 (4.4492) time: 2.1809 data: 0.0003 max mem: 8421 +[2024-12-05 00:22:20 root] (utils.py 283): INFO Epoch: [2] [ 760/2502] eta: 1:00:38 lr: 0.000020 loss_cls: 3.9452 (4.0059) grad_norm: 4.3303 (4.4479) time: 2.1962 data: 0.0003 max mem: 8421 +[2024-12-05 00:22:39 root] (utils.py 283): INFO Epoch: [2] [ 770/2502] eta: 1:00:13 lr: 0.000020 loss_cls: 3.8339 (4.0033) grad_norm: 4.3003 (4.4457) time: 2.0098 data: 0.0003 max mem: 8421 +[2024-12-05 00:23:01 root] (utils.py 283): INFO Epoch: [2] [ 780/2502] eta: 0:59:54 lr: 0.000020 loss_cls: 3.7400 (4.0024) grad_norm: 4.3003 (4.4464) time: 2.0279 data: 0.0003 max mem: 8421 +[2024-12-05 00:23:23 root] (utils.py 283): INFO Epoch: [2] [ 790/2502] eta: 0:59:35 lr: 0.000020 loss_cls: 4.0204 (4.0033) grad_norm: 4.3799 (4.4468) time: 2.1756 data: 0.0003 max mem: 8421 +[2024-12-05 00:23:45 root] (utils.py 283): INFO Epoch: [2] [ 800/2502] eta: 0:59:17 lr: 0.000020 loss_cls: 4.1623 (4.0043) grad_norm: 4.4150 (4.4474) time: 2.1928 data: 0.0003 max mem: 8421 +[2024-12-05 00:24:08 root] (utils.py 283): INFO Epoch: [2] [ 810/2502] eta: 0:58:59 lr: 0.000020 loss_cls: 4.1877 (4.0057) grad_norm: 4.3065 (4.4509) time: 2.2336 data: 0.0003 max mem: 8421 +[2024-12-05 00:24:26 root] (utils.py 283): INFO Epoch: [2] [ 820/2502] eta: 0:58:33 lr: 0.000020 loss_cls: 4.2557 (4.0068) grad_norm: 4.2821 (4.4475) time: 2.0407 data: 0.0002 max mem: 8421 +[2024-12-05 00:24:47 root] (utils.py 283): INFO Epoch: [2] [ 830/2502] eta: 0:58:13 lr: 0.000020 loss_cls: 4.0521 (4.0038) grad_norm: 4.1848 (4.4514) time: 1.9969 data: 0.0002 max mem: 8421 +[2024-12-05 00:25:10 root] (utils.py 283): INFO Epoch: [2] [ 840/2502] eta: 0:57:56 lr: 0.000020 loss_cls: 3.8909 (4.0011) grad_norm: 4.2330 (4.4498) time: 2.2123 data: 0.0002 max mem: 8421 +[2024-12-05 00:25:31 root] (utils.py 283): INFO Epoch: [2] [ 850/2502] eta: 0:57:36 lr: 0.000020 loss_cls: 4.1116 (4.0042) grad_norm: 4.3874 (4.4493) time: 2.1927 data: 0.0002 max mem: 8421 +[2024-12-05 00:25:54 root] (utils.py 283): INFO Epoch: [2] [ 860/2502] eta: 0:57:19 lr: 0.000020 loss_cls: 4.2374 (4.0029) grad_norm: 4.3181 (4.4476) time: 2.2153 data: 0.0002 max mem: 8421 +[2024-12-05 00:26:13 root] (utils.py 283): INFO Epoch: [2] [ 870/2502] eta: 0:56:53 lr: 0.000020 loss_cls: 3.8499 (4.0007) grad_norm: 4.3175 (4.4480) time: 2.0680 data: 0.0002 max mem: 8421 +[2024-12-05 00:26:34 root] (utils.py 283): INFO Epoch: [2] [ 880/2502] eta: 0:56:33 lr: 0.000020 loss_cls: 4.1338 (4.0032) grad_norm: 4.4353 (4.4489) time: 1.9804 data: 0.0002 max mem: 8421 +[2024-12-05 00:26:56 root] (utils.py 283): INFO Epoch: [2] [ 890/2502] eta: 0:56:13 lr: 0.000020 loss_cls: 4.2312 (4.0047) grad_norm: 4.3207 (4.4460) time: 2.1472 data: 0.0002 max mem: 8421 +[2024-12-05 00:27:16 root] (utils.py 283): INFO Epoch: [2] [ 900/2502] eta: 0:55:52 lr: 0.000020 loss_cls: 4.0548 (4.0024) grad_norm: 4.2496 (4.4496) time: 2.1276 data: 0.0002 max mem: 8421 +[2024-12-05 00:27:38 root] (utils.py 283): INFO Epoch: [2] [ 910/2502] eta: 0:55:33 lr: 0.000020 loss_cls: 3.8326 (4.0036) grad_norm: 4.3527 (4.4489) time: 2.1317 data: 0.0002 max mem: 8421 +[2024-12-05 00:27:58 root] (utils.py 283): INFO Epoch: [2] [ 920/2502] eta: 0:55:10 lr: 0.000020 loss_cls: 4.3841 (4.0048) grad_norm: 4.2422 (4.4464) time: 2.0998 data: 0.0003 max mem: 8421 +[2024-12-05 00:28:19 root] (utils.py 283): INFO Epoch: [2] [ 930/2502] eta: 0:54:48 lr: 0.000020 loss_cls: 4.0775 (4.0029) grad_norm: 4.2369 (4.4438) time: 2.0174 data: 0.0003 max mem: 8421 +[2024-12-05 00:28:41 root] (utils.py 283): INFO Epoch: [2] [ 940/2502] eta: 0:54:30 lr: 0.000020 loss_cls: 3.6093 (4.0010) grad_norm: 4.2043 (4.4410) time: 2.1224 data: 0.0003 max mem: 8421 +[2024-12-05 00:29:02 root] (utils.py 283): INFO Epoch: [2] [ 950/2502] eta: 0:54:09 lr: 0.000020 loss_cls: 3.7848 (4.0008) grad_norm: 4.1378 (4.4383) time: 2.1640 data: 0.0003 max mem: 8421 +[2024-12-05 00:29:19 root] (utils.py 283): INFO Epoch: [2] [ 960/2502] eta: 0:53:42 lr: 0.000020 loss_cls: 4.3063 (4.0017) grad_norm: 4.1287 (4.4362) time: 1.9197 data: 0.0003 max mem: 8421 +[2024-12-05 00:29:37 root] (utils.py 283): INFO Epoch: [2] [ 970/2502] eta: 0:53:16 lr: 0.000020 loss_cls: 3.8052 (3.9986) grad_norm: 4.0714 (4.4337) time: 1.7491 data: 0.0003 max mem: 8421 +[2024-12-05 00:29:55 root] (utils.py 283): INFO Epoch: [2] [ 980/2502] eta: 0:52:50 lr: 0.000020 loss_cls: 3.9119 (3.9984) grad_norm: 4.2410 (4.4338) time: 1.7711 data: 0.0003 max mem: 8421 +[2024-12-05 00:30:12 root] (utils.py 283): INFO Epoch: [2] [ 990/2502] eta: 0:52:25 lr: 0.000020 loss_cls: 3.9905 (3.9995) grad_norm: 4.3634 (4.4344) time: 1.7722 data: 0.0003 max mem: 8421 +[2024-12-05 00:30:34 root] (utils.py 283): INFO Epoch: [2] [1000/2502] eta: 0:52:06 lr: 0.000020 loss_cls: 3.9905 (3.9981) grad_norm: 4.2497 (4.4330) time: 1.9825 data: 0.0003 max mem: 8421 +[2024-12-05 00:30:57 root] (utils.py 283): INFO Epoch: [2] [1010/2502] eta: 0:51:48 lr: 0.000020 loss_cls: 4.0246 (3.9983) grad_norm: 4.2497 (4.4329) time: 2.2413 data: 0.0003 max mem: 8421 +[2024-12-05 00:31:18 root] (utils.py 283): INFO Epoch: [2] [1020/2502] eta: 0:51:27 lr: 0.000020 loss_cls: 4.0366 (3.9965) grad_norm: 4.3772 (4.4323) time: 2.1745 data: 0.0003 max mem: 8421 +[2024-12-05 00:31:40 root] (utils.py 283): INFO Epoch: [2] [1030/2502] eta: 0:51:08 lr: 0.000020 loss_cls: 4.0366 (3.9969) grad_norm: 4.3258 (4.4305) time: 2.1661 data: 0.0003 max mem: 8421 +[2024-12-05 00:31:59 root] (utils.py 283): INFO Epoch: [2] [1040/2502] eta: 0:50:45 lr: 0.000020 loss_cls: 3.9058 (3.9947) grad_norm: 4.2038 (4.4282) time: 2.0735 data: 0.0002 max mem: 8421 +[2024-12-05 00:32:20 root] (utils.py 283): INFO Epoch: [2] [1050/2502] eta: 0:50:24 lr: 0.000020 loss_cls: 3.8260 (3.9942) grad_norm: 4.2386 (4.4301) time: 1.9888 data: 0.0002 max mem: 8421 +[2024-12-05 00:32:42 root] (utils.py 283): INFO Epoch: [2] [1060/2502] eta: 0:50:05 lr: 0.000020 loss_cls: 4.2570 (3.9967) grad_norm: 4.4785 (4.4306) time: 2.1528 data: 0.0002 max mem: 8421 +[2024-12-05 00:33:03 root] (utils.py 283): INFO Epoch: [2] [1070/2502] eta: 0:49:44 lr: 0.000020 loss_cls: 4.3013 (3.9968) grad_norm: 4.3327 (4.4293) time: 2.1450 data: 0.0003 max mem: 8421 +[2024-12-05 00:33:26 root] (utils.py 283): INFO Epoch: [2] [1080/2502] eta: 0:49:26 lr: 0.000020 loss_cls: 4.1481 (3.9983) grad_norm: 4.3173 (4.4307) time: 2.1804 data: 0.0003 max mem: 8421 +[2024-12-05 00:33:47 root] (utils.py 283): INFO Epoch: [2] [1090/2502] eta: 0:49:05 lr: 0.000020 loss_cls: 4.1448 (3.9996) grad_norm: 4.2587 (4.4298) time: 2.1714 data: 0.0002 max mem: 8421 +[2024-12-05 00:34:07 root] (utils.py 283): INFO Epoch: [2] [1100/2502] eta: 0:48:43 lr: 0.000020 loss_cls: 3.9878 (3.9972) grad_norm: 4.2258 (4.4288) time: 2.0436 data: 0.0003 max mem: 8421 +[2024-12-05 00:34:29 root] (utils.py 283): INFO Epoch: [2] [1110/2502] eta: 0:48:24 lr: 0.000020 loss_cls: 4.0380 (3.9991) grad_norm: 4.0891 (4.4279) time: 2.1424 data: 0.0002 max mem: 8421 +[2024-12-05 00:34:51 root] (utils.py 283): INFO Epoch: [2] [1120/2502] eta: 0:48:04 lr: 0.000020 loss_cls: 4.2717 (4.0008) grad_norm: 4.1337 (4.4265) time: 2.2019 data: 0.0002 max mem: 8421 +[2024-12-05 00:35:13 root] (utils.py 283): INFO Epoch: [2] [1130/2502] eta: 0:47:45 lr: 0.000020 loss_cls: 4.2149 (4.0027) grad_norm: 4.2459 (4.4308) time: 2.1762 data: 0.0002 max mem: 8421 +[2024-12-05 00:35:35 root] (utils.py 283): INFO Epoch: [2] [1140/2502] eta: 0:47:25 lr: 0.000020 loss_cls: 4.0768 (4.0027) grad_norm: 4.2949 (4.4327) time: 2.1910 data: 0.0002 max mem: 8421 +[2024-12-05 00:35:52 root] (utils.py 283): INFO Epoch: [2] [1150/2502] eta: 0:47:00 lr: 0.000020 loss_cls: 4.2043 (4.0051) grad_norm: 4.2714 (4.4326) time: 1.9518 data: 0.0002 max mem: 8421 +[2024-12-05 00:36:10 root] (utils.py 283): INFO Epoch: [2] [1160/2502] eta: 0:46:35 lr: 0.000020 loss_cls: 4.2649 (4.0045) grad_norm: 4.2338 (4.4311) time: 1.7529 data: 0.0002 max mem: 8421 +[2024-12-05 00:36:27 root] (utils.py 283): INFO Epoch: [2] [1170/2502] eta: 0:46:11 lr: 0.000020 loss_cls: 4.2279 (4.0055) grad_norm: 4.2464 (4.4306) time: 1.7612 data: 0.0002 max mem: 8421 +[2024-12-05 00:36:44 root] (utils.py 283): INFO Epoch: [2] [1180/2502] eta: 0:45:46 lr: 0.000020 loss_cls: 4.0370 (4.0055) grad_norm: 4.2464 (4.4288) time: 1.7314 data: 0.0002 max mem: 8421 +[2024-12-05 00:37:02 root] (utils.py 283): INFO Epoch: [2] [1190/2502] eta: 0:45:21 lr: 0.000020 loss_cls: 4.0370 (4.0052) grad_norm: 4.2592 (4.4274) time: 1.7201 data: 0.0002 max mem: 8421 +[2024-12-05 00:37:19 root] (utils.py 283): INFO Epoch: [2] [1200/2502] eta: 0:44:57 lr: 0.000020 loss_cls: 4.0656 (4.0048) grad_norm: 4.2592 (4.4260) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 00:37:36 root] (utils.py 283): INFO Epoch: [2] [1210/2502] eta: 0:44:32 lr: 0.000020 loss_cls: 3.8967 (4.0036) grad_norm: 4.1547 (4.4243) time: 1.7209 data: 0.0003 max mem: 8421 +[2024-12-05 00:37:53 root] (utils.py 283): INFO Epoch: [2] [1220/2502] eta: 0:44:08 lr: 0.000020 loss_cls: 3.9227 (4.0050) grad_norm: 4.1518 (4.4220) time: 1.7224 data: 0.0003 max mem: 8421 +[2024-12-05 00:38:10 root] (utils.py 283): INFO Epoch: [2] [1230/2502] eta: 0:43:44 lr: 0.000020 loss_cls: 4.0895 (4.0057) grad_norm: 4.1730 (4.4215) time: 1.7214 data: 0.0003 max mem: 8421 +[2024-12-05 00:38:28 root] (utils.py 283): INFO Epoch: [2] [1240/2502] eta: 0:43:20 lr: 0.000020 loss_cls: 4.0119 (4.0071) grad_norm: 4.3120 (4.4206) time: 1.7226 data: 0.0003 max mem: 8421 +[2024-12-05 00:38:45 root] (utils.py 283): INFO Epoch: [2] [1250/2502] eta: 0:42:56 lr: 0.000020 loss_cls: 4.0928 (4.0059) grad_norm: 4.1530 (4.4198) time: 1.7236 data: 0.0003 max mem: 8421 +[2024-12-05 00:39:02 root] (utils.py 283): INFO Epoch: [2] [1260/2502] eta: 0:42:32 lr: 0.000020 loss_cls: 4.1293 (4.0074) grad_norm: 4.1407 (4.4191) time: 1.7224 data: 0.0003 max mem: 8421 +[2024-12-05 00:39:19 root] (utils.py 283): INFO Epoch: [2] [1270/2502] eta: 0:42:08 lr: 0.000020 loss_cls: 4.2284 (4.0069) grad_norm: 4.1980 (4.4174) time: 1.7245 data: 0.0003 max mem: 8421 +[2024-12-05 00:39:37 root] (utils.py 283): INFO Epoch: [2] [1280/2502] eta: 0:41:44 lr: 0.000020 loss_cls: 4.0873 (4.0061) grad_norm: 4.1858 (4.4160) time: 1.7290 data: 0.0003 max mem: 8421 +[2024-12-05 00:39:54 root] (utils.py 283): INFO Epoch: [2] [1290/2502] eta: 0:41:21 lr: 0.000020 loss_cls: 4.1472 (4.0074) grad_norm: 4.2004 (4.4165) time: 1.7275 data: 0.0003 max mem: 8421 +[2024-12-05 00:40:11 root] (utils.py 283): INFO Epoch: [2] [1300/2502] eta: 0:40:57 lr: 0.000020 loss_cls: 4.2754 (4.0082) grad_norm: 4.3223 (4.4164) time: 1.7245 data: 0.0003 max mem: 8421 +[2024-12-05 00:40:28 root] (utils.py 283): INFO Epoch: [2] [1310/2502] eta: 0:40:34 lr: 0.000020 loss_cls: 3.8995 (4.0058) grad_norm: 4.1811 (4.4144) time: 1.7246 data: 0.0003 max mem: 8421 +[2024-12-05 00:40:46 root] (utils.py 283): INFO Epoch: [2] [1320/2502] eta: 0:40:11 lr: 0.000020 loss_cls: 3.8944 (4.0053) grad_norm: 4.0799 (4.4123) time: 1.7244 data: 0.0003 max mem: 8421 +[2024-12-05 00:41:03 root] (utils.py 283): INFO Epoch: [2] [1330/2502] eta: 0:39:48 lr: 0.000020 loss_cls: 4.2831 (4.0066) grad_norm: 4.1702 (4.4107) time: 1.7241 data: 0.0003 max mem: 8421 +[2024-12-05 00:41:20 root] (utils.py 283): INFO Epoch: [2] [1340/2502] eta: 0:39:24 lr: 0.000020 loss_cls: 4.1844 (4.0046) grad_norm: 4.3342 (4.4101) time: 1.7228 data: 0.0003 max mem: 8421 +[2024-12-05 00:41:37 root] (utils.py 283): INFO Epoch: [2] [1350/2502] eta: 0:39:01 lr: 0.000020 loss_cls: 4.1595 (4.0071) grad_norm: 4.4012 (4.4105) time: 1.7231 data: 0.0003 max mem: 8421 +[2024-12-05 00:41:55 root] (utils.py 283): INFO Epoch: [2] [1360/2502] eta: 0:38:38 lr: 0.000020 loss_cls: 4.1595 (4.0046) grad_norm: 4.3011 (4.4098) time: 1.7227 data: 0.0003 max mem: 8421 +[2024-12-05 00:42:12 root] (utils.py 283): INFO Epoch: [2] [1370/2502] eta: 0:38:16 lr: 0.000020 loss_cls: 3.8114 (4.0035) grad_norm: 4.2346 (4.4086) time: 1.7209 data: 0.0003 max mem: 8421 +[2024-12-05 00:42:29 root] (utils.py 283): INFO Epoch: [2] [1380/2502] eta: 0:37:53 lr: 0.000020 loss_cls: 3.8791 (4.0016) grad_norm: 4.1642 (4.4088) time: 1.7229 data: 0.0003 max mem: 8421 +[2024-12-05 00:42:46 root] (utils.py 283): INFO Epoch: [2] [1390/2502] eta: 0:37:30 lr: 0.000020 loss_cls: 3.8791 (4.0004) grad_norm: 4.2054 (4.4083) time: 1.7233 data: 0.0003 max mem: 8421 +[2024-12-05 00:43:04 root] (utils.py 283): INFO Epoch: [2] [1400/2502] eta: 0:37:08 lr: 0.000020 loss_cls: 4.0832 (4.0000) grad_norm: 4.3020 (4.4075) time: 1.7227 data: 0.0003 max mem: 8421 +[2024-12-05 00:43:21 root] (utils.py 283): INFO Epoch: [2] [1410/2502] eta: 0:36:45 lr: 0.000020 loss_cls: 3.8721 (3.9997) grad_norm: 4.3295 (4.4071) time: 1.7238 data: 0.0003 max mem: 8421 +[2024-12-05 00:43:38 root] (utils.py 283): INFO Epoch: [2] [1420/2502] eta: 0:36:23 lr: 0.000020 loss_cls: 3.9740 (3.9995) grad_norm: 4.2600 (4.4061) time: 1.7255 data: 0.0003 max mem: 8421 +[2024-12-05 00:43:55 root] (utils.py 283): INFO Epoch: [2] [1430/2502] eta: 0:36:00 lr: 0.000020 loss_cls: 4.0560 (3.9986) grad_norm: 4.1353 (4.4040) time: 1.7253 data: 0.0003 max mem: 8421 +[2024-12-05 00:44:13 root] (utils.py 283): INFO Epoch: [2] [1440/2502] eta: 0:35:38 lr: 0.000020 loss_cls: 3.8577 (3.9967) grad_norm: 4.1461 (4.4025) time: 1.7215 data: 0.0002 max mem: 8421 +[2024-12-05 00:44:30 root] (utils.py 283): INFO Epoch: [2] [1450/2502] eta: 0:35:16 lr: 0.000020 loss_cls: 3.8577 (3.9954) grad_norm: 4.1593 (4.4016) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 00:44:47 root] (utils.py 283): INFO Epoch: [2] [1460/2502] eta: 0:34:53 lr: 0.000020 loss_cls: 4.0335 (3.9963) grad_norm: 4.3934 (4.4017) time: 1.7215 data: 0.0002 max mem: 8421 +[2024-12-05 00:45:04 root] (utils.py 283): INFO Epoch: [2] [1470/2502] eta: 0:34:31 lr: 0.000020 loss_cls: 4.0989 (3.9985) grad_norm: 4.3554 (4.4013) time: 1.7218 data: 0.0003 max mem: 8421 +[2024-12-05 00:45:21 root] (utils.py 283): INFO Epoch: [2] [1480/2502] eta: 0:34:09 lr: 0.000020 loss_cls: 4.0876 (3.9985) grad_norm: 4.2080 (4.3997) time: 1.7227 data: 0.0003 max mem: 8421 +[2024-12-05 00:45:39 root] (utils.py 283): INFO Epoch: [2] [1490/2502] eta: 0:33:47 lr: 0.000020 loss_cls: 4.0183 (3.9987) grad_norm: 4.0378 (4.3977) time: 1.7227 data: 0.0002 max mem: 8421 +[2024-12-05 00:45:56 root] (utils.py 283): INFO Epoch: [2] [1500/2502] eta: 0:33:25 lr: 0.000020 loss_cls: 4.0272 (3.9975) grad_norm: 4.0801 (4.3974) time: 1.7204 data: 0.0002 max mem: 8421 +[2024-12-05 00:46:13 root] (utils.py 283): INFO Epoch: [2] [1510/2502] eta: 0:33:04 lr: 0.000020 loss_cls: 3.7231 (3.9966) grad_norm: 4.2983 (4.3982) time: 1.7205 data: 0.0003 max mem: 8421 +[2024-12-05 00:46:30 root] (utils.py 283): INFO Epoch: [2] [1520/2502] eta: 0:32:42 lr: 0.000020 loss_cls: 3.9426 (3.9968) grad_norm: 4.2633 (4.3982) time: 1.7225 data: 0.0003 max mem: 8421 +[2024-12-05 00:46:48 root] (utils.py 283): INFO Epoch: [2] [1530/2502] eta: 0:32:20 lr: 0.000020 loss_cls: 3.9825 (3.9961) grad_norm: 4.2915 (4.4002) time: 1.7252 data: 0.0003 max mem: 8421 +[2024-12-05 00:47:05 root] (utils.py 283): INFO Epoch: [2] [1540/2502] eta: 0:31:58 lr: 0.000020 loss_cls: 4.0429 (3.9976) grad_norm: 4.3433 (4.3994) time: 1.7244 data: 0.0003 max mem: 8421 +[2024-12-05 00:47:22 root] (utils.py 283): INFO Epoch: [2] [1550/2502] eta: 0:31:37 lr: 0.000020 loss_cls: 4.2723 (3.9995) grad_norm: 4.2826 (4.3987) time: 1.7237 data: 0.0003 max mem: 8421 +[2024-12-05 00:47:39 root] (utils.py 283): INFO Epoch: [2] [1560/2502] eta: 0:31:15 lr: 0.000020 loss_cls: 4.2812 (4.0007) grad_norm: 4.2321 (4.3986) time: 1.7259 data: 0.0003 max mem: 8421 +[2024-12-05 00:47:57 root] (utils.py 283): INFO Epoch: [2] [1570/2502] eta: 0:30:54 lr: 0.000020 loss_cls: 3.7482 (3.9981) grad_norm: 4.1582 (4.3976) time: 1.7292 data: 0.0003 max mem: 8421 +[2024-12-05 00:48:14 root] (utils.py 283): INFO Epoch: [2] [1580/2502] eta: 0:30:32 lr: 0.000020 loss_cls: 3.5159 (3.9964) grad_norm: 4.0951 (4.3956) time: 1.7298 data: 0.0003 max mem: 8421 +[2024-12-05 00:48:31 root] (utils.py 283): INFO Epoch: [2] [1590/2502] eta: 0:30:11 lr: 0.000020 loss_cls: 3.6694 (3.9949) grad_norm: 4.2528 (4.3963) time: 1.7240 data: 0.0003 max mem: 8421 +[2024-12-05 00:48:48 root] (utils.py 283): INFO Epoch: [2] [1600/2502] eta: 0:29:50 lr: 0.000020 loss_cls: 3.7226 (3.9933) grad_norm: 4.3391 (4.3952) time: 1.7229 data: 0.0003 max mem: 8421 +[2024-12-05 00:49:06 root] (utils.py 283): INFO Epoch: [2] [1610/2502] eta: 0:29:28 lr: 0.000020 loss_cls: 4.0494 (3.9942) grad_norm: 4.2284 (4.3948) time: 1.7250 data: 0.0003 max mem: 8421 +[2024-12-05 00:49:23 root] (utils.py 283): INFO Epoch: [2] [1620/2502] eta: 0:29:07 lr: 0.000020 loss_cls: 4.0824 (3.9933) grad_norm: 4.4032 (4.3974) time: 1.7239 data: 0.0003 max mem: 8421 +[2024-12-05 00:49:40 root] (utils.py 283): INFO Epoch: [2] [1630/2502] eta: 0:28:46 lr: 0.000020 loss_cls: 4.1022 (3.9940) grad_norm: 4.7375 (4.4262) time: 1.7231 data: 0.0003 max mem: 8421 +[2024-12-05 00:49:57 root] (utils.py 283): INFO Epoch: [2] [1640/2502] eta: 0:28:25 lr: 0.000020 loss_cls: 4.1305 (3.9945) grad_norm: 4.8118 (4.4306) time: 1.7220 data: 0.0003 max mem: 8421 +[2024-12-05 00:50:14 root] (utils.py 283): INFO Epoch: [2] [1650/2502] eta: 0:28:04 lr: 0.000020 loss_cls: 4.0339 (3.9948) grad_norm: 4.7101 (4.4331) time: 1.7205 data: 0.0002 max mem: 8421 +[2024-12-05 00:50:32 root] (utils.py 283): INFO Epoch: [2] [1660/2502] eta: 0:27:43 lr: 0.000020 loss_cls: 4.0339 (3.9942) grad_norm: 4.7367 (4.4345) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 00:50:49 root] (utils.py 283): INFO Epoch: [2] [1670/2502] eta: 0:27:22 lr: 0.000020 loss_cls: 4.2006 (3.9954) grad_norm: 4.3716 (4.4334) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 00:51:06 root] (utils.py 283): INFO Epoch: [2] [1680/2502] eta: 0:27:01 lr: 0.000020 loss_cls: 4.2006 (3.9958) grad_norm: 4.1593 (4.4316) time: 1.7205 data: 0.0002 max mem: 8421 +[2024-12-05 00:51:23 root] (utils.py 283): INFO Epoch: [2] [1690/2502] eta: 0:26:40 lr: 0.000020 loss_cls: 3.8820 (3.9939) grad_norm: 4.1766 (4.4303) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 00:51:40 root] (utils.py 283): INFO Epoch: [2] [1700/2502] eta: 0:26:19 lr: 0.000020 loss_cls: 3.7666 (3.9938) grad_norm: 4.1907 (4.4291) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 00:51:58 root] (utils.py 283): INFO Epoch: [2] [1710/2502] eta: 0:25:58 lr: 0.000020 loss_cls: 4.0681 (3.9935) grad_norm: 4.1222 (4.4275) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 00:52:15 root] (utils.py 283): INFO Epoch: [2] [1720/2502] eta: 0:25:37 lr: 0.000020 loss_cls: 4.2249 (3.9943) grad_norm: 4.0110 (4.4258) time: 1.7209 data: 0.0003 max mem: 8421 +[2024-12-05 00:52:32 root] (utils.py 283): INFO Epoch: [2] [1730/2502] eta: 0:25:16 lr: 0.000020 loss_cls: 4.2079 (3.9922) grad_norm: 4.2821 (4.4289) time: 1.7211 data: 0.0002 max mem: 8421 +[2024-12-05 00:52:49 root] (utils.py 283): INFO Epoch: [2] [1740/2502] eta: 0:24:56 lr: 0.000020 loss_cls: 3.8176 (3.9922) grad_norm: 4.3827 (4.4288) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 00:53:07 root] (utils.py 283): INFO Epoch: [2] [1750/2502] eta: 0:24:35 lr: 0.000020 loss_cls: 4.1694 (3.9930) grad_norm: 4.2826 (4.4286) time: 1.7207 data: 0.0002 max mem: 8421 +[2024-12-05 00:53:24 root] (utils.py 283): INFO Epoch: [2] [1760/2502] eta: 0:24:14 lr: 0.000020 loss_cls: 4.2122 (3.9927) grad_norm: 4.2826 (4.4300) time: 1.7206 data: 0.0003 max mem: 8421 +[2024-12-05 00:53:41 root] (utils.py 283): INFO Epoch: [2] [1770/2502] eta: 0:23:54 lr: 0.000020 loss_cls: 3.7448 (3.9915) grad_norm: 4.1740 (4.4288) time: 1.7236 data: 0.0003 max mem: 8421 +[2024-12-05 00:53:58 root] (utils.py 283): INFO Epoch: [2] [1780/2502] eta: 0:23:33 lr: 0.000020 loss_cls: 4.0212 (3.9910) grad_norm: 4.1834 (4.4288) time: 1.7253 data: 0.0003 max mem: 8421 +[2024-12-05 00:54:15 root] (utils.py 283): INFO Epoch: [2] [1790/2502] eta: 0:23:13 lr: 0.000020 loss_cls: 4.0866 (3.9908) grad_norm: 4.1834 (4.4275) time: 1.7218 data: 0.0003 max mem: 8421 +[2024-12-05 00:54:33 root] (utils.py 283): INFO Epoch: [2] [1800/2502] eta: 0:22:52 lr: 0.000020 loss_cls: 3.9857 (3.9899) grad_norm: 4.1105 (4.4264) time: 1.7201 data: 0.0003 max mem: 8421 +[2024-12-05 00:54:50 root] (utils.py 283): INFO Epoch: [2] [1810/2502] eta: 0:22:32 lr: 0.000020 loss_cls: 4.0770 (3.9909) grad_norm: 4.1621 (4.4250) time: 1.7196 data: 0.0003 max mem: 8421 +[2024-12-05 00:55:07 root] (utils.py 283): INFO Epoch: [2] [1820/2502] eta: 0:22:11 lr: 0.000020 loss_cls: 4.1347 (3.9910) grad_norm: 4.1507 (4.4244) time: 1.7188 data: 0.0002 max mem: 8421 +[2024-12-05 00:55:24 root] (utils.py 283): INFO Epoch: [2] [1830/2502] eta: 0:21:51 lr: 0.000020 loss_cls: 4.1241 (3.9903) grad_norm: 4.1402 (4.4259) time: 1.7209 data: 0.0003 max mem: 8421 +[2024-12-05 00:55:41 root] (utils.py 283): INFO Epoch: [2] [1840/2502] eta: 0:21:31 lr: 0.000020 loss_cls: 3.9519 (3.9885) grad_norm: 4.1566 (4.4259) time: 1.7219 data: 0.0003 max mem: 8421 +[2024-12-05 00:55:59 root] (utils.py 283): INFO Epoch: [2] [1850/2502] eta: 0:21:10 lr: 0.000020 loss_cls: 4.1047 (3.9885) grad_norm: 4.3393 (4.4266) time: 1.7217 data: 0.0002 max mem: 8421 +[2024-12-05 00:56:16 root] (utils.py 283): INFO Epoch: [2] [1860/2502] eta: 0:20:50 lr: 0.000020 loss_cls: 4.1224 (3.9884) grad_norm: 4.4173 (4.4256) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 00:56:33 root] (utils.py 283): INFO Epoch: [2] [1870/2502] eta: 0:20:30 lr: 0.000020 loss_cls: 4.2552 (3.9894) grad_norm: 4.2026 (4.4246) time: 1.7199 data: 0.0003 max mem: 8421 +[2024-12-05 00:56:50 root] (utils.py 283): INFO Epoch: [2] [1880/2502] eta: 0:20:10 lr: 0.000020 loss_cls: 4.1862 (3.9886) grad_norm: 4.2026 (4.4250) time: 1.7225 data: 0.0002 max mem: 8421 +[2024-12-05 00:57:08 root] (utils.py 283): INFO Epoch: [2] [1890/2502] eta: 0:19:49 lr: 0.000020 loss_cls: 4.0473 (3.9893) grad_norm: 4.3944 (4.4244) time: 1.7226 data: 0.0002 max mem: 8421 +[2024-12-05 00:57:25 root] (utils.py 283): INFO Epoch: [2] [1900/2502] eta: 0:19:29 lr: 0.000020 loss_cls: 4.2349 (3.9901) grad_norm: 3.8637 (4.4216) time: 1.7197 data: 0.0002 max mem: 8421 +[2024-12-05 00:57:42 root] (utils.py 283): INFO Epoch: [2] [1910/2502] eta: 0:19:09 lr: 0.000020 loss_cls: 4.2068 (3.9898) grad_norm: 4.0264 (4.4220) time: 1.7211 data: 0.0002 max mem: 8421 +[2024-12-05 00:57:59 root] (utils.py 283): INFO Epoch: [2] [1920/2502] eta: 0:18:49 lr: 0.000020 loss_cls: 3.9729 (3.9895) grad_norm: 4.3554 (4.4222) time: 1.7235 data: 0.0002 max mem: 8421 +[2024-12-05 00:58:16 root] (utils.py 283): INFO Epoch: [2] [1930/2502] eta: 0:18:29 lr: 0.000020 loss_cls: 3.9057 (3.9880) grad_norm: 4.2858 (4.4230) time: 1.7217 data: 0.0002 max mem: 8421 +[2024-12-05 00:58:34 root] (utils.py 283): INFO Epoch: [2] [1940/2502] eta: 0:18:09 lr: 0.000020 loss_cls: 4.1228 (3.9880) grad_norm: 4.3171 (4.4264) time: 1.7175 data: 0.0002 max mem: 8421 +[2024-12-05 00:58:51 root] (utils.py 283): INFO Epoch: [2] [1950/2502] eta: 0:17:49 lr: 0.000020 loss_cls: 4.2051 (3.9883) grad_norm: 4.3938 (4.4270) time: 1.7178 data: 0.0002 max mem: 8421 +[2024-12-05 00:59:08 root] (utils.py 283): INFO Epoch: [2] [1960/2502] eta: 0:17:29 lr: 0.000020 loss_cls: 4.0782 (3.9870) grad_norm: 4.2387 (4.4265) time: 1.7199 data: 0.0002 max mem: 8421 +[2024-12-05 00:59:25 root] (utils.py 283): INFO Epoch: [2] [1970/2502] eta: 0:17:09 lr: 0.000020 loss_cls: 4.0782 (3.9891) grad_norm: 4.2235 (4.4271) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 00:59:42 root] (utils.py 283): INFO Epoch: [2] [1980/2502] eta: 0:16:49 lr: 0.000020 loss_cls: 4.2880 (3.9892) grad_norm: 4.2754 (4.4276) time: 1.7194 data: 0.0002 max mem: 8421 +[2024-12-05 01:00:00 root] (utils.py 283): INFO Epoch: [2] [1990/2502] eta: 0:16:29 lr: 0.000020 loss_cls: 4.0597 (3.9872) grad_norm: 4.2747 (4.4271) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 01:00:17 root] (utils.py 283): INFO Epoch: [2] [2000/2502] eta: 0:16:09 lr: 0.000020 loss_cls: 3.9524 (3.9878) grad_norm: 4.2837 (4.4273) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 01:00:34 root] (utils.py 283): INFO Epoch: [2] [2010/2502] eta: 0:15:49 lr: 0.000020 loss_cls: 4.3032 (3.9884) grad_norm: 4.2596 (4.4272) time: 1.7214 data: 0.0002 max mem: 8421 +[2024-12-05 01:00:51 root] (utils.py 283): INFO Epoch: [2] [2020/2502] eta: 0:15:30 lr: 0.000020 loss_cls: 4.3032 (3.9879) grad_norm: 4.2308 (4.4279) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 01:01:08 root] (utils.py 283): INFO Epoch: [2] [2030/2502] eta: 0:15:10 lr: 0.000020 loss_cls: 3.9910 (3.9879) grad_norm: 4.3727 (4.4278) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 01:01:26 root] (utils.py 283): INFO Epoch: [2] [2040/2502] eta: 0:14:50 lr: 0.000020 loss_cls: 3.9444 (3.9873) grad_norm: 4.2371 (4.4272) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 01:01:43 root] (utils.py 283): INFO Epoch: [2] [2050/2502] eta: 0:14:30 lr: 0.000020 loss_cls: 3.9202 (3.9876) grad_norm: 4.2458 (4.4262) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 01:02:00 root] (utils.py 283): INFO Epoch: [2] [2060/2502] eta: 0:14:11 lr: 0.000020 loss_cls: 4.2215 (3.9881) grad_norm: 4.3233 (4.4263) time: 1.7202 data: 0.0002 max mem: 8421 +[2024-12-05 01:02:17 root] (utils.py 283): INFO Epoch: [2] [2070/2502] eta: 0:13:51 lr: 0.000020 loss_cls: 4.1227 (3.9880) grad_norm: 4.2771 (4.4262) time: 1.7194 data: 0.0002 max mem: 8421 +[2024-12-05 01:02:34 root] (utils.py 283): INFO Epoch: [2] [2080/2502] eta: 0:13:31 lr: 0.000020 loss_cls: 4.1445 (3.9888) grad_norm: 4.2130 (4.4272) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 01:02:52 root] (utils.py 283): INFO Epoch: [2] [2090/2502] eta: 0:13:12 lr: 0.000020 loss_cls: 4.1910 (3.9893) grad_norm: 4.2253 (4.4270) time: 1.7205 data: 0.0002 max mem: 8421 +[2024-12-05 01:03:09 root] (utils.py 283): INFO Epoch: [2] [2100/2502] eta: 0:12:52 lr: 0.000020 loss_cls: 4.0181 (3.9891) grad_norm: 4.2497 (4.4263) time: 1.7197 data: 0.0002 max mem: 8421 +[2024-12-05 01:03:25 root] (utils.py 283): INFO Epoch: [2] [2110/2502] eta: 0:12:32 lr: 0.000020 loss_cls: 3.9032 (3.9878) grad_norm: 4.1866 (4.4257) time: 1.6885 data: 0.0002 max mem: 8421 +[2024-12-05 01:03:42 root] (utils.py 283): INFO Epoch: [2] [2120/2502] eta: 0:12:13 lr: 0.000020 loss_cls: 3.9032 (3.9881) grad_norm: 4.1301 (4.4253) time: 1.6683 data: 0.0002 max mem: 8421 +[2024-12-05 01:04:09 root] (utils.py 283): INFO Epoch: [2] [2130/2502] eta: 0:11:55 lr: 0.000020 loss_cls: 4.1883 (3.9877) grad_norm: 4.1301 (4.4242) time: 2.1926 data: 0.0003 max mem: 8421 +[2024-12-05 01:04:26 root] (utils.py 283): INFO Epoch: [2] [2140/2502] eta: 0:11:35 lr: 0.000020 loss_cls: 4.2487 (3.9890) grad_norm: 4.0819 (4.4232) time: 2.2017 data: 0.0003 max mem: 8421 +[2024-12-05 01:04:43 root] (utils.py 283): INFO Epoch: [2] [2150/2502] eta: 0:11:16 lr: 0.000020 loss_cls: 4.2487 (3.9887) grad_norm: 4.1694 (4.4229) time: 1.6977 data: 0.0003 max mem: 8421 +[2024-12-05 01:05:00 root] (utils.py 283): INFO Epoch: [2] [2160/2502] eta: 0:10:56 lr: 0.000020 loss_cls: 4.2158 (3.9902) grad_norm: 4.2702 (4.4222) time: 1.6879 data: 0.0003 max mem: 8421 +[2024-12-05 01:05:17 root] (utils.py 283): INFO Epoch: [2] [2170/2502] eta: 0:10:37 lr: 0.000020 loss_cls: 4.1783 (3.9903) grad_norm: 4.2740 (4.4241) time: 1.6977 data: 0.0003 max mem: 8421 +[2024-12-05 01:05:34 root] (utils.py 283): INFO Epoch: [2] [2180/2502] eta: 0:10:17 lr: 0.000020 loss_cls: 4.0797 (3.9905) grad_norm: 4.2361 (4.4235) time: 1.7187 data: 0.0003 max mem: 8421 +[2024-12-05 01:05:51 root] (utils.py 283): INFO Epoch: [2] [2190/2502] eta: 0:09:58 lr: 0.000020 loss_cls: 4.3021 (3.9918) grad_norm: 4.1656 (4.4230) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 01:06:09 root] (utils.py 283): INFO Epoch: [2] [2200/2502] eta: 0:09:38 lr: 0.000020 loss_cls: 4.3021 (3.9927) grad_norm: 4.2593 (4.4238) time: 1.7195 data: 0.0002 max mem: 8421 +[2024-12-05 01:06:26 root] (utils.py 283): INFO Epoch: [2] [2210/2502] eta: 0:09:19 lr: 0.000020 loss_cls: 4.2293 (3.9927) grad_norm: 4.2589 (4.4239) time: 1.7216 data: 0.0003 max mem: 8421 +[2024-12-05 01:06:43 root] (utils.py 283): INFO Epoch: [2] [2220/2502] eta: 0:08:59 lr: 0.000020 loss_cls: 4.2653 (3.9937) grad_norm: 4.2589 (4.4254) time: 1.7211 data: 0.0003 max mem: 8421 +[2024-12-05 01:07:00 root] (utils.py 283): INFO Epoch: [2] [2230/2502] eta: 0:08:40 lr: 0.000020 loss_cls: 4.2257 (3.9931) grad_norm: 4.1895 (4.4241) time: 1.7197 data: 0.0002 max mem: 8421 +[2024-12-05 01:07:18 root] (utils.py 283): INFO Epoch: [2] [2240/2502] eta: 0:08:21 lr: 0.000020 loss_cls: 4.1958 (3.9938) grad_norm: 4.1928 (4.4236) time: 1.7205 data: 0.0002 max mem: 8421 +[2024-12-05 01:07:35 root] (utils.py 283): INFO Epoch: [2] [2250/2502] eta: 0:08:01 lr: 0.000020 loss_cls: 4.1064 (3.9927) grad_norm: 4.2625 (4.4232) time: 1.7208 data: 0.0003 max mem: 8421 +[2024-12-05 01:07:52 root] (utils.py 283): INFO Epoch: [2] [2260/2502] eta: 0:07:42 lr: 0.000020 loss_cls: 4.0861 (3.9926) grad_norm: 4.1098 (4.4245) time: 1.7199 data: 0.0002 max mem: 8421 +[2024-12-05 01:08:09 root] (utils.py 283): INFO Epoch: [2] [2270/2502] eta: 0:07:23 lr: 0.000020 loss_cls: 3.9906 (3.9915) grad_norm: 4.1986 (4.4240) time: 1.7221 data: 0.0002 max mem: 8421 +[2024-12-05 01:08:26 root] (utils.py 283): INFO Epoch: [2] [2280/2502] eta: 0:07:03 lr: 0.000020 loss_cls: 3.8418 (3.9915) grad_norm: 4.0567 (4.4230) time: 1.7236 data: 0.0002 max mem: 8421 +[2024-12-05 01:08:44 root] (utils.py 283): INFO Epoch: [2] [2290/2502] eta: 0:06:44 lr: 0.000020 loss_cls: 4.2074 (3.9916) grad_norm: 4.0528 (4.4241) time: 1.7207 data: 0.0002 max mem: 8421 +[2024-12-05 01:09:01 root] (utils.py 283): INFO Epoch: [2] [2300/2502] eta: 0:06:25 lr: 0.000020 loss_cls: 4.2626 (3.9921) grad_norm: 4.2037 (4.4235) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 01:09:18 root] (utils.py 283): INFO Epoch: [2] [2310/2502] eta: 0:06:06 lr: 0.000020 loss_cls: 4.1508 (3.9919) grad_norm: 4.2046 (4.4235) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 01:09:35 root] (utils.py 283): INFO Epoch: [2] [2320/2502] eta: 0:05:46 lr: 0.000020 loss_cls: 4.1508 (3.9913) grad_norm: 4.1752 (4.4226) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 01:09:52 root] (utils.py 283): INFO Epoch: [2] [2330/2502] eta: 0:05:27 lr: 0.000020 loss_cls: 4.1678 (3.9917) grad_norm: 4.2703 (4.4225) time: 1.7198 data: 0.0002 max mem: 8421 +[2024-12-05 01:10:10 root] (utils.py 283): INFO Epoch: [2] [2340/2502] eta: 0:05:08 lr: 0.000020 loss_cls: 4.1678 (3.9920) grad_norm: 4.2992 (4.4229) time: 1.7196 data: 0.0002 max mem: 8421 +[2024-12-05 01:10:27 root] (utils.py 283): INFO Epoch: [2] [2350/2502] eta: 0:04:49 lr: 0.000020 loss_cls: 4.1838 (3.9910) grad_norm: 4.3064 (4.4224) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 01:10:44 root] (utils.py 283): INFO Epoch: [2] [2360/2502] eta: 0:04:30 lr: 0.000020 loss_cls: 4.0480 (3.9897) grad_norm: 4.2102 (4.4214) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 01:11:01 root] (utils.py 283): INFO Epoch: [2] [2370/2502] eta: 0:04:11 lr: 0.000020 loss_cls: 3.6616 (3.9886) grad_norm: 4.2102 (4.4213) time: 1.7210 data: 0.0002 max mem: 8421 +[2024-12-05 01:11:18 root] (utils.py 283): INFO Epoch: [2] [2380/2502] eta: 0:03:51 lr: 0.000020 loss_cls: 4.0294 (3.9891) grad_norm: 4.2822 (4.4208) time: 1.7194 data: 0.0002 max mem: 8421 +[2024-12-05 01:11:36 root] (utils.py 283): INFO Epoch: [2] [2390/2502] eta: 0:03:32 lr: 0.000020 loss_cls: 4.1344 (3.9888) grad_norm: 4.0962 (4.4192) time: 1.7214 data: 0.0002 max mem: 8421 +[2024-12-05 01:11:53 root] (utils.py 283): INFO Epoch: [2] [2400/2502] eta: 0:03:13 lr: 0.000020 loss_cls: 3.8601 (3.9880) grad_norm: 4.0774 (4.4193) time: 1.7223 data: 0.0002 max mem: 8421 +[2024-12-05 01:12:10 root] (utils.py 283): INFO Epoch: [2] [2410/2502] eta: 0:02:54 lr: 0.000020 loss_cls: 3.8073 (3.9877) grad_norm: 4.0958 (4.4200) time: 1.7221 data: 0.0002 max mem: 8421 +[2024-12-05 01:12:27 root] (utils.py 283): INFO Epoch: [2] [2420/2502] eta: 0:02:35 lr: 0.000020 loss_cls: 4.0321 (3.9881) grad_norm: 4.3063 (4.4208) time: 1.7220 data: 0.0002 max mem: 8421 +[2024-12-05 01:12:44 root] (utils.py 283): INFO Epoch: [2] [2430/2502] eta: 0:02:16 lr: 0.000020 loss_cls: 4.1731 (3.9884) grad_norm: 4.5692 (4.4212) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 01:13:02 root] (utils.py 283): INFO Epoch: [2] [2440/2502] eta: 0:01:57 lr: 0.000020 loss_cls: 4.2809 (3.9893) grad_norm: 4.4781 (4.4219) time: 1.7171 data: 0.0002 max mem: 8421 +[2024-12-05 01:13:19 root] (utils.py 283): INFO Epoch: [2] [2450/2502] eta: 0:01:38 lr: 0.000020 loss_cls: 4.2276 (3.9897) grad_norm: 4.5200 (4.4233) time: 1.7165 data: 0.0002 max mem: 8421 +[2024-12-05 01:13:36 root] (utils.py 283): INFO Epoch: [2] [2460/2502] eta: 0:01:19 lr: 0.000020 loss_cls: 3.9526 (3.9888) grad_norm: 4.3722 (4.4234) time: 1.7160 data: 0.0002 max mem: 8421 +[2024-12-05 01:13:53 root] (utils.py 283): INFO Epoch: [2] [2470/2502] eta: 0:01:00 lr: 0.000020 loss_cls: 4.0147 (3.9889) grad_norm: 4.2247 (4.4231) time: 1.7170 data: 0.0002 max mem: 8421 +[2024-12-05 01:14:10 root] (utils.py 283): INFO Epoch: [2] [2480/2502] eta: 0:00:41 lr: 0.000020 loss_cls: 4.2238 (3.9903) grad_norm: 4.2576 (4.4229) time: 1.7209 data: 0.0003 max mem: 8421 +[2024-12-05 01:14:28 root] (utils.py 283): INFO Epoch: [2] [2490/2502] eta: 0:00:22 lr: 0.000020 loss_cls: 4.1593 (3.9901) grad_norm: 4.1739 (4.4218) time: 1.7352 data: 0.0245 max mem: 8421 +[2024-12-05 01:14:45 root] (utils.py 283): INFO Epoch: [2] [2500/2502] eta: 0:00:03 lr: 0.000020 loss_cls: 4.1124 (3.9906) grad_norm: 4.1519 (4.4235) time: 1.7128 data: 0.0245 max mem: 8421 +[2024-12-05 01:14:46 root] (utils.py 283): INFO Epoch: [2] [2501/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 4.1124 (3.9909) grad_norm: 4.1957 (4.4236) time: 1.7127 data: 0.0245 max mem: 8421 +[2024-12-05 01:14:46 root] (utils.py 297): INFO Epoch: [2] Total time: 1:18:55 (1.8927 s / it) +[2024-12-05 01:14:46 root] (engine.py 178): INFO Averaged stats:lr: 0.000020 loss_cls: 4.1124 (3.9911) grad_norm: 4.1957 (4.4236) +[2024-12-05 01:14:47 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:17 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7339 (0.7339) acc1: 86.7188 (86.7188) acc3: 92.9688 (92.9688) acc5: 97.6562 (97.6562) time: 0.1756 data: 0.0005 max mem: 8421 +[2024-12-05 01:14:49 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:15 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8744 (0.9121) acc1: 82.8125 (81.2500) acc3: 92.9688 (92.1875) acc5: 94.5312 (95.3835) time: 0.1774 data: 0.0005 max mem: 8421 +[2024-12-05 01:14:51 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9495 (0.9581) acc1: 79.6875 (79.6503) acc3: 90.6250 (91.7411) acc5: 94.5312 (94.9405) time: 0.1896 data: 0.0005 max mem: 8421 +[2024-12-05 01:14:52 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9747 (0.9501) acc1: 78.9062 (79.0071) acc3: 91.4062 (92.3135) acc5: 95.3125 (95.1361) time: 0.1889 data: 0.0005 max mem: 8421 +[2024-12-05 01:14:54 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8912 (0.9402) acc1: 79.6875 (79.4398) acc3: 93.7500 (92.5114) acc5: 95.3125 (95.1982) time: 0.1886 data: 0.0005 max mem: 8421 +[2024-12-05 01:14:56 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0895 (1.0360) acc1: 74.2188 (77.3438) acc3: 89.0625 (90.9007) acc5: 92.1875 (93.9951) time: 0.1882 data: 0.0005 max mem: 8421 +[2024-12-05 01:14:58 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3553 (1.0816) acc1: 71.0938 (76.4985) acc3: 85.9375 (90.0871) acc5: 89.8438 (93.3274) time: 0.1874 data: 0.0005 max mem: 8421 +[2024-12-05 01:15:00 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3423 (1.1273) acc1: 70.3125 (75.3631) acc3: 85.9375 (89.3816) acc5: 90.6250 (92.8147) time: 0.1887 data: 0.0005 max mem: 8421 +[2024-12-05 01:15:02 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3824 (1.1656) acc1: 68.7500 (74.5467) acc3: 85.1562 (88.6671) acc5: 89.0625 (92.2164) time: 0.1815 data: 0.0008 max mem: 8421 +[2024-12-05 01:15:04 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.4070 (1.1986) acc1: 68.7500 (73.8324) acc3: 83.5938 (88.0580) acc5: 87.5000 (91.7754) time: 0.1875 data: 0.0008 max mem: 8421 +[2024-12-05 01:15:05 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2095 (1.1853) acc1: 72.6562 (74.1360) acc3: 85.9375 (88.2880) acc5: 89.8438 (92.0320) time: 0.1841 data: 0.0007 max mem: 8421 +[2024-12-05 01:15:05 root] (utils.py 297): INFO Test: Total time: 0:00:18 (0.1858 s / it) +[2024-12-05 01:15:05 root] (engine.py 263): INFO * Acc@1 73.908 Acc@3 88.342 Acc@5 91.806 loss 1.184 flops 1.285 layer_flops 1.251 +[2024-12-05 01:15:05 root] (main.py 542): INFO Accuracy of the network on the 50000 test images: 73.9% +[2024-12-05 01:15:05 root] (main.py 546): INFO Max accuracy: 73.91% +[2024-12-05 01:15:07 root] (utils.py 283): INFO Epoch: [3] [ 0/2502] eta: 1:11:09 lr: 0.000020 loss_cls: 4.4098 (4.4098) grad_norm: 4.8240 (4.8240) time: 1.7065 data: 0.0004 max mem: 8421 +[2024-12-05 01:15:24 root] (utils.py 283): INFO Epoch: [3] [ 10/2502] eta: 1:11:31 lr: 0.000020 loss_cls: 4.0220 (4.0231) grad_norm: 4.5254 (4.9108) time: 1.7222 data: 0.0003 max mem: 8421 +[2024-12-05 01:15:41 root] (utils.py 283): INFO Epoch: [3] [ 20/2502] eta: 1:11:13 lr: 0.000020 loss_cls: 4.0220 (3.9742) grad_norm: 4.1738 (4.5845) time: 1.7225 data: 0.0002 max mem: 8421 +[2024-12-05 01:15:58 root] (utils.py 283): INFO Epoch: [3] [ 30/2502] eta: 1:10:53 lr: 0.000020 loss_cls: 3.7621 (3.8617) grad_norm: 4.0859 (4.5111) time: 1.7198 data: 0.0003 max mem: 8421 +[2024-12-05 01:16:16 root] (utils.py 283): INFO Epoch: [3] [ 40/2502] eta: 1:10:38 lr: 0.000020 loss_cls: 4.0478 (3.9177) grad_norm: 4.0661 (4.4079) time: 1.7210 data: 0.0003 max mem: 8421 +[2024-12-05 01:16:33 root] (utils.py 283): INFO Epoch: [3] [ 50/2502] eta: 1:10:30 lr: 0.000020 loss_cls: 4.1132 (3.9304) grad_norm: 4.1267 (4.3878) time: 1.7326 data: 0.0003 max mem: 8421 +[2024-12-05 01:16:50 root] (utils.py 283): INFO Epoch: [3] [ 60/2502] eta: 1:10:15 lr: 0.000020 loss_cls: 4.1348 (3.9624) grad_norm: 4.1702 (4.3505) time: 1.7357 data: 0.0003 max mem: 8421 +[2024-12-05 01:17:08 root] (utils.py 283): INFO Epoch: [3] [ 70/2502] eta: 1:09:58 lr: 0.000020 loss_cls: 4.0595 (3.9294) grad_norm: 4.1732 (4.3290) time: 1.7287 data: 0.0003 max mem: 8421 +[2024-12-05 01:17:25 root] (utils.py 283): INFO Epoch: [3] [ 80/2502] eta: 1:09:40 lr: 0.000020 loss_cls: 3.9297 (3.9150) grad_norm: 4.1301 (4.3266) time: 1.7265 data: 0.0003 max mem: 8421 +[2024-12-05 01:17:42 root] (utils.py 283): INFO Epoch: [3] [ 90/2502] eta: 1:09:23 lr: 0.000020 loss_cls: 3.9297 (3.9091) grad_norm: 4.1743 (4.3308) time: 1.7255 data: 0.0003 max mem: 8421 +[2024-12-05 01:17:59 root] (utils.py 283): INFO Epoch: [3] [ 100/2502] eta: 1:09:05 lr: 0.000020 loss_cls: 4.1203 (3.9369) grad_norm: 4.2068 (4.3275) time: 1.7253 data: 0.0003 max mem: 8421 +[2024-12-05 01:18:17 root] (utils.py 283): INFO Epoch: [3] [ 110/2502] eta: 1:08:48 lr: 0.000020 loss_cls: 4.2589 (3.9408) grad_norm: 4.0911 (4.3087) time: 1.7254 data: 0.0002 max mem: 8421 +[2024-12-05 01:18:34 root] (utils.py 283): INFO Epoch: [3] [ 120/2502] eta: 1:08:31 lr: 0.000020 loss_cls: 3.7992 (3.9316) grad_norm: 4.0995 (4.3155) time: 1.7264 data: 0.0003 max mem: 8421 +[2024-12-05 01:18:51 root] (utils.py 283): INFO Epoch: [3] [ 130/2502] eta: 1:08:14 lr: 0.000020 loss_cls: 3.9672 (3.9427) grad_norm: 4.1817 (4.2961) time: 1.7262 data: 0.0003 max mem: 8421 +[2024-12-05 01:19:08 root] (utils.py 283): INFO Epoch: [3] [ 140/2502] eta: 1:07:56 lr: 0.000020 loss_cls: 4.0909 (3.9541) grad_norm: 4.2656 (4.3818) time: 1.7254 data: 0.0003 max mem: 8421 +[2024-12-05 01:19:26 root] (utils.py 283): INFO Epoch: [3] [ 150/2502] eta: 1:07:39 lr: 0.000020 loss_cls: 4.0909 (3.9523) grad_norm: 4.5316 (4.3907) time: 1.7249 data: 0.0003 max mem: 8421 +[2024-12-05 01:19:43 root] (utils.py 283): INFO Epoch: [3] [ 160/2502] eta: 1:07:22 lr: 0.000020 loss_cls: 4.1649 (3.9732) grad_norm: 4.3965 (4.3887) time: 1.7252 data: 0.0003 max mem: 8421 +[2024-12-05 01:20:00 root] (utils.py 283): INFO Epoch: [3] [ 170/2502] eta: 1:07:04 lr: 0.000020 loss_cls: 4.2152 (3.9667) grad_norm: 4.2397 (4.3860) time: 1.7239 data: 0.0003 max mem: 8421 +[2024-12-05 01:20:17 root] (utils.py 283): INFO Epoch: [3] [ 180/2502] eta: 1:06:46 lr: 0.000020 loss_cls: 4.2322 (3.9773) grad_norm: 4.2575 (4.4512) time: 1.7236 data: 0.0003 max mem: 8421 +[2024-12-05 01:20:35 root] (utils.py 283): INFO Epoch: [3] [ 190/2502] eta: 1:06:29 lr: 0.000020 loss_cls: 3.9557 (3.9649) grad_norm: 4.1943 (4.4357) time: 1.7263 data: 0.0003 max mem: 8421 +[2024-12-05 01:20:52 root] (utils.py 283): INFO Epoch: [3] [ 200/2502] eta: 1:06:12 lr: 0.000020 loss_cls: 3.9674 (3.9729) grad_norm: 4.0896 (4.4277) time: 1.7262 data: 0.0003 max mem: 8421 +[2024-12-05 01:21:09 root] (utils.py 283): INFO Epoch: [3] [ 210/2502] eta: 1:05:55 lr: 0.000020 loss_cls: 4.2628 (3.9859) grad_norm: 4.2140 (4.4314) time: 1.7250 data: 0.0003 max mem: 8421 +[2024-12-05 01:21:26 root] (utils.py 283): INFO Epoch: [3] [ 220/2502] eta: 1:05:37 lr: 0.000020 loss_cls: 4.1569 (3.9883) grad_norm: 4.2370 (4.4265) time: 1.7225 data: 0.0002 max mem: 8421 +[2024-12-05 01:21:44 root] (utils.py 283): INFO Epoch: [3] [ 230/2502] eta: 1:05:19 lr: 0.000020 loss_cls: 4.0853 (3.9813) grad_norm: 4.1808 (4.4170) time: 1.7221 data: 0.0003 max mem: 8421 +[2024-12-05 01:22:01 root] (utils.py 283): INFO Epoch: [3] [ 240/2502] eta: 1:05:02 lr: 0.000020 loss_cls: 4.2581 (3.9987) grad_norm: 4.3460 (4.4182) time: 1.7219 data: 0.0003 max mem: 8421 +[2024-12-05 01:22:18 root] (utils.py 283): INFO Epoch: [3] [ 250/2502] eta: 1:04:44 lr: 0.000020 loss_cls: 4.2841 (3.9993) grad_norm: 4.4134 (4.4243) time: 1.7202 data: 0.0002 max mem: 8421 +[2024-12-05 01:22:35 root] (utils.py 283): INFO Epoch: [3] [ 260/2502] eta: 1:04:26 lr: 0.000020 loss_cls: 3.9826 (3.9975) grad_norm: 4.4134 (4.4289) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 01:22:52 root] (utils.py 283): INFO Epoch: [3] [ 270/2502] eta: 1:04:08 lr: 0.000020 loss_cls: 3.9599 (3.9983) grad_norm: 4.2497 (4.4190) time: 1.7183 data: 0.0002 max mem: 8421 +[2024-12-05 01:23:10 root] (utils.py 283): INFO Epoch: [3] [ 280/2502] eta: 1:03:51 lr: 0.000020 loss_cls: 4.1171 (4.0034) grad_norm: 4.2497 (4.4313) time: 1.7188 data: 0.0002 max mem: 8421 +[2024-12-05 01:23:27 root] (utils.py 283): INFO Epoch: [3] [ 290/2502] eta: 1:03:33 lr: 0.000020 loss_cls: 4.1274 (4.0039) grad_norm: 4.4900 (4.4270) time: 1.7186 data: 0.0002 max mem: 8421 +[2024-12-05 01:23:44 root] (utils.py 283): INFO Epoch: [3] [ 300/2502] eta: 1:03:15 lr: 0.000020 loss_cls: 4.1004 (4.0094) grad_norm: 4.1567 (4.4211) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 01:24:01 root] (utils.py 283): INFO Epoch: [3] [ 310/2502] eta: 1:02:58 lr: 0.000020 loss_cls: 4.0057 (4.0030) grad_norm: 4.2924 (4.4177) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 01:24:18 root] (utils.py 283): INFO Epoch: [3] [ 320/2502] eta: 1:02:40 lr: 0.000020 loss_cls: 3.7671 (3.9993) grad_norm: 4.3131 (4.4140) time: 1.7180 data: 0.0002 max mem: 8421 +[2024-12-05 01:24:36 root] (utils.py 283): INFO Epoch: [3] [ 330/2502] eta: 1:02:23 lr: 0.000020 loss_cls: 3.6711 (3.9907) grad_norm: 4.0228 (4.4000) time: 1.7187 data: 0.0002 max mem: 8421 +[2024-12-05 01:24:53 root] (utils.py 283): INFO Epoch: [3] [ 340/2502] eta: 1:02:05 lr: 0.000020 loss_cls: 4.1357 (3.9929) grad_norm: 4.1164 (4.4049) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 01:25:10 root] (utils.py 283): INFO Epoch: [3] [ 350/2502] eta: 1:01:48 lr: 0.000020 loss_cls: 4.2265 (3.9934) grad_norm: 4.3252 (4.4015) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 01:25:27 root] (utils.py 283): INFO Epoch: [3] [ 360/2502] eta: 1:01:30 lr: 0.000020 loss_cls: 4.0767 (3.9889) grad_norm: 4.3297 (4.4037) time: 1.7185 data: 0.0002 max mem: 8421 +[2024-12-05 01:25:44 root] (utils.py 283): INFO Epoch: [3] [ 370/2502] eta: 1:01:13 lr: 0.000020 loss_cls: 3.8602 (3.9852) grad_norm: 4.3173 (4.3999) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 01:26:02 root] (utils.py 283): INFO Epoch: [3] [ 380/2502] eta: 1:00:55 lr: 0.000020 loss_cls: 3.9024 (3.9818) grad_norm: 4.1780 (4.3991) time: 1.7216 data: 0.0003 max mem: 8421 +[2024-12-05 01:26:19 root] (utils.py 283): INFO Epoch: [3] [ 390/2502] eta: 1:00:38 lr: 0.000020 loss_cls: 3.9069 (3.9813) grad_norm: 4.3584 (4.3979) time: 1.7218 data: 0.0003 max mem: 8421 +[2024-12-05 01:26:36 root] (utils.py 283): INFO Epoch: [3] [ 400/2502] eta: 1:00:21 lr: 0.000020 loss_cls: 4.3040 (3.9887) grad_norm: 4.1611 (4.3926) time: 1.7188 data: 0.0002 max mem: 8421 +[2024-12-05 01:26:53 root] (utils.py 283): INFO Epoch: [3] [ 410/2502] eta: 1:00:03 lr: 0.000020 loss_cls: 4.3040 (3.9910) grad_norm: 4.1227 (4.3906) time: 1.7188 data: 0.0003 max mem: 8421 +[2024-12-05 01:27:10 root] (utils.py 283): INFO Epoch: [3] [ 420/2502] eta: 0:59:46 lr: 0.000020 loss_cls: 4.0638 (3.9850) grad_norm: 4.2550 (4.3948) time: 1.7215 data: 0.0003 max mem: 8421 +[2024-12-05 01:27:28 root] (utils.py 283): INFO Epoch: [3] [ 430/2502] eta: 0:59:29 lr: 0.000020 loss_cls: 4.1004 (3.9833) grad_norm: 4.3483 (4.4444) time: 1.7243 data: 0.0003 max mem: 8421 +[2024-12-05 01:27:45 root] (utils.py 283): INFO Epoch: [3] [ 440/2502] eta: 0:59:12 lr: 0.000020 loss_cls: 4.1004 (3.9823) grad_norm: 4.4086 (4.4482) time: 1.7242 data: 0.0003 max mem: 8421 +[2024-12-05 01:28:02 root] (utils.py 283): INFO Epoch: [3] [ 450/2502] eta: 0:58:54 lr: 0.000020 loss_cls: 4.1474 (3.9838) grad_norm: 4.3374 (4.4431) time: 1.7212 data: 0.0003 max mem: 8421 +[2024-12-05 01:28:19 root] (utils.py 283): INFO Epoch: [3] [ 460/2502] eta: 0:58:37 lr: 0.000020 loss_cls: 4.2077 (3.9865) grad_norm: 4.2036 (4.4379) time: 1.7195 data: 0.0003 max mem: 8421 +[2024-12-05 01:28:36 root] (utils.py 283): INFO Epoch: [3] [ 470/2502] eta: 0:58:20 lr: 0.000020 loss_cls: 4.2408 (3.9887) grad_norm: 4.2036 (4.4323) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 01:28:54 root] (utils.py 283): INFO Epoch: [3] [ 480/2502] eta: 0:58:02 lr: 0.000020 loss_cls: 3.9242 (3.9849) grad_norm: 4.2077 (4.4311) time: 1.7189 data: 0.0003 max mem: 8421 +[2024-12-05 01:29:11 root] (utils.py 283): INFO Epoch: [3] [ 490/2502] eta: 0:57:45 lr: 0.000020 loss_cls: 3.9451 (3.9878) grad_norm: 4.1721 (4.4259) time: 1.7182 data: 0.0003 max mem: 8421 +[2024-12-05 01:29:28 root] (utils.py 283): INFO Epoch: [3] [ 500/2502] eta: 0:57:27 lr: 0.000020 loss_cls: 4.1636 (3.9871) grad_norm: 4.1457 (4.4230) time: 1.7170 data: 0.0002 max mem: 8421 +[2024-12-05 01:29:45 root] (utils.py 283): INFO Epoch: [3] [ 510/2502] eta: 0:57:10 lr: 0.000020 loss_cls: 3.8459 (3.9841) grad_norm: 4.1457 (4.4223) time: 1.7171 data: 0.0002 max mem: 8421 +[2024-12-05 01:30:02 root] (utils.py 283): INFO Epoch: [3] [ 520/2502] eta: 0:56:53 lr: 0.000020 loss_cls: 4.1524 (3.9873) grad_norm: 4.1928 (4.4281) time: 1.7180 data: 0.0002 max mem: 8421 +[2024-12-05 01:30:20 root] (utils.py 283): INFO Epoch: [3] [ 530/2502] eta: 0:56:35 lr: 0.000020 loss_cls: 4.0990 (3.9810) grad_norm: 4.1423 (4.4217) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 01:30:37 root] (utils.py 283): INFO Epoch: [3] [ 540/2502] eta: 0:56:18 lr: 0.000020 loss_cls: 3.9069 (3.9790) grad_norm: 4.1250 (4.4175) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 01:30:54 root] (utils.py 283): INFO Epoch: [3] [ 550/2502] eta: 0:56:01 lr: 0.000020 loss_cls: 3.9144 (3.9767) grad_norm: 4.1228 (4.4144) time: 1.7195 data: 0.0002 max mem: 8421 +[2024-12-05 01:31:11 root] (utils.py 283): INFO Epoch: [3] [ 560/2502] eta: 0:55:43 lr: 0.000020 loss_cls: 3.9409 (3.9774) grad_norm: 4.1621 (4.4169) time: 1.7210 data: 0.0002 max mem: 8421 +[2024-12-05 01:31:28 root] (utils.py 283): INFO Epoch: [3] [ 570/2502] eta: 0:55:26 lr: 0.000020 loss_cls: 3.8790 (3.9746) grad_norm: 4.2538 (4.4159) time: 1.7195 data: 0.0002 max mem: 8421 +[2024-12-05 01:31:46 root] (utils.py 283): INFO Epoch: [3] [ 580/2502] eta: 0:55:09 lr: 0.000020 loss_cls: 4.0916 (3.9740) grad_norm: 4.1970 (4.4154) time: 1.7210 data: 0.0002 max mem: 8421 +[2024-12-05 01:32:03 root] (utils.py 283): INFO Epoch: [3] [ 590/2502] eta: 0:54:52 lr: 0.000020 loss_cls: 4.1252 (3.9763) grad_norm: 4.1206 (4.4113) time: 1.7234 data: 0.0002 max mem: 8421 +[2024-12-05 01:32:20 root] (utils.py 283): INFO Epoch: [3] [ 600/2502] eta: 0:54:34 lr: 0.000020 loss_cls: 4.1234 (3.9715) grad_norm: 4.1206 (4.4113) time: 1.7222 data: 0.0003 max mem: 8421 +[2024-12-05 01:32:37 root] (utils.py 283): INFO Epoch: [3] [ 610/2502] eta: 0:54:17 lr: 0.000020 loss_cls: 3.8782 (3.9705) grad_norm: 4.1258 (4.4071) time: 1.7216 data: 0.0003 max mem: 8421 +[2024-12-05 01:32:54 root] (utils.py 283): INFO Epoch: [3] [ 620/2502] eta: 0:54:00 lr: 0.000020 loss_cls: 4.0958 (3.9722) grad_norm: 4.1821 (4.4046) time: 1.7219 data: 0.0003 max mem: 8421 +[2024-12-05 01:33:12 root] (utils.py 283): INFO Epoch: [3] [ 630/2502] eta: 0:53:43 lr: 0.000020 loss_cls: 4.2816 (3.9770) grad_norm: 4.1982 (4.4045) time: 1.7212 data: 0.0003 max mem: 8421 +[2024-12-05 01:33:29 root] (utils.py 283): INFO Epoch: [3] [ 640/2502] eta: 0:53:26 lr: 0.000020 loss_cls: 4.2816 (3.9775) grad_norm: 4.2009 (4.4030) time: 1.7237 data: 0.0003 max mem: 8421 +[2024-12-05 01:33:46 root] (utils.py 283): INFO Epoch: [3] [ 650/2502] eta: 0:53:09 lr: 0.000020 loss_cls: 4.1688 (3.9758) grad_norm: 4.2005 (4.4002) time: 1.7285 data: 0.0002 max mem: 8421 +[2024-12-05 01:34:04 root] (utils.py 283): INFO Epoch: [3] [ 660/2502] eta: 0:52:52 lr: 0.000020 loss_cls: 3.9465 (3.9741) grad_norm: 4.2821 (4.3999) time: 1.7296 data: 0.0002 max mem: 8421 +[2024-12-05 01:34:21 root] (utils.py 283): INFO Epoch: [3] [ 670/2502] eta: 0:52:34 lr: 0.000020 loss_cls: 3.8786 (3.9740) grad_norm: 4.4073 (4.4051) time: 1.7249 data: 0.0003 max mem: 8421 +[2024-12-05 01:34:38 root] (utils.py 283): INFO Epoch: [3] [ 680/2502] eta: 0:52:17 lr: 0.000020 loss_cls: 4.0558 (3.9753) grad_norm: 4.4715 (4.4051) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 01:34:55 root] (utils.py 283): INFO Epoch: [3] [ 690/2502] eta: 0:52:00 lr: 0.000020 loss_cls: 4.0558 (3.9747) grad_norm: 4.1720 (4.4020) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 01:35:12 root] (utils.py 283): INFO Epoch: [3] [ 700/2502] eta: 0:51:43 lr: 0.000020 loss_cls: 4.1017 (3.9769) grad_norm: 4.1234 (4.4005) time: 1.7181 data: 0.0002 max mem: 8421 +[2024-12-05 01:35:30 root] (utils.py 283): INFO Epoch: [3] [ 710/2502] eta: 0:51:25 lr: 0.000020 loss_cls: 4.0890 (3.9750) grad_norm: 4.0772 (4.3979) time: 1.7219 data: 0.0002 max mem: 8421 +[2024-12-05 01:35:47 root] (utils.py 283): INFO Epoch: [3] [ 720/2502] eta: 0:51:08 lr: 0.000020 loss_cls: 3.9140 (3.9732) grad_norm: 4.1455 (4.3980) time: 1.7243 data: 0.0002 max mem: 8421 +[2024-12-05 01:36:04 root] (utils.py 283): INFO Epoch: [3] [ 730/2502] eta: 0:50:51 lr: 0.000020 loss_cls: 3.8491 (3.9713) grad_norm: 4.1455 (4.3948) time: 1.7222 data: 0.0002 max mem: 8421 +[2024-12-05 01:36:21 root] (utils.py 283): INFO Epoch: [3] [ 740/2502] eta: 0:50:34 lr: 0.000020 loss_cls: 3.8042 (3.9692) grad_norm: 4.0836 (4.3917) time: 1.7232 data: 0.0002 max mem: 8421 +[2024-12-05 01:36:38 root] (utils.py 283): INFO Epoch: [3] [ 750/2502] eta: 0:50:17 lr: 0.000020 loss_cls: 4.0802 (3.9723) grad_norm: 4.1405 (4.3895) time: 1.7222 data: 0.0002 max mem: 8421 +[2024-12-05 01:36:56 root] (utils.py 283): INFO Epoch: [3] [ 760/2502] eta: 0:49:59 lr: 0.000020 loss_cls: 4.2100 (3.9740) grad_norm: 4.1722 (4.3887) time: 1.7191 data: 0.0002 max mem: 8421 +[2024-12-05 01:37:13 root] (utils.py 283): INFO Epoch: [3] [ 770/2502] eta: 0:49:42 lr: 0.000020 loss_cls: 3.9384 (3.9716) grad_norm: 4.3411 (4.3898) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 01:37:30 root] (utils.py 283): INFO Epoch: [3] [ 780/2502] eta: 0:49:25 lr: 0.000020 loss_cls: 3.8133 (3.9709) grad_norm: 4.2728 (4.3875) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 01:37:47 root] (utils.py 283): INFO Epoch: [3] [ 790/2502] eta: 0:49:07 lr: 0.000020 loss_cls: 4.0168 (3.9695) grad_norm: 4.2245 (4.3872) time: 1.7217 data: 0.0002 max mem: 8421 +[2024-12-05 01:38:05 root] (utils.py 283): INFO Epoch: [3] [ 800/2502] eta: 0:48:50 lr: 0.000020 loss_cls: 4.0168 (3.9711) grad_norm: 4.2255 (4.3852) time: 1.7259 data: 0.0002 max mem: 8421 +[2024-12-05 01:38:22 root] (utils.py 283): INFO Epoch: [3] [ 810/2502] eta: 0:48:33 lr: 0.000020 loss_cls: 4.0365 (3.9704) grad_norm: 4.3371 (4.3885) time: 1.7288 data: 0.0003 max mem: 8421 +[2024-12-05 01:38:39 root] (utils.py 283): INFO Epoch: [3] [ 820/2502] eta: 0:48:16 lr: 0.000020 loss_cls: 4.1762 (3.9728) grad_norm: 4.2392 (4.3870) time: 1.7256 data: 0.0003 max mem: 8421 +[2024-12-05 01:38:56 root] (utils.py 283): INFO Epoch: [3] [ 830/2502] eta: 0:47:59 lr: 0.000020 loss_cls: 4.2121 (3.9733) grad_norm: 4.1664 (4.3835) time: 1.7224 data: 0.0002 max mem: 8421 +[2024-12-05 01:39:13 root] (utils.py 283): INFO Epoch: [3] [ 840/2502] eta: 0:47:42 lr: 0.000020 loss_cls: 4.0790 (3.9724) grad_norm: 4.3280 (4.3834) time: 1.7204 data: 0.0003 max mem: 8421 +[2024-12-05 01:39:31 root] (utils.py 283): INFO Epoch: [3] [ 850/2502] eta: 0:47:24 lr: 0.000020 loss_cls: 3.9857 (3.9724) grad_norm: 4.3457 (4.3838) time: 1.7204 data: 0.0003 max mem: 8421 +[2024-12-05 01:39:48 root] (utils.py 283): INFO Epoch: [3] [ 860/2502] eta: 0:47:07 lr: 0.000020 loss_cls: 4.2237 (3.9755) grad_norm: 4.1321 (4.3807) time: 1.7223 data: 0.0003 max mem: 8421 +[2024-12-05 01:40:05 root] (utils.py 283): INFO Epoch: [3] [ 870/2502] eta: 0:46:50 lr: 0.000020 loss_cls: 4.3138 (3.9816) grad_norm: 4.2177 (4.3799) time: 1.7231 data: 0.0002 max mem: 8421 +[2024-12-05 01:40:22 root] (utils.py 283): INFO Epoch: [3] [ 880/2502] eta: 0:46:33 lr: 0.000020 loss_cls: 4.3355 (3.9835) grad_norm: 4.2818 (4.3889) time: 1.7238 data: 0.0002 max mem: 8421 +[2024-12-05 01:40:40 root] (utils.py 283): INFO Epoch: [3] [ 890/2502] eta: 0:46:16 lr: 0.000020 loss_cls: 4.1226 (3.9825) grad_norm: 4.2431 (4.3903) time: 1.7260 data: 0.0003 max mem: 8421 +[2024-12-05 01:40:57 root] (utils.py 283): INFO Epoch: [3] [ 900/2502] eta: 0:45:58 lr: 0.000020 loss_cls: 3.9244 (3.9806) grad_norm: 4.2964 (4.3894) time: 1.7239 data: 0.0003 max mem: 8421 +[2024-12-05 01:41:14 root] (utils.py 283): INFO Epoch: [3] [ 910/2502] eta: 0:45:41 lr: 0.000020 loss_cls: 4.0502 (3.9818) grad_norm: 4.2669 (4.3925) time: 1.7207 data: 0.0003 max mem: 8421 +[2024-12-05 01:41:31 root] (utils.py 283): INFO Epoch: [3] [ 920/2502] eta: 0:45:24 lr: 0.000020 loss_cls: 4.1395 (3.9825) grad_norm: 4.1836 (4.3921) time: 1.7213 data: 0.0003 max mem: 8421 +[2024-12-05 01:41:49 root] (utils.py 283): INFO Epoch: [3] [ 930/2502] eta: 0:45:07 lr: 0.000020 loss_cls: 4.2838 (3.9833) grad_norm: 4.1831 (4.3917) time: 1.7215 data: 0.0003 max mem: 8421 +[2024-12-05 01:42:06 root] (utils.py 283): INFO Epoch: [3] [ 940/2502] eta: 0:44:49 lr: 0.000020 loss_cls: 4.2838 (3.9854) grad_norm: 4.1383 (4.3978) time: 1.7213 data: 0.0002 max mem: 8421 +[2024-12-05 01:42:23 root] (utils.py 283): INFO Epoch: [3] [ 950/2502] eta: 0:44:32 lr: 0.000020 loss_cls: 4.2235 (3.9840) grad_norm: 4.1685 (4.3965) time: 1.7211 data: 0.0003 max mem: 8421 +[2024-12-05 01:42:40 root] (utils.py 283): INFO Epoch: [3] [ 960/2502] eta: 0:44:15 lr: 0.000020 loss_cls: 3.8210 (3.9825) grad_norm: 4.1271 (4.3945) time: 1.7208 data: 0.0003 max mem: 8421 +[2024-12-05 01:42:57 root] (utils.py 283): INFO Epoch: [3] [ 970/2502] eta: 0:43:58 lr: 0.000020 loss_cls: 4.2215 (3.9850) grad_norm: 4.1037 (4.3914) time: 1.7198 data: 0.0002 max mem: 8421 +[2024-12-05 01:43:15 root] (utils.py 283): INFO Epoch: [3] [ 980/2502] eta: 0:43:40 lr: 0.000020 loss_cls: 4.2397 (3.9862) grad_norm: 4.1656 (4.3917) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 01:43:32 root] (utils.py 283): INFO Epoch: [3] [ 990/2502] eta: 0:43:23 lr: 0.000020 loss_cls: 4.1497 (3.9867) grad_norm: 4.2746 (4.3910) time: 1.7196 data: 0.0002 max mem: 8421 +[2024-12-05 01:43:49 root] (utils.py 283): INFO Epoch: [3] [1000/2502] eta: 0:43:06 lr: 0.000020 loss_cls: 4.1018 (3.9875) grad_norm: 4.1460 (4.3883) time: 1.7188 data: 0.0002 max mem: 8421 +[2024-12-05 01:44:06 root] (utils.py 283): INFO Epoch: [3] [1010/2502] eta: 0:42:49 lr: 0.000020 loss_cls: 4.1619 (3.9897) grad_norm: 4.1390 (4.3881) time: 1.7187 data: 0.0002 max mem: 8421 +[2024-12-05 01:44:23 root] (utils.py 283): INFO Epoch: [3] [1020/2502] eta: 0:42:31 lr: 0.000020 loss_cls: 4.1367 (3.9881) grad_norm: 4.3687 (4.3866) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 01:44:40 root] (utils.py 283): INFO Epoch: [3] [1030/2502] eta: 0:42:14 lr: 0.000020 loss_cls: 3.8618 (3.9851) grad_norm: 4.0578 (4.3841) time: 1.7166 data: 0.0002 max mem: 8421 +[2024-12-05 01:44:58 root] (utils.py 283): INFO Epoch: [3] [1040/2502] eta: 0:41:57 lr: 0.000020 loss_cls: 3.8737 (3.9842) grad_norm: 4.1956 (4.3836) time: 1.7171 data: 0.0002 max mem: 8421 +[2024-12-05 01:45:15 root] (utils.py 283): INFO Epoch: [3] [1050/2502] eta: 0:41:40 lr: 0.000020 loss_cls: 4.0278 (3.9842) grad_norm: 4.3352 (4.3834) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 01:45:32 root] (utils.py 283): INFO Epoch: [3] [1060/2502] eta: 0:41:22 lr: 0.000020 loss_cls: 4.0975 (3.9852) grad_norm: 4.3576 (4.3835) time: 1.7196 data: 0.0002 max mem: 8421 +[2024-12-05 01:45:49 root] (utils.py 283): INFO Epoch: [3] [1070/2502] eta: 0:41:05 lr: 0.000020 loss_cls: 4.1571 (3.9871) grad_norm: 4.2254 (4.3813) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 01:46:06 root] (utils.py 283): INFO Epoch: [3] [1080/2502] eta: 0:40:48 lr: 0.000020 loss_cls: 4.1571 (3.9883) grad_norm: 4.1473 (4.3802) time: 1.7215 data: 0.0003 max mem: 8421 +[2024-12-05 01:46:24 root] (utils.py 283): INFO Epoch: [3] [1090/2502] eta: 0:40:31 lr: 0.000020 loss_cls: 4.2379 (3.9884) grad_norm: 4.1354 (4.3775) time: 1.7243 data: 0.0003 max mem: 8421 +[2024-12-05 01:46:41 root] (utils.py 283): INFO Epoch: [3] [1100/2502] eta: 0:40:14 lr: 0.000020 loss_cls: 4.2379 (3.9887) grad_norm: 4.0989 (4.3771) time: 1.7242 data: 0.0003 max mem: 8421 +[2024-12-05 01:46:58 root] (utils.py 283): INFO Epoch: [3] [1110/2502] eta: 0:39:56 lr: 0.000020 loss_cls: 3.5227 (3.9853) grad_norm: 4.0093 (4.3738) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 01:47:15 root] (utils.py 283): INFO Epoch: [3] [1120/2502] eta: 0:39:39 lr: 0.000020 loss_cls: 3.6024 (3.9841) grad_norm: 4.0093 (4.3720) time: 1.7198 data: 0.0002 max mem: 8421 +[2024-12-05 01:47:33 root] (utils.py 283): INFO Epoch: [3] [1130/2502] eta: 0:39:22 lr: 0.000020 loss_cls: 3.9717 (3.9849) grad_norm: 4.1019 (4.3708) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 01:47:50 root] (utils.py 283): INFO Epoch: [3] [1140/2502] eta: 0:39:05 lr: 0.000020 loss_cls: 3.8376 (3.9826) grad_norm: 4.0928 (4.3694) time: 1.7249 data: 0.0003 max mem: 8421 +[2024-12-05 01:48:07 root] (utils.py 283): INFO Epoch: [3] [1150/2502] eta: 0:38:48 lr: 0.000020 loss_cls: 3.8376 (3.9831) grad_norm: 4.0938 (4.3683) time: 1.7309 data: 0.0003 max mem: 8421 +[2024-12-05 01:48:24 root] (utils.py 283): INFO Epoch: [3] [1160/2502] eta: 0:38:30 lr: 0.000020 loss_cls: 4.1721 (3.9829) grad_norm: 4.2010 (4.3690) time: 1.7250 data: 0.0003 max mem: 8421 +[2024-12-05 01:48:42 root] (utils.py 283): INFO Epoch: [3] [1170/2502] eta: 0:38:13 lr: 0.000020 loss_cls: 4.1721 (3.9846) grad_norm: 4.2139 (4.3736) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 01:48:59 root] (utils.py 283): INFO Epoch: [3] [1180/2502] eta: 0:37:56 lr: 0.000020 loss_cls: 4.0592 (3.9831) grad_norm: 4.2174 (4.3736) time: 1.7202 data: 0.0002 max mem: 8421 +[2024-12-05 01:49:16 root] (utils.py 283): INFO Epoch: [3] [1190/2502] eta: 0:37:39 lr: 0.000020 loss_cls: 3.9271 (3.9835) grad_norm: 4.4220 (4.4368) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 01:49:33 root] (utils.py 283): INFO Epoch: [3] [1200/2502] eta: 0:37:21 lr: 0.000020 loss_cls: 4.1763 (3.9860) grad_norm: 5.3651 (4.4503) time: 1.7224 data: 0.0002 max mem: 8421 +[2024-12-05 01:49:50 root] (utils.py 283): INFO Epoch: [3] [1210/2502] eta: 0:37:04 lr: 0.000020 loss_cls: 3.7744 (3.9826) grad_norm: 4.6340 (4.4493) time: 1.7210 data: 0.0002 max mem: 8421 +[2024-12-05 01:50:08 root] (utils.py 283): INFO Epoch: [3] [1220/2502] eta: 0:36:47 lr: 0.000020 loss_cls: 3.8054 (3.9834) grad_norm: 4.2193 (4.4474) time: 1.7198 data: 0.0002 max mem: 8421 +[2024-12-05 01:50:25 root] (utils.py 283): INFO Epoch: [3] [1230/2502] eta: 0:36:30 lr: 0.000020 loss_cls: 4.0545 (3.9820) grad_norm: 4.1145 (4.4446) time: 1.7195 data: 0.0002 max mem: 8421 +[2024-12-05 01:50:42 root] (utils.py 283): INFO Epoch: [3] [1240/2502] eta: 0:36:12 lr: 0.000020 loss_cls: 4.0226 (3.9833) grad_norm: 4.2019 (4.4445) time: 1.7188 data: 0.0003 max mem: 8421 +[2024-12-05 01:50:59 root] (utils.py 283): INFO Epoch: [3] [1250/2502] eta: 0:35:55 lr: 0.000020 loss_cls: 4.1978 (3.9860) grad_norm: 4.2607 (4.4444) time: 1.7168 data: 0.0002 max mem: 8421 +[2024-12-05 01:51:16 root] (utils.py 283): INFO Epoch: [3] [1260/2502] eta: 0:35:38 lr: 0.000020 loss_cls: 4.2546 (3.9874) grad_norm: 4.3429 (4.4454) time: 1.7157 data: 0.0002 max mem: 8421 +[2024-12-05 01:51:33 root] (utils.py 283): INFO Epoch: [3] [1270/2502] eta: 0:35:21 lr: 0.000020 loss_cls: 4.1335 (3.9875) grad_norm: 4.3461 (4.4441) time: 1.7161 data: 0.0002 max mem: 8421 +[2024-12-05 01:51:51 root] (utils.py 283): INFO Epoch: [3] [1280/2502] eta: 0:35:03 lr: 0.000020 loss_cls: 4.1513 (3.9877) grad_norm: 4.4031 (4.4452) time: 1.7153 data: 0.0003 max mem: 8421 +[2024-12-05 01:52:08 root] (utils.py 283): INFO Epoch: [3] [1290/2502] eta: 0:34:46 lr: 0.000020 loss_cls: 4.2444 (3.9897) grad_norm: 4.4520 (4.4464) time: 1.7168 data: 0.0002 max mem: 8421 +[2024-12-05 01:52:25 root] (utils.py 283): INFO Epoch: [3] [1300/2502] eta: 0:34:29 lr: 0.000020 loss_cls: 4.2444 (3.9923) grad_norm: 4.2796 (4.4460) time: 1.7168 data: 0.0002 max mem: 8421 +[2024-12-05 01:52:42 root] (utils.py 283): INFO Epoch: [3] [1310/2502] eta: 0:34:12 lr: 0.000020 loss_cls: 4.3054 (3.9933) grad_norm: 4.0073 (4.4438) time: 1.7154 data: 0.0002 max mem: 8421 +[2024-12-05 01:52:59 root] (utils.py 283): INFO Epoch: [3] [1320/2502] eta: 0:33:54 lr: 0.000020 loss_cls: 4.3054 (3.9941) grad_norm: 4.1194 (4.4419) time: 1.7152 data: 0.0002 max mem: 8421 +[2024-12-05 01:53:16 root] (utils.py 283): INFO Epoch: [3] [1330/2502] eta: 0:33:37 lr: 0.000020 loss_cls: 3.9949 (3.9931) grad_norm: 4.1555 (4.4407) time: 1.7156 data: 0.0002 max mem: 8421 +[2024-12-05 01:53:34 root] (utils.py 283): INFO Epoch: [3] [1340/2502] eta: 0:33:20 lr: 0.000020 loss_cls: 3.9949 (3.9934) grad_norm: 4.2537 (4.4450) time: 1.7146 data: 0.0002 max mem: 8421 +[2024-12-05 01:53:51 root] (utils.py 283): INFO Epoch: [3] [1350/2502] eta: 0:33:02 lr: 0.000020 loss_cls: 4.2154 (3.9926) grad_norm: 4.2647 (4.4436) time: 1.7137 data: 0.0002 max mem: 8421 +[2024-12-05 01:54:08 root] (utils.py 283): INFO Epoch: [3] [1360/2502] eta: 0:32:45 lr: 0.000020 loss_cls: 4.2082 (3.9924) grad_norm: 4.2647 (4.4429) time: 1.7143 data: 0.0002 max mem: 8421 +[2024-12-05 01:54:25 root] (utils.py 283): INFO Epoch: [3] [1370/2502] eta: 0:32:28 lr: 0.000020 loss_cls: 4.1825 (3.9935) grad_norm: 4.1718 (4.4410) time: 1.7144 data: 0.0002 max mem: 8421 +[2024-12-05 01:54:42 root] (utils.py 283): INFO Epoch: [3] [1380/2502] eta: 0:32:11 lr: 0.000020 loss_cls: 4.1758 (3.9936) grad_norm: 4.2149 (4.4408) time: 1.7155 data: 0.0002 max mem: 8421 +[2024-12-05 01:54:59 root] (utils.py 283): INFO Epoch: [3] [1390/2502] eta: 0:31:53 lr: 0.000020 loss_cls: 4.1758 (3.9945) grad_norm: 4.2722 (4.4398) time: 1.7127 data: 0.0002 max mem: 8421 +[2024-12-05 01:55:16 root] (utils.py 283): INFO Epoch: [3] [1400/2502] eta: 0:31:36 lr: 0.000020 loss_cls: 4.1454 (3.9938) grad_norm: 4.2005 (4.4391) time: 1.6920 data: 0.0002 max mem: 8421 +[2024-12-05 01:55:33 root] (utils.py 283): INFO Epoch: [3] [1410/2502] eta: 0:31:18 lr: 0.000020 loss_cls: 3.9167 (3.9941) grad_norm: 4.1755 (4.4403) time: 1.6707 data: 0.0002 max mem: 8421 +[2024-12-05 01:55:50 root] (utils.py 283): INFO Epoch: [3] [1420/2502] eta: 0:31:01 lr: 0.000020 loss_cls: 4.1655 (3.9947) grad_norm: 4.2136 (4.4393) time: 1.6896 data: 0.0002 max mem: 8421 +[2024-12-05 01:56:07 root] (utils.py 283): INFO Epoch: [3] [1430/2502] eta: 0:30:44 lr: 0.000020 loss_cls: 3.7635 (3.9926) grad_norm: 4.4030 (4.4398) time: 1.7138 data: 0.0002 max mem: 8421 +[2024-12-05 01:56:24 root] (utils.py 283): INFO Epoch: [3] [1440/2502] eta: 0:30:26 lr: 0.000020 loss_cls: 3.7653 (3.9929) grad_norm: 4.4846 (4.4398) time: 1.7154 data: 0.0002 max mem: 8421 +[2024-12-05 01:56:41 root] (utils.py 283): INFO Epoch: [3] [1450/2502] eta: 0:30:09 lr: 0.000020 loss_cls: 4.0983 (3.9917) grad_norm: 4.2523 (4.4388) time: 1.7149 data: 0.0002 max mem: 8421 +[2024-12-05 01:56:58 root] (utils.py 283): INFO Epoch: [3] [1460/2502] eta: 0:29:52 lr: 0.000020 loss_cls: 4.0594 (3.9910) grad_norm: 4.2523 (4.4376) time: 1.7141 data: 0.0003 max mem: 8421 +[2024-12-05 01:57:16 root] (utils.py 283): INFO Epoch: [3] [1470/2502] eta: 0:29:35 lr: 0.000020 loss_cls: 3.6369 (3.9886) grad_norm: 4.2494 (4.4371) time: 1.7150 data: 0.0002 max mem: 8421 +[2024-12-05 01:57:33 root] (utils.py 283): INFO Epoch: [3] [1480/2502] eta: 0:29:17 lr: 0.000020 loss_cls: 3.8661 (3.9876) grad_norm: 4.2018 (4.4350) time: 1.7151 data: 0.0002 max mem: 8421 +[2024-12-05 01:57:50 root] (utils.py 283): INFO Epoch: [3] [1490/2502] eta: 0:29:00 lr: 0.000020 loss_cls: 4.0735 (3.9883) grad_norm: 4.0628 (4.4349) time: 1.7149 data: 0.0002 max mem: 8421 +[2024-12-05 01:58:07 root] (utils.py 283): INFO Epoch: [3] [1500/2502] eta: 0:28:43 lr: 0.000020 loss_cls: 4.1927 (3.9872) grad_norm: 4.1467 (4.4329) time: 1.7156 data: 0.0002 max mem: 8421 +[2024-12-05 01:58:24 root] (utils.py 283): INFO Epoch: [3] [1510/2502] eta: 0:28:26 lr: 0.000020 loss_cls: 4.1927 (3.9882) grad_norm: 4.1467 (4.4320) time: 1.7149 data: 0.0002 max mem: 8421 +[2024-12-05 01:58:41 root] (utils.py 283): INFO Epoch: [3] [1520/2502] eta: 0:28:08 lr: 0.000020 loss_cls: 4.1052 (3.9873) grad_norm: 4.1845 (4.4312) time: 1.7144 data: 0.0002 max mem: 8421 +[2024-12-05 01:58:58 root] (utils.py 283): INFO Epoch: [3] [1530/2502] eta: 0:27:51 lr: 0.000020 loss_cls: 3.9870 (3.9874) grad_norm: 4.0346 (4.4295) time: 1.7145 data: 0.0002 max mem: 8421 +[2024-12-05 01:59:16 root] (utils.py 283): INFO Epoch: [3] [1540/2502] eta: 0:27:34 lr: 0.000020 loss_cls: 4.0722 (3.9884) grad_norm: 4.0739 (4.4282) time: 1.7151 data: 0.0002 max mem: 8421 +[2024-12-05 01:59:33 root] (utils.py 283): INFO Epoch: [3] [1550/2502] eta: 0:27:17 lr: 0.000020 loss_cls: 4.1377 (3.9889) grad_norm: 4.1456 (4.4281) time: 1.7150 data: 0.0002 max mem: 8421 +[2024-12-05 01:59:50 root] (utils.py 283): INFO Epoch: [3] [1560/2502] eta: 0:27:00 lr: 0.000020 loss_cls: 3.8590 (3.9886) grad_norm: 4.1705 (4.4265) time: 1.7149 data: 0.0002 max mem: 8421 +[2024-12-05 02:00:07 root] (utils.py 283): INFO Epoch: [3] [1570/2502] eta: 0:26:42 lr: 0.000020 loss_cls: 3.9781 (3.9895) grad_norm: 4.2023 (4.4252) time: 1.7157 data: 0.0002 max mem: 8421 +[2024-12-05 02:00:24 root] (utils.py 283): INFO Epoch: [3] [1580/2502] eta: 0:26:25 lr: 0.000020 loss_cls: 4.2282 (3.9887) grad_norm: 4.2023 (4.4244) time: 1.7174 data: 0.0002 max mem: 8421 +[2024-12-05 02:00:41 root] (utils.py 283): INFO Epoch: [3] [1590/2502] eta: 0:26:08 lr: 0.000020 loss_cls: 4.1210 (3.9894) grad_norm: 4.1049 (4.4223) time: 1.7193 data: 0.0002 max mem: 8421 +[2024-12-05 02:00:59 root] (utils.py 283): INFO Epoch: [3] [1600/2502] eta: 0:25:51 lr: 0.000020 loss_cls: 4.0497 (3.9880) grad_norm: 4.1049 (4.4215) time: 1.7188 data: 0.0002 max mem: 8421 +[2024-12-05 02:01:16 root] (utils.py 283): INFO Epoch: [3] [1610/2502] eta: 0:25:34 lr: 0.000020 loss_cls: 3.8513 (3.9867) grad_norm: 4.1729 (4.4201) time: 1.7194 data: 0.0002 max mem: 8421 +[2024-12-05 02:01:33 root] (utils.py 283): INFO Epoch: [3] [1620/2502] eta: 0:25:16 lr: 0.000020 loss_cls: 4.0103 (3.9881) grad_norm: 4.1729 (4.4190) time: 1.7207 data: 0.0002 max mem: 8421 +[2024-12-05 02:01:50 root] (utils.py 283): INFO Epoch: [3] [1630/2502] eta: 0:24:59 lr: 0.000020 loss_cls: 4.1236 (3.9867) grad_norm: 4.2213 (4.4184) time: 1.7187 data: 0.0003 max mem: 8421 +[2024-12-05 02:02:07 root] (utils.py 283): INFO Epoch: [3] [1640/2502] eta: 0:24:42 lr: 0.000020 loss_cls: 4.0665 (3.9874) grad_norm: 4.4300 (4.4189) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 02:02:25 root] (utils.py 283): INFO Epoch: [3] [1650/2502] eta: 0:24:25 lr: 0.000020 loss_cls: 3.9152 (3.9858) grad_norm: 4.4481 (4.4180) time: 1.7201 data: 0.0002 max mem: 8421 +[2024-12-05 02:02:42 root] (utils.py 283): INFO Epoch: [3] [1660/2502] eta: 0:24:08 lr: 0.000020 loss_cls: 3.8412 (3.9854) grad_norm: 4.2005 (4.4164) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 02:02:59 root] (utils.py 283): INFO Epoch: [3] [1670/2502] eta: 0:23:50 lr: 0.000020 loss_cls: 4.0584 (3.9842) grad_norm: 4.1637 (4.4159) time: 1.7197 data: 0.0002 max mem: 8421 +[2024-12-05 02:03:16 root] (utils.py 283): INFO Epoch: [3] [1680/2502] eta: 0:23:33 lr: 0.000020 loss_cls: 3.8161 (3.9831) grad_norm: 4.2327 (4.4164) time: 1.7198 data: 0.0002 max mem: 8421 +[2024-12-05 02:03:33 root] (utils.py 283): INFO Epoch: [3] [1690/2502] eta: 0:23:16 lr: 0.000020 loss_cls: 3.8941 (3.9821) grad_norm: 4.2488 (4.4152) time: 1.7195 data: 0.0002 max mem: 8421 +[2024-12-05 02:03:51 root] (utils.py 283): INFO Epoch: [3] [1700/2502] eta: 0:22:59 lr: 0.000020 loss_cls: 4.1229 (3.9833) grad_norm: 4.1442 (4.4135) time: 1.7201 data: 0.0002 max mem: 8421 +[2024-12-05 02:04:08 root] (utils.py 283): INFO Epoch: [3] [1710/2502] eta: 0:22:42 lr: 0.000020 loss_cls: 4.2045 (3.9833) grad_norm: 4.0497 (4.4124) time: 1.7220 data: 0.0002 max mem: 8421 +[2024-12-05 02:04:25 root] (utils.py 283): INFO Epoch: [3] [1720/2502] eta: 0:22:24 lr: 0.000020 loss_cls: 4.1609 (3.9850) grad_norm: 4.0788 (4.4113) time: 1.7217 data: 0.0002 max mem: 8421 +[2024-12-05 02:04:42 root] (utils.py 283): INFO Epoch: [3] [1730/2502] eta: 0:22:07 lr: 0.000020 loss_cls: 4.1609 (3.9849) grad_norm: 4.0914 (4.4104) time: 1.7197 data: 0.0002 max mem: 8421 +[2024-12-05 02:04:59 root] (utils.py 283): INFO Epoch: [3] [1740/2502] eta: 0:21:50 lr: 0.000020 loss_cls: 3.7978 (3.9832) grad_norm: 4.1658 (4.4099) time: 1.7207 data: 0.0003 max mem: 8421 +[2024-12-05 02:05:17 root] (utils.py 283): INFO Epoch: [3] [1750/2502] eta: 0:21:33 lr: 0.000020 loss_cls: 3.6101 (3.9819) grad_norm: 4.1415 (4.4092) time: 1.7188 data: 0.0003 max mem: 8421 +[2024-12-05 02:05:34 root] (utils.py 283): INFO Epoch: [3] [1760/2502] eta: 0:21:16 lr: 0.000020 loss_cls: 3.7361 (3.9810) grad_norm: 4.3135 (4.4115) time: 1.7185 data: 0.0002 max mem: 8421 +[2024-12-05 02:05:51 root] (utils.py 283): INFO Epoch: [3] [1770/2502] eta: 0:20:58 lr: 0.000020 loss_cls: 3.8875 (3.9812) grad_norm: 4.3867 (4.4109) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 02:06:08 root] (utils.py 283): INFO Epoch: [3] [1780/2502] eta: 0:20:41 lr: 0.000020 loss_cls: 4.2620 (3.9817) grad_norm: 4.3827 (4.4111) time: 1.7187 data: 0.0002 max mem: 8421 +[2024-12-05 02:06:25 root] (utils.py 283): INFO Epoch: [3] [1790/2502] eta: 0:20:24 lr: 0.000020 loss_cls: 4.2620 (3.9830) grad_norm: 4.1536 (4.4098) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 02:06:43 root] (utils.py 283): INFO Epoch: [3] [1800/2502] eta: 0:20:07 lr: 0.000020 loss_cls: 4.2497 (3.9825) grad_norm: 4.1652 (4.4086) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 02:07:00 root] (utils.py 283): INFO Epoch: [3] [1810/2502] eta: 0:19:50 lr: 0.000020 loss_cls: 4.0684 (3.9818) grad_norm: 4.1400 (4.4067) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 02:07:17 root] (utils.py 283): INFO Epoch: [3] [1820/2502] eta: 0:19:32 lr: 0.000020 loss_cls: 3.9757 (3.9818) grad_norm: 4.1006 (4.4062) time: 1.7193 data: 0.0002 max mem: 8421 +[2024-12-05 02:07:34 root] (utils.py 283): INFO Epoch: [3] [1830/2502] eta: 0:19:15 lr: 0.000020 loss_cls: 3.9289 (3.9816) grad_norm: 4.0567 (4.4055) time: 1.7208 data: 0.0002 max mem: 8421 +[2024-12-05 02:07:51 root] (utils.py 283): INFO Epoch: [3] [1840/2502] eta: 0:18:58 lr: 0.000020 loss_cls: 4.1371 (3.9813) grad_norm: 4.3141 (4.4066) time: 1.7227 data: 0.0002 max mem: 8421 +[2024-12-05 02:08:09 root] (utils.py 283): INFO Epoch: [3] [1850/2502] eta: 0:18:41 lr: 0.000020 loss_cls: 3.6257 (3.9790) grad_norm: 4.2520 (4.4059) time: 1.7251 data: 0.0002 max mem: 8421 +[2024-12-05 02:08:26 root] (utils.py 283): INFO Epoch: [3] [1860/2502] eta: 0:18:24 lr: 0.000020 loss_cls: 3.6257 (3.9786) grad_norm: 4.2012 (4.4070) time: 1.7242 data: 0.0002 max mem: 8421 +[2024-12-05 02:08:43 root] (utils.py 283): INFO Epoch: [3] [1870/2502] eta: 0:18:06 lr: 0.000020 loss_cls: 3.8866 (3.9791) grad_norm: 4.2909 (4.4071) time: 1.7225 data: 0.0002 max mem: 8421 +[2024-12-05 02:09:00 root] (utils.py 283): INFO Epoch: [3] [1880/2502] eta: 0:17:49 lr: 0.000020 loss_cls: 3.9925 (3.9780) grad_norm: 4.2876 (4.4068) time: 1.7236 data: 0.0003 max mem: 8421 +[2024-12-05 02:09:18 root] (utils.py 283): INFO Epoch: [3] [1890/2502] eta: 0:17:32 lr: 0.000020 loss_cls: 3.9925 (3.9781) grad_norm: 4.1436 (4.4052) time: 1.7244 data: 0.0003 max mem: 8421 +[2024-12-05 02:09:35 root] (utils.py 283): INFO Epoch: [3] [1900/2502] eta: 0:17:15 lr: 0.000020 loss_cls: 3.9893 (3.9769) grad_norm: 4.1346 (4.4045) time: 1.7250 data: 0.0003 max mem: 8421 +[2024-12-05 02:09:52 root] (utils.py 283): INFO Epoch: [3] [1910/2502] eta: 0:16:58 lr: 0.000020 loss_cls: 3.8635 (3.9765) grad_norm: 4.2345 (4.4034) time: 1.7260 data: 0.0003 max mem: 8421 +[2024-12-05 02:10:09 root] (utils.py 283): INFO Epoch: [3] [1920/2502] eta: 0:16:41 lr: 0.000020 loss_cls: 3.8635 (3.9758) grad_norm: 4.2465 (4.4029) time: 1.7244 data: 0.0003 max mem: 8421 +[2024-12-05 02:10:27 root] (utils.py 283): INFO Epoch: [3] [1930/2502] eta: 0:16:23 lr: 0.000020 loss_cls: 3.8974 (3.9764) grad_norm: 4.3011 (4.4030) time: 1.7231 data: 0.0003 max mem: 8421 +[2024-12-05 02:10:44 root] (utils.py 283): INFO Epoch: [3] [1940/2502] eta: 0:16:06 lr: 0.000020 loss_cls: 3.8922 (3.9755) grad_norm: 4.2434 (4.4029) time: 1.7228 data: 0.0003 max mem: 8421 +[2024-12-05 02:11:01 root] (utils.py 283): INFO Epoch: [3] [1950/2502] eta: 0:15:49 lr: 0.000020 loss_cls: 3.8663 (3.9740) grad_norm: 4.2222 (4.4021) time: 1.7210 data: 0.0002 max mem: 8421 +[2024-12-05 02:11:18 root] (utils.py 283): INFO Epoch: [3] [1960/2502] eta: 0:15:32 lr: 0.000020 loss_cls: 4.0880 (3.9746) grad_norm: 4.2313 (4.4018) time: 1.7209 data: 0.0003 max mem: 8421 +[2024-12-05 02:11:35 root] (utils.py 283): INFO Epoch: [3] [1970/2502] eta: 0:15:15 lr: 0.000020 loss_cls: 4.0783 (3.9733) grad_norm: 4.3193 (4.4020) time: 1.7200 data: 0.0003 max mem: 8421 +[2024-12-05 02:11:53 root] (utils.py 283): INFO Epoch: [3] [1980/2502] eta: 0:14:57 lr: 0.000020 loss_cls: 3.9210 (3.9733) grad_norm: 4.2344 (4.4015) time: 1.7188 data: 0.0002 max mem: 8421 +[2024-12-05 02:12:10 root] (utils.py 283): INFO Epoch: [3] [1990/2502] eta: 0:14:40 lr: 0.000020 loss_cls: 3.8855 (3.9727) grad_norm: 4.0452 (4.4004) time: 1.7182 data: 0.0002 max mem: 8421 +[2024-12-05 02:12:27 root] (utils.py 283): INFO Epoch: [3] [2000/2502] eta: 0:14:23 lr: 0.000020 loss_cls: 3.6527 (3.9702) grad_norm: 4.0316 (4.3995) time: 1.7168 data: 0.0003 max mem: 8421 +[2024-12-05 02:12:44 root] (utils.py 283): INFO Epoch: [3] [2010/2502] eta: 0:14:06 lr: 0.000020 loss_cls: 3.7393 (3.9702) grad_norm: 4.1356 (4.3982) time: 1.7160 data: 0.0003 max mem: 8421 +[2024-12-05 02:13:01 root] (utils.py 283): INFO Epoch: [3] [2020/2502] eta: 0:13:49 lr: 0.000020 loss_cls: 4.0601 (3.9698) grad_norm: 4.1104 (4.3989) time: 1.7162 data: 0.0002 max mem: 8421 +[2024-12-05 02:13:18 root] (utils.py 283): INFO Epoch: [3] [2030/2502] eta: 0:13:31 lr: 0.000020 loss_cls: 4.1104 (3.9703) grad_norm: 4.1104 (4.3979) time: 1.7162 data: 0.0002 max mem: 8421 +[2024-12-05 02:13:36 root] (utils.py 283): INFO Epoch: [3] [2040/2502] eta: 0:13:14 lr: 0.000020 loss_cls: 4.0273 (3.9690) grad_norm: 4.1220 (4.3993) time: 1.7155 data: 0.0002 max mem: 8421 +[2024-12-05 02:13:53 root] (utils.py 283): INFO Epoch: [3] [2050/2502] eta: 0:12:57 lr: 0.000020 loss_cls: 4.0273 (3.9686) grad_norm: 4.3100 (4.3985) time: 1.7151 data: 0.0003 max mem: 8421 +[2024-12-05 02:14:10 root] (utils.py 283): INFO Epoch: [3] [2060/2502] eta: 0:12:40 lr: 0.000020 loss_cls: 4.2311 (3.9700) grad_norm: 4.2841 (4.3976) time: 1.7155 data: 0.0002 max mem: 8421 +[2024-12-05 02:14:27 root] (utils.py 283): INFO Epoch: [3] [2070/2502] eta: 0:12:22 lr: 0.000020 loss_cls: 4.1224 (3.9696) grad_norm: 4.0423 (4.3964) time: 1.7168 data: 0.0002 max mem: 8421 +[2024-12-05 02:14:44 root] (utils.py 283): INFO Epoch: [3] [2080/2502] eta: 0:12:05 lr: 0.000020 loss_cls: 3.8808 (3.9680) grad_norm: 4.0423 (4.3960) time: 1.7165 data: 0.0002 max mem: 8421 +[2024-12-05 02:15:01 root] (utils.py 283): INFO Epoch: [3] [2090/2502] eta: 0:11:48 lr: 0.000020 loss_cls: 3.8808 (3.9682) grad_norm: 4.1090 (4.3986) time: 1.7161 data: 0.0002 max mem: 8421 +[2024-12-05 02:15:19 root] (utils.py 283): INFO Epoch: [3] [2100/2502] eta: 0:11:31 lr: 0.000020 loss_cls: 4.0524 (3.9683) grad_norm: 4.1187 (4.3979) time: 1.7175 data: 0.0002 max mem: 8421 +[2024-12-05 02:15:35 root] (utils.py 283): INFO Epoch: [3] [2110/2502] eta: 0:11:14 lr: 0.000020 loss_cls: 3.9125 (3.9673) grad_norm: 4.0914 (4.3994) time: 1.6893 data: 0.0003 max mem: 8421 +[2024-12-05 02:15:52 root] (utils.py 283): INFO Epoch: [3] [2120/2502] eta: 0:10:56 lr: 0.000020 loss_cls: 3.7797 (3.9668) grad_norm: 4.1535 (4.4012) time: 1.6687 data: 0.0002 max mem: 8421 +[2024-12-05 02:16:20 root] (utils.py 283): INFO Epoch: [3] [2130/2502] eta: 0:10:41 lr: 0.000020 loss_cls: 3.9470 (3.9668) grad_norm: 4.2113 (4.4003) time: 2.2203 data: 0.0003 max mem: 8421 +[2024-12-05 02:16:36 root] (utils.py 283): INFO Epoch: [3] [2140/2502] eta: 0:10:24 lr: 0.000020 loss_cls: 4.1017 (3.9666) grad_norm: 4.1467 (4.3993) time: 2.2205 data: 0.0003 max mem: 8421 +[2024-12-05 02:16:54 root] (utils.py 283): INFO Epoch: [3] [2150/2502] eta: 0:10:06 lr: 0.000020 loss_cls: 4.2549 (3.9690) grad_norm: 4.2289 (4.3994) time: 1.6996 data: 0.0003 max mem: 8421 +[2024-12-05 02:17:11 root] (utils.py 283): INFO Epoch: [3] [2160/2502] eta: 0:09:49 lr: 0.000020 loss_cls: 4.3465 (3.9700) grad_norm: 4.2780 (4.3991) time: 1.7237 data: 0.0003 max mem: 8421 +[2024-12-05 02:17:28 root] (utils.py 283): INFO Epoch: [3] [2170/2502] eta: 0:09:32 lr: 0.000020 loss_cls: 4.2285 (3.9706) grad_norm: 4.3084 (4.3996) time: 1.7231 data: 0.0003 max mem: 8421 +[2024-12-05 02:17:45 root] (utils.py 283): INFO Epoch: [3] [2180/2502] eta: 0:09:15 lr: 0.000020 loss_cls: 3.9892 (3.9694) grad_norm: 4.3795 (4.3988) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 02:18:03 root] (utils.py 283): INFO Epoch: [3] [2190/2502] eta: 0:08:57 lr: 0.000020 loss_cls: 3.8951 (3.9693) grad_norm: 4.1840 (4.3978) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 02:18:20 root] (utils.py 283): INFO Epoch: [3] [2200/2502] eta: 0:08:40 lr: 0.000020 loss_cls: 3.9065 (3.9693) grad_norm: 4.1840 (4.3982) time: 1.7275 data: 0.0003 max mem: 8421 +[2024-12-05 02:18:37 root] (utils.py 283): INFO Epoch: [3] [2210/2502] eta: 0:08:23 lr: 0.000020 loss_cls: 3.8849 (3.9685) grad_norm: 4.2122 (4.3984) time: 1.7305 data: 0.0003 max mem: 8421 +[2024-12-05 02:18:54 root] (utils.py 283): INFO Epoch: [3] [2220/2502] eta: 0:08:06 lr: 0.000020 loss_cls: 3.8894 (3.9691) grad_norm: 4.2313 (4.3989) time: 1.7233 data: 0.0003 max mem: 8421 +[2024-12-05 02:19:12 root] (utils.py 283): INFO Epoch: [3] [2230/2502] eta: 0:07:48 lr: 0.000020 loss_cls: 4.1071 (3.9692) grad_norm: 4.5365 (4.3998) time: 1.7237 data: 0.0002 max mem: 8421 +[2024-12-05 02:19:29 root] (utils.py 283): INFO Epoch: [3] [2240/2502] eta: 0:07:31 lr: 0.000020 loss_cls: 4.1750 (3.9700) grad_norm: 4.1966 (4.3986) time: 1.7227 data: 0.0002 max mem: 8421 +[2024-12-05 02:19:46 root] (utils.py 283): INFO Epoch: [3] [2250/2502] eta: 0:07:14 lr: 0.000020 loss_cls: 4.0668 (3.9691) grad_norm: 4.0642 (4.3989) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 02:20:03 root] (utils.py 283): INFO Epoch: [3] [2260/2502] eta: 0:06:57 lr: 0.000020 loss_cls: 3.9769 (3.9702) grad_norm: 4.0642 (4.3982) time: 1.7214 data: 0.0002 max mem: 8421 +[2024-12-05 02:20:20 root] (utils.py 283): INFO Epoch: [3] [2270/2502] eta: 0:06:39 lr: 0.000020 loss_cls: 4.1294 (3.9704) grad_norm: 4.1215 (4.3972) time: 1.7215 data: 0.0002 max mem: 8421 +[2024-12-05 02:20:38 root] (utils.py 283): INFO Epoch: [3] [2280/2502] eta: 0:06:22 lr: 0.000020 loss_cls: 3.6477 (3.9685) grad_norm: 4.0940 (4.3964) time: 1.7221 data: 0.0003 max mem: 8421 +[2024-12-05 02:20:55 root] (utils.py 283): INFO Epoch: [3] [2290/2502] eta: 0:06:05 lr: 0.000020 loss_cls: 4.0090 (3.9688) grad_norm: 4.2353 (4.3961) time: 1.7213 data: 0.0003 max mem: 8421 +[2024-12-05 02:21:12 root] (utils.py 283): INFO Epoch: [3] [2300/2502] eta: 0:05:48 lr: 0.000020 loss_cls: 4.0887 (3.9695) grad_norm: 4.2817 (4.3966) time: 1.7188 data: 0.0003 max mem: 8421 +[2024-12-05 02:21:29 root] (utils.py 283): INFO Epoch: [3] [2310/2502] eta: 0:05:30 lr: 0.000020 loss_cls: 4.1604 (3.9695) grad_norm: 4.2549 (4.3960) time: 1.7182 data: 0.0002 max mem: 8421 +[2024-12-05 02:21:46 root] (utils.py 283): INFO Epoch: [3] [2320/2502] eta: 0:05:13 lr: 0.000020 loss_cls: 3.9098 (3.9694) grad_norm: 4.3197 (4.3969) time: 1.7190 data: 0.0002 max mem: 8421 +[2024-12-05 02:22:04 root] (utils.py 283): INFO Epoch: [3] [2330/2502] eta: 0:04:56 lr: 0.000020 loss_cls: 3.7644 (3.9681) grad_norm: 4.2255 (4.3956) time: 1.7201 data: 0.0002 max mem: 8421 +[2024-12-05 02:22:21 root] (utils.py 283): INFO Epoch: [3] [2340/2502] eta: 0:04:39 lr: 0.000020 loss_cls: 3.9932 (3.9690) grad_norm: 4.0567 (4.3948) time: 1.7208 data: 0.0003 max mem: 8421 +[2024-12-05 02:22:38 root] (utils.py 283): INFO Epoch: [3] [2350/2502] eta: 0:04:22 lr: 0.000020 loss_cls: 4.0064 (3.9689) grad_norm: 4.1227 (4.3939) time: 1.7224 data: 0.0003 max mem: 8421 +[2024-12-05 02:22:55 root] (utils.py 283): INFO Epoch: [3] [2360/2502] eta: 0:04:04 lr: 0.000020 loss_cls: 4.0064 (3.9691) grad_norm: 4.2832 (4.3953) time: 1.7223 data: 0.0003 max mem: 8421 +[2024-12-05 02:23:12 root] (utils.py 283): INFO Epoch: [3] [2370/2502] eta: 0:03:47 lr: 0.000020 loss_cls: 3.9726 (3.9686) grad_norm: 4.3724 (4.3955) time: 1.7207 data: 0.0002 max mem: 8421 +[2024-12-05 02:23:30 root] (utils.py 283): INFO Epoch: [3] [2380/2502] eta: 0:03:30 lr: 0.000020 loss_cls: 3.9726 (3.9691) grad_norm: 4.1904 (4.3947) time: 1.7236 data: 0.0003 max mem: 8421 +[2024-12-05 02:23:47 root] (utils.py 283): INFO Epoch: [3] [2390/2502] eta: 0:03:13 lr: 0.000020 loss_cls: 4.1383 (3.9698) grad_norm: 4.0818 (4.3938) time: 1.7252 data: 0.0003 max mem: 8421 +[2024-12-05 02:24:04 root] (utils.py 283): INFO Epoch: [3] [2400/2502] eta: 0:02:55 lr: 0.000020 loss_cls: 3.8786 (3.9684) grad_norm: 4.1145 (4.3930) time: 1.7214 data: 0.0002 max mem: 8421 +[2024-12-05 02:24:21 root] (utils.py 283): INFO Epoch: [3] [2410/2502] eta: 0:02:38 lr: 0.000020 loss_cls: 3.6620 (3.9679) grad_norm: 4.1145 (4.3915) time: 1.7194 data: 0.0002 max mem: 8421 +[2024-12-05 02:24:39 root] (utils.py 283): INFO Epoch: [3] [2420/2502] eta: 0:02:21 lr: 0.000020 loss_cls: 4.0404 (3.9685) grad_norm: 4.1036 (4.3914) time: 1.7227 data: 0.0003 max mem: 8421 +[2024-12-05 02:24:56 root] (utils.py 283): INFO Epoch: [3] [2430/2502] eta: 0:02:04 lr: 0.000020 loss_cls: 4.0404 (3.9682) grad_norm: 4.3580 (4.3924) time: 1.7244 data: 0.0003 max mem: 8421 +[2024-12-05 02:25:13 root] (utils.py 283): INFO Epoch: [3] [2440/2502] eta: 0:01:46 lr: 0.000020 loss_cls: 3.6701 (3.9668) grad_norm: 4.3523 (4.3952) time: 1.7214 data: 0.0003 max mem: 8421 +[2024-12-05 02:25:30 root] (utils.py 283): INFO Epoch: [3] [2450/2502] eta: 0:01:29 lr: 0.000020 loss_cls: 3.8727 (3.9671) grad_norm: 4.2539 (4.3946) time: 1.7202 data: 0.0002 max mem: 8421 +[2024-12-05 02:25:47 root] (utils.py 283): INFO Epoch: [3] [2460/2502] eta: 0:01:12 lr: 0.000020 loss_cls: 4.2872 (3.9679) grad_norm: 4.2415 (4.3945) time: 1.7193 data: 0.0003 max mem: 8421 +[2024-12-05 02:26:05 root] (utils.py 283): INFO Epoch: [3] [2470/2502] eta: 0:00:55 lr: 0.000020 loss_cls: 4.2073 (3.9683) grad_norm: 4.0608 (4.3932) time: 1.7186 data: 0.0003 max mem: 8421 +[2024-12-05 02:26:22 root] (utils.py 283): INFO Epoch: [3] [2480/2502] eta: 0:00:37 lr: 0.000020 loss_cls: 3.8964 (3.9681) grad_norm: 3.9998 (4.3919) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 02:26:39 root] (utils.py 283): INFO Epoch: [3] [2490/2502] eta: 0:00:20 lr: 0.000020 loss_cls: 3.8923 (3.9673) grad_norm: 4.2051 (4.3918) time: 1.7289 data: 0.0229 max mem: 8421 +[2024-12-05 02:26:56 root] (utils.py 283): INFO Epoch: [3] [2500/2502] eta: 0:00:03 lr: 0.000020 loss_cls: 4.0057 (3.9667) grad_norm: 4.2401 (4.3927) time: 1.7294 data: 0.0229 max mem: 8421 +[2024-12-05 02:26:58 root] (utils.py 283): INFO Epoch: [3] [2501/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 4.0057 (3.9667) grad_norm: 4.2496 (4.3927) time: 1.7291 data: 0.0229 max mem: 8421 +[2024-12-05 02:26:58 root] (utils.py 297): INFO Epoch: [3] Total time: 1:11:53 (1.7238 s / it) +[2024-12-05 02:26:58 root] (engine.py 178): INFO Averaged stats:lr: 0.000020 loss_cls: 4.0057 (3.9664) grad_norm: 4.2496 (4.3927) +[2024-12-05 02:26:59 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:17 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7508 (0.7508) acc1: 85.1562 (85.1562) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1735 data: 0.0003 max mem: 8421 +[2024-12-05 02:27:00 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:15 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8256 (0.8885) acc1: 82.0312 (81.0369) acc3: 92.9688 (92.0455) acc5: 95.3125 (95.1705) time: 0.1771 data: 0.0004 max mem: 8421 +[2024-12-05 02:27:02 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8939 (0.9257) acc1: 78.9062 (80.0967) acc3: 92.9688 (92.1875) acc5: 95.3125 (94.9033) time: 0.1892 data: 0.0005 max mem: 8421 +[2024-12-05 02:27:04 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9500 (0.9263) acc1: 78.9062 (79.5615) acc3: 92.9688 (92.4395) acc5: 95.3125 (95.1613) time: 0.1890 data: 0.0005 max mem: 8421 +[2024-12-05 02:27:06 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8669 (0.9209) acc1: 80.4688 (79.9543) acc3: 93.7500 (92.4924) acc5: 96.0938 (95.2172) time: 0.1884 data: 0.0005 max mem: 8421 +[2024-12-05 02:27:08 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0684 (1.0219) acc1: 73.4375 (77.6348) acc3: 88.2812 (90.7629) acc5: 92.9688 (94.0564) time: 0.1884 data: 0.0005 max mem: 8421 +[2024-12-05 02:27:10 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3173 (1.0691) acc1: 68.7500 (76.6906) acc3: 85.1562 (89.9974) acc5: 89.0625 (93.4042) time: 0.1831 data: 0.0005 max mem: 8421 +[2024-12-05 02:27:12 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2993 (1.1162) acc1: 71.8750 (75.5832) acc3: 85.9375 (89.4366) acc5: 89.8438 (92.8697) time: 0.1895 data: 0.0005 max mem: 8421 +[2024-12-05 02:27:14 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3262 (1.1537) acc1: 69.5312 (74.8167) acc3: 85.9375 (88.8696) acc5: 89.0625 (92.3611) time: 0.1839 data: 0.0009 max mem: 8421 +[2024-12-05 02:27:16 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3915 (1.1876) acc1: 69.5312 (73.9698) acc3: 84.3750 (88.3156) acc5: 89.0625 (91.8355) time: 0.1897 data: 0.0008 max mem: 8421 +[2024-12-05 02:27:17 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2935 (1.1763) acc1: 72.6562 (74.0880) acc3: 86.7188 (88.4640) acc5: 89.8438 (92.0160) time: 0.1856 data: 0.0006 max mem: 8421 +[2024-12-05 02:27:17 root] (utils.py 297): INFO Test: Total time: 0:00:18 (0.1864 s / it) +[2024-12-05 02:27:17 root] (engine.py 263): INFO * Acc@1 74.012 Acc@3 88.498 Acc@5 91.976 loss 1.176 flops 1.285 layer_flops 1.251 +[2024-12-05 02:27:17 root] (main.py 542): INFO Accuracy of the network on the 50000 test images: 74.0% +[2024-12-05 02:27:17 root] (main.py 546): INFO Max accuracy: 74.01% +[2024-12-05 02:27:19 root] (utils.py 283): INFO Epoch: [4] [ 0/2502] eta: 1:09:49 lr: 0.000020 loss_cls: 2.9111 (2.9111) grad_norm: 4.0281 (4.0281) time: 1.6745 data: 0.0003 max mem: 8421 +[2024-12-05 02:27:36 root] (utils.py 283): INFO Epoch: [4] [ 10/2502] eta: 1:11:07 lr: 0.000020 loss_cls: 3.7910 (3.7681) grad_norm: 4.0321 (4.1117) time: 1.7126 data: 0.0002 max mem: 8421 +[2024-12-05 02:27:53 root] (utils.py 283): INFO Epoch: [4] [ 20/2502] eta: 1:10:55 lr: 0.000020 loss_cls: 3.7993 (3.7863) grad_norm: 4.0406 (4.1038) time: 1.7166 data: 0.0002 max mem: 8421 +[2024-12-05 02:28:10 root] (utils.py 283): INFO Epoch: [4] [ 30/2502] eta: 1:10:39 lr: 0.000020 loss_cls: 3.7993 (3.7987) grad_norm: 4.0467 (4.2652) time: 1.7165 data: 0.0002 max mem: 8421 +[2024-12-05 02:28:27 root] (utils.py 283): INFO Epoch: [4] [ 40/2502] eta: 1:10:22 lr: 0.000020 loss_cls: 3.9857 (3.8877) grad_norm: 4.2953 (4.2849) time: 1.7158 data: 0.0002 max mem: 8421 +[2024-12-05 02:28:44 root] (utils.py 283): INFO Epoch: [4] [ 50/2502] eta: 1:10:05 lr: 0.000020 loss_cls: 4.0174 (3.8532) grad_norm: 4.3408 (4.3028) time: 1.7150 data: 0.0002 max mem: 8421 +[2024-12-05 02:29:02 root] (utils.py 283): INFO Epoch: [4] [ 60/2502] eta: 1:09:50 lr: 0.000020 loss_cls: 3.7493 (3.8108) grad_norm: 4.2881 (4.2946) time: 1.7178 data: 0.0002 max mem: 8421 +[2024-12-05 02:29:19 root] (utils.py 283): INFO Epoch: [4] [ 70/2502] eta: 1:09:35 lr: 0.000020 loss_cls: 3.9514 (3.8406) grad_norm: 4.0707 (4.2658) time: 1.7212 data: 0.0002 max mem: 8421 +[2024-12-05 02:29:36 root] (utils.py 283): INFO Epoch: [4] [ 80/2502] eta: 1:09:18 lr: 0.000020 loss_cls: 4.1284 (3.8801) grad_norm: 4.1464 (4.2838) time: 1.7199 data: 0.0002 max mem: 8421 +[2024-12-05 02:29:53 root] (utils.py 283): INFO Epoch: [4] [ 90/2502] eta: 1:09:02 lr: 0.000020 loss_cls: 4.1284 (3.8818) grad_norm: 4.2375 (4.2648) time: 1.7194 data: 0.0002 max mem: 8421 +[2024-12-05 02:30:10 root] (utils.py 283): INFO Epoch: [4] [ 100/2502] eta: 1:08:45 lr: 0.000020 loss_cls: 4.1982 (3.8945) grad_norm: 4.2050 (4.2569) time: 1.7204 data: 0.0002 max mem: 8421 +[2024-12-05 02:30:28 root] (utils.py 283): INFO Epoch: [4] [ 110/2502] eta: 1:08:29 lr: 0.000020 loss_cls: 4.0782 (3.8777) grad_norm: 4.2212 (4.2933) time: 1.7203 data: 0.0002 max mem: 8421 +[2024-12-05 02:30:45 root] (utils.py 283): INFO Epoch: [4] [ 120/2502] eta: 1:08:12 lr: 0.000020 loss_cls: 4.1149 (3.8925) grad_norm: 4.2485 (4.3016) time: 1.7210 data: 0.0002 max mem: 8421 +[2024-12-05 02:31:02 root] (utils.py 283): INFO Epoch: [4] [ 130/2502] eta: 1:07:55 lr: 0.000020 loss_cls: 4.2125 (3.9026) grad_norm: 4.1977 (4.3063) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 02:31:19 root] (utils.py 283): INFO Epoch: [4] [ 140/2502] eta: 1:07:38 lr: 0.000020 loss_cls: 4.0780 (3.9080) grad_norm: 4.2691 (4.3075) time: 1.7193 data: 0.0002 max mem: 8421 +[2024-12-05 02:31:36 root] (utils.py 283): INFO Epoch: [4] [ 150/2502] eta: 1:07:21 lr: 0.000020 loss_cls: 3.9394 (3.9142) grad_norm: 4.2691 (4.2997) time: 1.7189 data: 0.0002 max mem: 8421 +[2024-12-05 02:31:54 root] (utils.py 283): INFO Epoch: [4] [ 160/2502] eta: 1:07:04 lr: 0.000020 loss_cls: 4.2580 (3.9324) grad_norm: 4.0791 (4.2960) time: 1.7186 data: 0.0002 max mem: 8421 +[2024-12-05 02:32:11 root] (utils.py 283): INFO Epoch: [4] [ 170/2502] eta: 1:06:47 lr: 0.000020 loss_cls: 4.0702 (3.9254) grad_norm: 4.2103 (4.2913) time: 1.7195 data: 0.0002 max mem: 8421 +[2024-12-05 02:32:28 root] (utils.py 283): INFO Epoch: [4] [ 180/2502] eta: 1:06:31 lr: 0.000020 loss_cls: 3.9640 (3.9230) grad_norm: 4.1049 (4.2820) time: 1.7229 data: 0.0002 max mem: 8421 +[2024-12-05 02:32:45 root] (utils.py 283): INFO Epoch: [4] [ 190/2502] eta: 1:06:14 lr: 0.000020 loss_cls: 4.1258 (3.9331) grad_norm: 4.0551 (4.2694) time: 1.7252 data: 0.0003 max mem: 8421 +[2024-12-05 02:33:03 root] (utils.py 283): INFO Epoch: [4] [ 200/2502] eta: 1:05:57 lr: 0.000020 loss_cls: 4.1258 (3.9447) grad_norm: 4.1540 (4.2780) time: 1.7232 data: 0.0003 max mem: 8421 +[2024-12-05 02:33:20 root] (utils.py 283): INFO Epoch: [4] [ 210/2502] eta: 1:05:40 lr: 0.000020 loss_cls: 4.0022 (3.9424) grad_norm: 4.1904 (4.2715) time: 1.7213 data: 0.0002 max mem: 8421 +[2024-12-05 02:33:37 root] (utils.py 283): INFO Epoch: [4] [ 220/2502] eta: 1:05:23 lr: 0.000020 loss_cls: 3.6277 (3.9192) grad_norm: 4.1454 (4.2659) time: 1.7216 data: 0.0002 max mem: 8421 +[2024-12-05 02:33:54 root] (utils.py 283): INFO Epoch: [4] [ 230/2502] eta: 1:05:06 lr: 0.000020 loss_cls: 3.7854 (3.9268) grad_norm: 4.1509 (4.2711) time: 1.7199 data: 0.0002 max mem: 8421 +[2024-12-05 02:34:11 root] (utils.py 283): INFO Epoch: [4] [ 240/2502] eta: 1:04:49 lr: 0.000020 loss_cls: 3.8371 (3.9253) grad_norm: 4.1577 (4.2733) time: 1.7209 data: 0.0002 max mem: 8421 +[2024-12-05 02:34:29 root] (utils.py 283): INFO Epoch: [4] [ 250/2502] eta: 1:04:32 lr: 0.000020 loss_cls: 3.6921 (3.9211) grad_norm: 4.1577 (4.2743) time: 1.7219 data: 0.0002 max mem: 8421 +[2024-12-05 02:34:46 root] (utils.py 283): INFO Epoch: [4] [ 260/2502] eta: 1:04:15 lr: 0.000020 loss_cls: 3.8687 (3.9198) grad_norm: 4.0738 (4.2763) time: 1.7221 data: 0.0002 max mem: 8421 +[2024-12-05 02:35:03 root] (utils.py 283): INFO Epoch: [4] [ 270/2502] eta: 1:03:58 lr: 0.000020 loss_cls: 3.8687 (3.9217) grad_norm: 4.1311 (4.2771) time: 1.7230 data: 0.0003 max mem: 8421 +[2024-12-05 02:35:20 root] (utils.py 283): INFO Epoch: [4] [ 280/2502] eta: 1:03:41 lr: 0.000020 loss_cls: 3.7858 (3.9191) grad_norm: 4.1518 (4.2730) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 02:35:37 root] (utils.py 283): INFO Epoch: [4] [ 290/2502] eta: 1:03:24 lr: 0.000020 loss_cls: 3.8331 (3.9224) grad_norm: 4.1774 (4.2775) time: 1.7192 data: 0.0002 max mem: 8421 +[2024-12-05 02:35:55 root] (utils.py 283): INFO Epoch: [4] [ 300/2502] eta: 1:03:07 lr: 0.000020 loss_cls: 4.0133 (3.9227) grad_norm: 4.1774 (4.2786) time: 1.7186 data: 0.0002 max mem: 8421 +[2024-12-05 02:36:12 root] (utils.py 283): INFO Epoch: [4] [ 310/2502] eta: 1:02:49 lr: 0.000020 loss_cls: 4.0476 (3.9247) grad_norm: 4.2517 (4.2941) time: 1.7182 data: 0.0002 max mem: 8421 +[2024-12-05 02:36:29 root] (utils.py 283): INFO Epoch: [4] [ 320/2502] eta: 1:02:32 lr: 0.000020 loss_cls: 3.9950 (3.9199) grad_norm: 4.2285 (4.2878) time: 1.7195 data: 0.0002 max mem: 8421 +[2024-12-05 02:36:46 root] (utils.py 283): INFO Epoch: [4] [ 330/2502] eta: 1:02:15 lr: 0.000020 loss_cls: 3.9412 (3.9248) grad_norm: 4.0865 (4.2868) time: 1.7200 data: 0.0002 max mem: 8421 +[2024-12-05 02:37:03 root] (utils.py 283): INFO Epoch: [4] [ 340/2502] eta: 1:01:58 lr: 0.000020 loss_cls: 4.1445 (3.9266) grad_norm: 4.1932 (4.2885) time: 1.7191 data: 0.0002 max mem: 8421 +[2024-12-05 02:37:21 root] (utils.py 283): INFO Epoch: [4] [ 350/2502] eta: 1:01:41 lr: 0.000020 loss_cls: 3.9293 (3.9222) grad_norm: 4.3311 (4.2932) time: 1.7210 data: 0.0002 max mem: 8421 +[2024-12-05 02:37:38 root] (utils.py 283): INFO Epoch: [4] [ 360/2502] eta: 1:01:24 lr: 0.000020 loss_cls: 4.0871 (3.9287) grad_norm: 4.3090 (4.2972) time: 1.7228 data: 0.0002 max mem: 8421 +[2024-12-05 02:37:55 root] (utils.py 283): INFO Epoch: [4] [ 370/2502] eta: 1:01:06 lr: 0.000020 loss_cls: 4.1810 (3.9345) grad_norm: 4.2616 (4.3032) time: 1.7206 data: 0.0002 max mem: 8421 +[2024-12-05 02:38:12 root] (utils.py 283): INFO Epoch: [4] [ 380/2502] eta: 1:00:49 lr: 0.000020 loss_cls: 4.0374 (3.9316) grad_norm: 4.2210 (4.3016) time: 1.7209 data: 0.0002 max mem: 8421 +[2024-12-05 02:38:30 root] (utils.py 283): INFO Epoch: [4] [ 390/2502] eta: 1:00:32 lr: 0.000020 loss_cls: 4.0374 (3.9355) grad_norm: 4.1150 (4.2967) time: 1.7232 data: 0.0002 max mem: 8421 +[2024-12-05 02:38:47 root] (utils.py 283): INFO Epoch: [4] [ 400/2502] eta: 1:00:15 lr: 0.000020 loss_cls: 4.2236 (3.9429) grad_norm: 4.1790 (4.3039) time: 1.7228 data: 0.0003 max mem: 8421 +[2024-12-05 02:39:04 root] (utils.py 283): INFO Epoch: [4] [ 410/2502] eta: 0:59:58 lr: 0.000020 loss_cls: 3.9732 (3.9386) grad_norm: 4.1812 (4.3070) time: 1.7222 data: 0.0002 max mem: 8421 +[2024-12-05 02:39:21 root] (utils.py 283): INFO Epoch: [4] [ 420/2502] eta: 0:59:41 lr: 0.000020 loss_cls: 3.8092 (3.9391) grad_norm: 4.1219 (4.3056) time: 1.7236 data: 0.0003 max mem: 8421 +[2024-12-05 02:39:38 root] (utils.py 283): INFO Epoch: [4] [ 430/2502] eta: 0:59:24 lr: 0.000020 loss_cls: 3.8423 (3.9372) grad_norm: 4.1905 (4.3102) time: 1.7211 data: 0.0003 max mem: 8421 +[2024-12-05 02:39:56 root] (utils.py 283): INFO Epoch: [4] [ 440/2502] eta: 0:59:06 lr: 0.000020 loss_cls: 4.0242 (3.9400) grad_norm: 4.2651 (4.3101) time: 1.7176 data: 0.0002 max mem: 8421 +[2024-12-05 02:40:13 root] (utils.py 283): INFO Epoch: [4] [ 450/2502] eta: 0:58:50 lr: 0.000020 loss_cls: 3.9599 (3.9376) grad_norm: 4.3098 (4.3210) time: 1.7241 data: 0.0002 max mem: 8421 +[2024-12-05 02:40:30 root] (utils.py 283): INFO Epoch: [4] [ 460/2502] eta: 0:58:33 lr: 0.000020 loss_cls: 3.8068 (3.9355) grad_norm: 4.2152 (4.3174) time: 1.7271 data: 0.0002 max mem: 8421 +[2024-12-05 02:40:47 root] (utils.py 283): INFO Epoch: [4] [ 470/2502] eta: 0:58:15 lr: 0.000020 loss_cls: 3.9671 (3.9371) grad_norm: 4.1080 (4.3123) time: 1.7211 data: 0.0003 max mem: 8421 +[2024-12-05 02:41:05 root] (utils.py 283): INFO Epoch: [4] [ 480/2502] eta: 0:57:58 lr: 0.000020 loss_cls: 3.8246 (3.9340) grad_norm: 4.0157 (4.3048) time: 1.7184 data: 0.0002 max mem: 8421 +[2024-12-05 02:41:22 root] (utils.py 283): INFO Epoch: [4] [ 490/2502] eta: 0:57:41 lr: 0.000020 loss_cls: 3.8246 (3.9334) grad_norm: 3.9322 (4.3026) time: 1.7227 data: 0.0002 max mem: 8421 +[2024-12-05 02:41:39 root] (utils.py 283): INFO Epoch: [4] [ 500/2502] eta: 0:57:24 lr: 0.000020 loss_cls: 4.2453 (3.9347) grad_norm: 4.2046 (4.2994) time: 1.7229 data: 0.0002 max mem: 8421 +[2024-12-05 02:41:56 root] (utils.py 283): INFO Epoch: [4] [ 510/2502] eta: 0:57:07 lr: 0.000020 loss_cls: 3.8394 (3.9341) grad_norm: 4.1583 (4.2955) time: 1.7193 data: 0.0002 max mem: 8421 +[2024-12-05 02:42:13 root] (utils.py 283): INFO Epoch: [4] [ 520/2502] eta: 0:56:50 lr: 0.000020 loss_cls: 3.7608 (3.9297) grad_norm: 4.0586 (4.2917) time: 1.7217 data: 0.0002 max mem: 8421 +[2024-12-05 02:42:31 root] (utils.py 283): INFO Epoch: [4] [ 530/2502] eta: 0:56:32 lr: 0.000020 loss_cls: 4.0560 (3.9331) grad_norm: 4.0908 (4.2895) time: 1.7219 data: 0.0002 max mem: 8421 +[2024-12-05 02:42:48 root] (utils.py 283): INFO Epoch: [4] [ 540/2502] eta: 0:56:15 lr: 0.000020 loss_cls: 4.1502 (3.9309) grad_norm: 4.0287 (4.2835) time: 1.7198 data: 0.0002 max mem: 8421 +[2024-12-05 02:43:05 root] (utils.py 283): INFO Epoch: [4] [ 550/2502] eta: 0:55:58 lr: 0.000020 loss_cls: 3.6557 (3.9245) grad_norm: 4.1363 (4.2843) time: 1.7201 data: 0.0002 max mem: 8421 +[2024-12-05 02:43:22 root] (utils.py 283): INFO Epoch: [4] [ 560/2502] eta: 0:55:41 lr: 0.000020 loss_cls: 3.9901 (3.9250) grad_norm: 4.2047 (4.2786) time: 1.7198 data: 0.0002 max mem: 8421 +[2024-12-05 02:43:39 root] (utils.py 283): INFO Epoch: [4] [ 570/2502] eta: 0:55:23 lr: 0.000020 loss_cls: 4.2789 (3.9302) grad_norm: 4.2047 (4.2785) time: 1.7186 data: 0.0002 max mem: 8421 +[2024-12-05 02:43:57 root] (utils.py 283): INFO Epoch: [4] [ 580/2502] eta: 0:55:06 lr: 0.000020 loss_cls: 4.2887 (3.9298) grad_norm: 4.1255 (4.2770) time: 1.7216 data: 0.0002 max mem: 8421 +[2024-12-05 02:44:14 root] (utils.py 283): INFO Epoch: [4] [ 590/2502] eta: 0:54:49 lr: 0.000020 loss_cls: 4.2183 (3.9367) grad_norm: 4.2190 (4.2838) time: 1.7222 data: 0.0002 max mem: 8421 +[2024-12-05 02:44:31 root] (utils.py 283): INFO Epoch: [4] [ 600/2502] eta: 0:54:32 lr: 0.000020 loss_cls: 4.1735 (3.9326) grad_norm: 4.3684 (4.2880) time: 1.7196 data: 0.0002 max mem: 8421 +[2024-12-05 02:44:48 root] (utils.py 283): INFO Epoch: [4] [ 610/2502] eta: 0:54:15 lr: 0.000020 loss_cls: 3.4824 (3.9291) grad_norm: 4.1737 (4.2845) time: 1.7201 data: 0.0002 max mem: 8421 +[2024-12-05 02:45:06 root] (utils.py 283): INFO Epoch: [4] [ 620/2502] eta: 0:53:58 lr: 0.000020 loss_cls: 4.0432 (3.9325) grad_norm: 4.0611 (4.2810) time: 1.7255 data: 0.0002 max mem: 8421 +[2024-12-05 02:45:23 root] (utils.py 283): INFO Epoch: [4] [ 630/2502] eta: 0:53:41 lr: 0.000020 loss_cls: 3.9237 (3.9303) grad_norm: 4.2123 (4.2895) time: 1.7274 data: 0.0003 max mem: 8421 +[2024-12-05 02:45:40 root] (utils.py 283): INFO Epoch: [4] [ 640/2502] eta: 0:53:23 lr: 0.000020 loss_cls: 3.8146 (3.9312) grad_norm: 4.2872 (4.2945) time: 1.7233 data: 0.0003 max mem: 8421 +[2024-12-05 02:45:57 root] (utils.py 283): INFO Epoch: [4] [ 650/2502] eta: 0:53:06 lr: 0.000020 loss_cls: 4.0110 (3.9316) grad_norm: 4.1947 (4.2955) time: 1.7242 data: 0.0003 max mem: 8421 +[2024-12-05 02:46:15 root] (utils.py 283): INFO Epoch: [4] [ 660/2502] eta: 0:52:50 lr: 0.000020 loss_cls: 3.9867 (3.9297) grad_norm: 4.1757 (4.2936) time: 1.7305 data: 0.0003 max mem: 8421 +[2024-12-05 02:46:32 root] (utils.py 283): INFO Epoch: [4] [ 670/2502] eta: 0:52:33 lr: 0.000020 loss_cls: 3.9867 (3.9308) grad_norm: 4.2988 (4.3045) time: 1.7309 data: 0.0003 max mem: 8421 +[2024-12-05 02:46:49 root] (utils.py 283): INFO Epoch: [4] [ 680/2502] eta: 0:52:15 lr: 0.000020 loss_cls: 4.1517 (3.9323) grad_norm: 4.2272 (4.3020) time: 1.7247 data: 0.0002 max mem: 8421 +[2024-12-05 02:47:06 root] (utils.py 283): INFO Epoch: [4] [ 690/2502] eta: 0:51:58 lr: 0.000020 loss_cls: 4.0974 (3.9303) grad_norm: 4.1344 (4.3010) time: 1.7236 data: 0.0002 max mem: 8421 +[2024-12-05 02:47:24 root] (utils.py 283): INFO Epoch: [4] [ 700/2502] eta: 0:51:41 lr: 0.000020 loss_cls: 4.0085 (3.9331) grad_norm: 4.0881 (4.2987) time: 1.7220 data: 0.0002 max mem: 8421 +[2024-12-05 02:47:41 root] (utils.py 283): INFO Epoch: [4] [ 710/2502] eta: 0:51:24 lr: 0.000020 loss_cls: 4.1329 (3.9331) grad_norm: 4.2098 (4.3047) time: 1.7225 data: 0.0002 max mem: 8421 +[2024-12-05 02:47:58 root] (utils.py 283): INFO Epoch: [4] [ 720/2502] eta: 0:51:07 lr: 0.000020 loss_cls: 3.8638 (3.9296) grad_norm: 4.2362 (4.3026) time: 1.7263 data: 0.0002 max mem: 8421 +[2024-12-05 02:48:15 root] (utils.py 283): INFO Epoch: [4] [ 730/2502] eta: 0:50:50 lr: 0.000020 loss_cls: 3.6073 (3.9274) grad_norm: 4.1638 (4.3042) time: 1.7291 data: 0.0003 max mem: 8421 +[2024-12-05 02:48:33 root] (utils.py 283): INFO Epoch: [4] [ 740/2502] eta: 0:50:33 lr: 0.000020 loss_cls: 4.1018 (3.9286) grad_norm: 4.3486 (4.3042) time: 1.7315 data: 0.0003 max mem: 8421 +[2024-12-05 02:48:50 root] (utils.py 283): INFO Epoch: [4] [ 750/2502] eta: 0:50:16 lr: 0.000020 loss_cls: 4.1538 (3.9300) grad_norm: 4.1456 (4.3021) time: 1.7300 data: 0.0003 max mem: 8421 +[2024-12-05 02:49:07 root] (utils.py 283): INFO Epoch: [4] [ 760/2502] eta: 0:49:59 lr: 0.000020 loss_cls: 4.2029 (3.9331) grad_norm: 4.0700 (4.3016) time: 1.7320 data: 0.0003 max mem: 8421 +[2024-12-05 02:49:25 root] (utils.py 283): INFO Epoch: [4] [ 770/2502] eta: 0:49:42 lr: 0.000020 loss_cls: 4.2010 (3.9348) grad_norm: 4.2629 (4.3007) time: 1.7313 data: 0.0003 max mem: 8421 +[2024-12-05 02:49:42 root] (utils.py 283): INFO Epoch: [4] [ 780/2502] eta: 0:49:25 lr: 0.000020 loss_cls: 4.0027 (3.9360) grad_norm: 4.2921 (4.3006) time: 1.7244 data: 0.0002 max mem: 8421 +[2024-12-05 02:49:59 root] (utils.py 283): INFO Epoch: [4] [ 790/2502] eta: 0:49:07 lr: 0.000020 loss_cls: 3.9581 (3.9365) grad_norm: 4.1797 (4.2991) time: 1.7222 data: 0.0002 max mem: 8421 +[2024-12-05 02:50:16 root] (utils.py 283): INFO Epoch: [4] [ 800/2502] eta: 0:48:50 lr: 0.000020 loss_cls: 3.8799 (3.9341) grad_norm: 4.0174 (4.2993) time: 1.7219 data: 0.0002 max mem: 8421 +[2024-12-05 02:50:34 root] (utils.py 283): INFO Epoch: [4] [ 810/2502] eta: 0:48:33 lr: 0.000020 loss_cls: 3.6418 (3.9328) grad_norm: 4.0985 (4.3054) time: 1.7242 data: 0.0002 max mem: 8421 +[2024-12-05 06:24:17 root] (main.py 225): INFO Namespace(batch_size=128, epochs=30, model='RMeeTo_tiny', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', output_dir='check/tiny/30', device='cuda', seed=0, autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='5', metric='X', distance='cosine', if_order=True, if_random=False, if_merge_odd=False, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-05 06:24:22 root] (main.py 288): INFO Creating model: RMeeTo_tiny +[2024-12-05 06:24:26 root] (main.py 368): INFO number of params: 7148008 +[2024-12-05 06:24:28 root] (main.py 484): INFO Start training for 26 epochs +[2024-12-05 06:24:34 root] (utils.py 283): INFO Epoch: [4] [ 0/2502] eta: 3:58:21 lr: 0.000020 loss_cls: 4.3947 (4.3947) grad_norm: 7.2570 (7.2570) time: 5.7159 data: 0.0014 max mem: 8394 +[2024-12-05 06:24:41 root] (utils.py 283): INFO Epoch: [4] [ 10/2502] eta: 0:51:15 lr: 0.000020 loss_cls: 4.3947 (4.3309) grad_norm: 4.5130 (4.6201) time: 1.2341 data: 0.0004 max mem: 8421 +[2024-12-05 06:24:49 root] (utils.py 283): INFO Epoch: [4] [ 20/2502] eta: 0:42:19 lr: 0.000020 loss_cls: 4.2437 (4.1813) grad_norm: 4.0216 (4.4474) time: 0.7887 data: 0.0002 max mem: 8421 +[2024-12-05 06:24:57 root] (utils.py 283): INFO Epoch: [4] [ 30/2502] eta: 0:39:05 lr: 0.000020 loss_cls: 4.1725 (4.1997) grad_norm: 3.9937 (4.3762) time: 0.7918 data: 0.0002 max mem: 8421 +[2024-12-05 06:25:05 root] (utils.py 283): INFO Epoch: [4] [ 40/2502] eta: 0:37:23 lr: 0.000020 loss_cls: 4.0699 (4.1314) grad_norm: 4.0088 (4.3802) time: 0.7933 data: 0.0002 max mem: 8421 +[2024-12-05 06:25:16 root] (utils.py 283): INFO Epoch: [4] [ 50/2502] eta: 0:38:23 lr: 0.000020 loss_cls: 3.9788 (4.1023) grad_norm: 4.0088 (4.3411) time: 0.9246 data: 0.0006 max mem: 8421 +[2024-12-05 06:25:33 root] (utils.py 283): INFO Epoch: [4] [ 60/2502] eta: 0:43:42 lr: 0.000020 loss_cls: 3.9788 (4.0677) grad_norm: 4.0335 (4.3241) time: 1.4075 data: 0.0006 max mem: 8421 +[2024-12-05 06:25:50 root] (utils.py 283): INFO Epoch: [4] [ 70/2502] eta: 0:47:02 lr: 0.000020 loss_cls: 3.8926 (4.0260) grad_norm: 4.3725 (4.3563) time: 1.7256 data: 0.0003 max mem: 8421 +[2024-12-05 06:26:01 root] (utils.py 283): INFO Epoch: [4] [ 80/2502] eta: 0:46:30 lr: 0.000020 loss_cls: 3.9687 (4.0071) grad_norm: 4.4286 (4.3956) time: 1.3911 data: 0.0003 max mem: 8421 +[2024-12-05 06:26:09 root] (utils.py 283): INFO Epoch: [4] [ 90/2502] eta: 0:44:39 lr: 0.000020 loss_cls: 4.0369 (3.9940) grad_norm: 4.1676 (4.3733) time: 0.9341 data: 0.0003 max mem: 8421 +[2024-12-05 06:26:17 root] (utils.py 283): INFO Epoch: [4] [ 100/2502] eta: 0:43:14 lr: 0.000020 loss_cls: 4.1568 (4.0034) grad_norm: 4.1114 (4.3835) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-05 06:26:25 root] (utils.py 283): INFO Epoch: [4] [ 110/2502] eta: 0:41:59 lr: 0.000020 loss_cls: 4.2104 (3.9967) grad_norm: 4.0799 (4.3831) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-05 06:26:33 root] (utils.py 283): INFO Epoch: [4] [ 120/2502] eta: 0:40:58 lr: 0.000020 loss_cls: 4.0067 (3.9951) grad_norm: 4.0799 (4.3742) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-05 06:26:41 root] (utils.py 283): INFO Epoch: [4] [ 130/2502] eta: 0:40:02 lr: 0.000020 loss_cls: 4.0134 (3.9882) grad_norm: 4.1596 (4.3550) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-05 06:26:48 root] (utils.py 283): INFO Epoch: [4] [ 140/2502] eta: 0:39:13 lr: 0.000020 loss_cls: 4.0134 (3.9743) grad_norm: 4.0144 (4.3389) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-05 06:26:56 root] (utils.py 283): INFO Epoch: [4] [ 150/2502] eta: 0:38:29 lr: 0.000020 loss_cls: 3.8851 (3.9664) grad_norm: 4.2797 (4.3540) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:04 root] (utils.py 283): INFO Epoch: [4] [ 160/2502] eta: 0:37:48 lr: 0.000020 loss_cls: 3.9227 (3.9643) grad_norm: 4.2745 (4.3631) time: 0.7732 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:11 root] (utils.py 283): INFO Epoch: [4] [ 170/2502] eta: 0:37:11 lr: 0.000020 loss_cls: 3.9524 (3.9609) grad_norm: 4.1065 (4.3574) time: 0.7672 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:19 root] (utils.py 283): INFO Epoch: [4] [ 180/2502] eta: 0:36:38 lr: 0.000020 loss_cls: 3.9029 (3.9436) grad_norm: 4.1739 (4.3506) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:27 root] (utils.py 283): INFO Epoch: [4] [ 190/2502] eta: 0:36:10 lr: 0.000020 loss_cls: 3.7934 (3.9373) grad_norm: 4.0319 (4.3353) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:35 root] (utils.py 283): INFO Epoch: [4] [ 200/2502] eta: 0:35:41 lr: 0.000020 loss_cls: 3.6647 (3.9227) grad_norm: 4.0319 (4.3275) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:43 root] (utils.py 283): INFO Epoch: [4] [ 210/2502] eta: 0:35:14 lr: 0.000020 loss_cls: 3.8089 (3.9158) grad_norm: 4.0592 (4.3133) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:50 root] (utils.py 283): INFO Epoch: [4] [ 220/2502] eta: 0:34:49 lr: 0.000020 loss_cls: 3.8719 (3.9074) grad_norm: 3.9503 (4.3111) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-05 06:27:58 root] (utils.py 283): INFO Epoch: [4] [ 230/2502] eta: 0:34:26 lr: 0.000020 loss_cls: 3.9125 (3.9042) grad_norm: 4.0480 (4.3088) time: 0.7743 data: 0.0002 max mem: 8421 +[2024-12-05 06:28:06 root] (utils.py 283): INFO Epoch: [4] [ 240/2502] eta: 0:34:05 lr: 0.000020 loss_cls: 3.9277 (3.9007) grad_norm: 4.1722 (4.3033) time: 0.7786 data: 0.0002 max mem: 8421 +[2024-12-05 06:28:14 root] (utils.py 283): INFO Epoch: [4] [ 250/2502] eta: 0:33:45 lr: 0.000020 loss_cls: 3.8168 (3.8873) grad_norm: 4.1722 (4.3035) time: 0.7781 data: 0.0002 max mem: 8421 +[2024-12-05 06:28:21 root] (utils.py 283): INFO Epoch: [4] [ 260/2502] eta: 0:33:25 lr: 0.000020 loss_cls: 3.7260 (3.8762) grad_norm: 4.2332 (4.2986) time: 0.7760 data: 0.0003 max mem: 8421 +[2024-12-05 06:28:29 root] (utils.py 283): INFO Epoch: [4] [ 270/2502] eta: 0:33:06 lr: 0.000020 loss_cls: 3.7677 (3.8703) grad_norm: 4.2206 (4.2958) time: 0.7741 data: 0.0003 max mem: 8421 +[2024-12-05 06:28:37 root] (utils.py 283): INFO Epoch: [4] [ 280/2502] eta: 0:32:48 lr: 0.000020 loss_cls: 3.9387 (3.8765) grad_norm: 4.0220 (4.2796) time: 0.7698 data: 0.0003 max mem: 8421 +[2024-12-05 06:28:44 root] (utils.py 283): INFO Epoch: [4] [ 290/2502] eta: 0:32:30 lr: 0.000020 loss_cls: 3.9387 (3.8673) grad_norm: 4.0436 (4.2895) time: 0.7678 data: 0.0003 max mem: 8421 +[2024-12-05 06:28:52 root] (utils.py 283): INFO Epoch: [4] [ 300/2502] eta: 0:32:13 lr: 0.000020 loss_cls: 3.4690 (3.8583) grad_norm: 4.1483 (4.2867) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:00 root] (utils.py 283): INFO Epoch: [4] [ 310/2502] eta: 0:31:56 lr: 0.000020 loss_cls: 3.6649 (3.8562) grad_norm: 4.3257 (4.2993) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:08 root] (utils.py 283): INFO Epoch: [4] [ 320/2502] eta: 0:31:41 lr: 0.000020 loss_cls: 4.0793 (3.8697) grad_norm: 4.4416 (4.2995) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:16 root] (utils.py 283): INFO Epoch: [4] [ 330/2502] eta: 0:31:27 lr: 0.000020 loss_cls: 4.2501 (3.8673) grad_norm: 4.0935 (4.2959) time: 0.7911 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:23 root] (utils.py 283): INFO Epoch: [4] [ 340/2502] eta: 0:31:14 lr: 0.000020 loss_cls: 4.0513 (3.8725) grad_norm: 4.0935 (4.2982) time: 0.7917 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:31 root] (utils.py 283): INFO Epoch: [4] [ 350/2502] eta: 0:31:00 lr: 0.000020 loss_cls: 4.0174 (3.8724) grad_norm: 4.2696 (4.2980) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:39 root] (utils.py 283): INFO Epoch: [4] [ 360/2502] eta: 0:30:46 lr: 0.000020 loss_cls: 3.9294 (3.8695) grad_norm: 4.2696 (4.2951) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:47 root] (utils.py 283): INFO Epoch: [4] [ 370/2502] eta: 0:30:32 lr: 0.000020 loss_cls: 3.8134 (3.8671) grad_norm: 4.1575 (4.2896) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-05 06:29:54 root] (utils.py 283): INFO Epoch: [4] [ 380/2502] eta: 0:30:18 lr: 0.000020 loss_cls: 3.9809 (3.8646) grad_norm: 3.9687 (4.2862) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-05 06:30:02 root] (utils.py 283): INFO Epoch: [4] [ 390/2502] eta: 0:30:05 lr: 0.000020 loss_cls: 3.9809 (3.8619) grad_norm: 4.1167 (4.2913) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-05 06:30:10 root] (utils.py 283): INFO Epoch: [4] [ 400/2502] eta: 0:29:52 lr: 0.000020 loss_cls: 3.9931 (3.8663) grad_norm: 4.1376 (4.2974) time: 0.7684 data: 0.0003 max mem: 8421 +[2024-12-05 06:30:18 root] (utils.py 283): INFO Epoch: [4] [ 410/2502] eta: 0:29:39 lr: 0.000020 loss_cls: 4.0529 (3.8731) grad_norm: 4.1461 (4.2956) time: 0.7727 data: 0.0003 max mem: 8421 +[2024-12-05 06:30:25 root] (utils.py 283): INFO Epoch: [4] [ 420/2502] eta: 0:29:27 lr: 0.000020 loss_cls: 4.1192 (3.8745) grad_norm: 4.2782 (4.2966) time: 0.7739 data: 0.0002 max mem: 8421 +[2024-12-05 06:30:33 root] (utils.py 283): INFO Epoch: [4] [ 430/2502] eta: 0:29:15 lr: 0.000020 loss_cls: 4.2061 (3.8802) grad_norm: 4.2407 (4.3075) time: 0.7698 data: 0.0003 max mem: 8421 +[2024-12-05 06:30:41 root] (utils.py 283): INFO Epoch: [4] [ 440/2502] eta: 0:29:03 lr: 0.000020 loss_cls: 4.2205 (3.8862) grad_norm: 4.2036 (4.3053) time: 0.7723 data: 0.0003 max mem: 8421 +[2024-12-05 06:30:49 root] (utils.py 283): INFO Epoch: [4] [ 450/2502] eta: 0:28:52 lr: 0.000020 loss_cls: 3.9451 (3.8854) grad_norm: 4.1743 (4.3040) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 06:30:56 root] (utils.py 283): INFO Epoch: [4] [ 460/2502] eta: 0:28:40 lr: 0.000020 loss_cls: 3.8939 (3.8801) grad_norm: 4.2784 (4.3043) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:04 root] (utils.py 283): INFO Epoch: [4] [ 470/2502] eta: 0:28:28 lr: 0.000020 loss_cls: 3.5576 (3.8701) grad_norm: 4.1759 (4.3007) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:12 root] (utils.py 283): INFO Epoch: [4] [ 480/2502] eta: 0:28:17 lr: 0.000020 loss_cls: 3.6448 (3.8675) grad_norm: 4.1418 (4.2956) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:19 root] (utils.py 283): INFO Epoch: [4] [ 490/2502] eta: 0:28:06 lr: 0.000020 loss_cls: 3.8725 (3.8730) grad_norm: 4.0064 (4.2983) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:27 root] (utils.py 283): INFO Epoch: [4] [ 500/2502] eta: 0:27:55 lr: 0.000020 loss_cls: 3.9631 (3.8698) grad_norm: 4.1040 (4.3023) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:35 root] (utils.py 283): INFO Epoch: [4] [ 510/2502] eta: 0:27:44 lr: 0.000020 loss_cls: 3.9163 (3.8739) grad_norm: 4.2494 (4.3025) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:42 root] (utils.py 283): INFO Epoch: [4] [ 520/2502] eta: 0:27:33 lr: 0.000020 loss_cls: 4.0161 (3.8757) grad_norm: 4.3028 (4.3025) time: 0.7709 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:50 root] (utils.py 283): INFO Epoch: [4] [ 530/2502] eta: 0:27:22 lr: 0.000020 loss_cls: 4.0161 (3.8699) grad_norm: 3.9934 (4.2992) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 06:31:58 root] (utils.py 283): INFO Epoch: [4] [ 540/2502] eta: 0:27:11 lr: 0.000020 loss_cls: 3.9856 (3.8679) grad_norm: 3.9719 (4.2970) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 06:32:05 root] (utils.py 283): INFO Epoch: [4] [ 550/2502] eta: 0:27:01 lr: 0.000020 loss_cls: 3.9856 (3.8676) grad_norm: 4.1612 (4.3012) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 06:32:13 root] (utils.py 283): INFO Epoch: [4] [ 560/2502] eta: 0:26:51 lr: 0.000020 loss_cls: 4.2319 (3.8722) grad_norm: 4.2530 (4.2992) time: 0.7738 data: 0.0003 max mem: 8421 +[2024-12-05 06:32:21 root] (utils.py 283): INFO Epoch: [4] [ 570/2502] eta: 0:26:40 lr: 0.000020 loss_cls: 4.1830 (3.8754) grad_norm: 4.0591 (4.2965) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-05 06:32:29 root] (utils.py 283): INFO Epoch: [4] [ 580/2502] eta: 0:26:30 lr: 0.000020 loss_cls: 4.0760 (3.8773) grad_norm: 4.0591 (4.3327) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 06:32:36 root] (utils.py 283): INFO Epoch: [4] [ 590/2502] eta: 0:26:20 lr: 0.000020 loss_cls: 4.0760 (3.8781) grad_norm: 4.1759 (4.3308) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-05 06:32:44 root] (utils.py 283): INFO Epoch: [4] [ 600/2502] eta: 0:26:11 lr: 0.000020 loss_cls: 4.0218 (3.8770) grad_norm: 4.1397 (4.3282) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-05 06:32:52 root] (utils.py 283): INFO Epoch: [4] [ 610/2502] eta: 0:26:01 lr: 0.000020 loss_cls: 3.8182 (3.8769) grad_norm: 4.3353 (4.3390) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 06:33:00 root] (utils.py 283): INFO Epoch: [4] [ 620/2502] eta: 0:25:51 lr: 0.000020 loss_cls: 3.8182 (3.8792) grad_norm: 4.3304 (4.3361) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-05 06:33:08 root] (utils.py 283): INFO Epoch: [4] [ 630/2502] eta: 0:25:41 lr: 0.000020 loss_cls: 4.0885 (3.8813) grad_norm: 4.3756 (4.3860) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 06:33:15 root] (utils.py 283): INFO Epoch: [4] [ 640/2502] eta: 0:25:31 lr: 0.000020 loss_cls: 3.9839 (3.8804) grad_norm: 4.5081 (4.3863) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-05 06:33:23 root] (utils.py 283): INFO Epoch: [4] [ 650/2502] eta: 0:25:21 lr: 0.000020 loss_cls: 3.8972 (3.8807) grad_norm: 4.2452 (4.3851) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 06:33:30 root] (utils.py 283): INFO Epoch: [4] [ 660/2502] eta: 0:25:11 lr: 0.000020 loss_cls: 3.6625 (3.8741) grad_norm: 4.2452 (4.3821) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 06:33:38 root] (utils.py 283): INFO Epoch: [4] [ 670/2502] eta: 0:25:02 lr: 0.000020 loss_cls: 3.7858 (3.8782) grad_norm: 4.2548 (4.3832) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 06:33:46 root] (utils.py 283): INFO Epoch: [4] [ 680/2502] eta: 0:24:52 lr: 0.000020 loss_cls: 4.1967 (3.8795) grad_norm: 4.2898 (4.3833) time: 0.7679 data: 0.0002 max mem: 8421 +[2024-12-05 06:33:53 root] (utils.py 283): INFO Epoch: [4] [ 690/2502] eta: 0:24:43 lr: 0.000020 loss_cls: 3.6185 (3.8750) grad_norm: 4.2532 (4.3796) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 06:34:01 root] (utils.py 283): INFO Epoch: [4] [ 700/2502] eta: 0:24:33 lr: 0.000020 loss_cls: 3.5285 (3.8728) grad_norm: 4.0834 (4.3793) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 06:34:09 root] (utils.py 283): INFO Epoch: [4] [ 710/2502] eta: 0:24:23 lr: 0.000020 loss_cls: 3.7855 (3.8718) grad_norm: 4.1010 (4.3802) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 06:34:17 root] (utils.py 283): INFO Epoch: [4] [ 720/2502] eta: 0:24:14 lr: 0.000020 loss_cls: 3.7855 (3.8689) grad_norm: 4.1010 (4.3775) time: 0.7727 data: 0.0002 max mem: 8421 +[2024-12-05 06:34:24 root] (utils.py 283): INFO Epoch: [4] [ 730/2502] eta: 0:24:05 lr: 0.000020 loss_cls: 3.9149 (3.8690) grad_norm: 4.0760 (4.3744) time: 0.7758 data: 0.0002 max mem: 8421 +[2024-12-05 06:34:32 root] (utils.py 283): INFO Epoch: [4] [ 740/2502] eta: 0:23:56 lr: 0.000020 loss_cls: 4.0847 (3.8740) grad_norm: 4.1182 (4.3710) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 06:34:40 root] (utils.py 283): INFO Epoch: [4] [ 750/2502] eta: 0:23:47 lr: 0.000020 loss_cls: 4.1371 (3.8725) grad_norm: 4.0889 (4.3699) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 06:34:47 root] (utils.py 283): INFO Epoch: [4] [ 760/2502] eta: 0:23:38 lr: 0.000020 loss_cls: 4.0763 (3.8727) grad_norm: 4.2103 (4.3686) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 06:34:55 root] (utils.py 283): INFO Epoch: [4] [ 770/2502] eta: 0:23:28 lr: 0.000020 loss_cls: 4.0763 (3.8733) grad_norm: 4.2476 (4.3684) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:03 root] (utils.py 283): INFO Epoch: [4] [ 780/2502] eta: 0:23:20 lr: 0.000020 loss_cls: 3.8754 (3.8745) grad_norm: 4.0976 (4.3670) time: 0.7709 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:11 root] (utils.py 283): INFO Epoch: [4] [ 790/2502] eta: 0:23:10 lr: 0.000020 loss_cls: 3.8331 (3.8721) grad_norm: 3.9551 (4.3619) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:18 root] (utils.py 283): INFO Epoch: [4] [ 800/2502] eta: 0:23:01 lr: 0.000020 loss_cls: 3.7488 (3.8720) grad_norm: 3.9829 (4.3596) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:26 root] (utils.py 283): INFO Epoch: [4] [ 810/2502] eta: 0:22:52 lr: 0.000020 loss_cls: 3.5632 (3.8645) grad_norm: 4.0958 (4.3558) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:33 root] (utils.py 283): INFO Epoch: [4] [ 820/2502] eta: 0:22:43 lr: 0.000020 loss_cls: 3.5632 (3.8657) grad_norm: 4.0642 (4.3532) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:41 root] (utils.py 283): INFO Epoch: [4] [ 830/2502] eta: 0:22:34 lr: 0.000020 loss_cls: 4.0291 (3.8615) grad_norm: 4.0832 (4.3521) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:49 root] (utils.py 283): INFO Epoch: [4] [ 840/2502] eta: 0:22:25 lr: 0.000020 loss_cls: 3.5913 (3.8610) grad_norm: 4.1417 (4.3532) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 06:35:56 root] (utils.py 283): INFO Epoch: [4] [ 850/2502] eta: 0:22:16 lr: 0.000020 loss_cls: 3.9899 (3.8587) grad_norm: 4.1596 (4.3540) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-05 06:36:04 root] (utils.py 283): INFO Epoch: [4] [ 860/2502] eta: 0:22:07 lr: 0.000020 loss_cls: 3.8999 (3.8592) grad_norm: 4.2554 (4.3525) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 06:36:12 root] (utils.py 283): INFO Epoch: [4] [ 870/2502] eta: 0:21:58 lr: 0.000020 loss_cls: 3.8866 (3.8592) grad_norm: 4.3185 (4.3563) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 06:36:19 root] (utils.py 283): INFO Epoch: [4] [ 880/2502] eta: 0:21:49 lr: 0.000020 loss_cls: 3.8866 (3.8570) grad_norm: 4.3236 (4.3599) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 06:36:27 root] (utils.py 283): INFO Epoch: [4] [ 890/2502] eta: 0:21:41 lr: 0.000020 loss_cls: 3.8182 (3.8548) grad_norm: 4.2304 (4.3588) time: 0.7748 data: 0.0002 max mem: 8421 +[2024-12-05 06:36:35 root] (utils.py 283): INFO Epoch: [4] [ 900/2502] eta: 0:21:32 lr: 0.000020 loss_cls: 3.9186 (3.8548) grad_norm: 4.2453 (4.3583) time: 0.7867 data: 0.0002 max mem: 8421 +[2024-12-05 06:36:43 root] (utils.py 283): INFO Epoch: [4] [ 910/2502] eta: 0:21:24 lr: 0.000020 loss_cls: 4.0573 (3.8551) grad_norm: 4.0373 (4.3560) time: 0.7872 data: 0.0002 max mem: 8421 +[2024-12-05 06:36:51 root] (utils.py 283): INFO Epoch: [4] [ 920/2502] eta: 0:21:15 lr: 0.000020 loss_cls: 4.2085 (3.8569) grad_norm: 4.0373 (4.3548) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 06:36:58 root] (utils.py 283): INFO Epoch: [4] [ 930/2502] eta: 0:21:06 lr: 0.000020 loss_cls: 4.2085 (3.8598) grad_norm: 4.1469 (4.3542) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 06:37:06 root] (utils.py 283): INFO Epoch: [4] [ 940/2502] eta: 0:20:58 lr: 0.000020 loss_cls: 4.0947 (3.8587) grad_norm: 4.3669 (4.3604) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 06:37:14 root] (utils.py 283): INFO Epoch: [4] [ 950/2502] eta: 0:20:49 lr: 0.000020 loss_cls: 3.2917 (3.8535) grad_norm: 4.3956 (4.3600) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 06:37:21 root] (utils.py 283): INFO Epoch: [4] [ 960/2502] eta: 0:20:40 lr: 0.000020 loss_cls: 3.7969 (3.8546) grad_norm: 4.2635 (4.3593) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 06:37:29 root] (utils.py 283): INFO Epoch: [4] [ 970/2502] eta: 0:20:32 lr: 0.000020 loss_cls: 4.0293 (3.8554) grad_norm: 4.0630 (4.3560) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 06:37:37 root] (utils.py 283): INFO Epoch: [4] [ 980/2502] eta: 0:20:23 lr: 0.000020 loss_cls: 4.0236 (3.8546) grad_norm: 4.0266 (4.3551) time: 0.7679 data: 0.0002 max mem: 8421 +[2024-12-05 06:37:44 root] (utils.py 283): INFO Epoch: [4] [ 990/2502] eta: 0:20:14 lr: 0.000020 loss_cls: 4.1188 (3.8546) grad_norm: 4.3067 (4.3608) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 06:37:52 root] (utils.py 283): INFO Epoch: [4] [1000/2502] eta: 0:20:06 lr: 0.000020 loss_cls: 3.9925 (3.8539) grad_norm: 4.2504 (4.3593) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 06:38:00 root] (utils.py 283): INFO Epoch: [4] [1010/2502] eta: 0:19:58 lr: 0.000020 loss_cls: 3.9568 (3.8553) grad_norm: 4.1309 (4.3581) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 06:38:07 root] (utils.py 283): INFO Epoch: [4] [1020/2502] eta: 0:19:49 lr: 0.000020 loss_cls: 3.9656 (3.8547) grad_norm: 4.1309 (4.3570) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 06:38:15 root] (utils.py 283): INFO Epoch: [4] [1030/2502] eta: 0:19:40 lr: 0.000020 loss_cls: 3.7122 (3.8514) grad_norm: 4.1514 (4.3566) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 06:38:23 root] (utils.py 283): INFO Epoch: [4] [1040/2502] eta: 0:19:32 lr: 0.000020 loss_cls: 3.9639 (3.8535) grad_norm: 4.1843 (4.3561) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 06:38:30 root] (utils.py 283): INFO Epoch: [4] [1050/2502] eta: 0:19:23 lr: 0.000020 loss_cls: 3.9170 (3.8527) grad_norm: 4.1843 (4.3565) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 06:38:38 root] (utils.py 283): INFO Epoch: [4] [1060/2502] eta: 0:19:15 lr: 0.000020 loss_cls: 3.9170 (3.8543) grad_norm: 4.1767 (4.3552) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 06:38:46 root] (utils.py 283): INFO Epoch: [4] [1070/2502] eta: 0:19:06 lr: 0.000020 loss_cls: 3.9430 (3.8541) grad_norm: 4.2367 (4.3543) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-05 06:38:53 root] (utils.py 283): INFO Epoch: [4] [1080/2502] eta: 0:18:58 lr: 0.000020 loss_cls: 3.8993 (3.8538) grad_norm: 4.2828 (4.3540) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 06:39:01 root] (utils.py 283): INFO Epoch: [4] [1090/2502] eta: 0:18:50 lr: 0.000020 loss_cls: 3.8326 (3.8526) grad_norm: 4.2086 (4.3526) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 06:39:09 root] (utils.py 283): INFO Epoch: [4] [1100/2502] eta: 0:18:42 lr: 0.000020 loss_cls: 3.8326 (3.8520) grad_norm: 4.1362 (4.3520) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-05 06:39:17 root] (utils.py 283): INFO Epoch: [4] [1110/2502] eta: 0:18:33 lr: 0.000020 loss_cls: 3.8760 (3.8519) grad_norm: 4.3903 (4.3524) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 06:39:25 root] (utils.py 283): INFO Epoch: [4] [1120/2502] eta: 0:18:25 lr: 0.000020 loss_cls: 4.0081 (3.8537) grad_norm: 4.3467 (4.3532) time: 0.7729 data: 0.0002 max mem: 8421 +[2024-12-05 06:39:32 root] (utils.py 283): INFO Epoch: [4] [1130/2502] eta: 0:18:16 lr: 0.000020 loss_cls: 4.0265 (3.8553) grad_norm: 4.3467 (4.3544) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 06:39:40 root] (utils.py 283): INFO Epoch: [4] [1140/2502] eta: 0:18:08 lr: 0.000020 loss_cls: 3.9841 (3.8539) grad_norm: 4.1848 (4.3530) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 06:39:48 root] (utils.py 283): INFO Epoch: [4] [1150/2502] eta: 0:18:00 lr: 0.000020 loss_cls: 3.7540 (3.8528) grad_norm: 4.0695 (4.3502) time: 0.7709 data: 0.0002 max mem: 8421 +[2024-12-05 06:39:55 root] (utils.py 283): INFO Epoch: [4] [1160/2502] eta: 0:17:52 lr: 0.000020 loss_cls: 3.9224 (3.8530) grad_norm: 3.9759 (4.3482) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-05 06:40:03 root] (utils.py 283): INFO Epoch: [4] [1170/2502] eta: 0:17:43 lr: 0.000020 loss_cls: 3.8630 (3.8520) grad_norm: 4.1401 (4.3474) time: 0.7730 data: 0.0002 max mem: 8421 +[2024-12-05 06:40:11 root] (utils.py 283): INFO Epoch: [4] [1180/2502] eta: 0:17:35 lr: 0.000020 loss_cls: 4.1056 (3.8547) grad_norm: 4.2413 (4.3469) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 06:40:19 root] (utils.py 283): INFO Epoch: [4] [1190/2502] eta: 0:17:27 lr: 0.000020 loss_cls: 4.1392 (3.8567) grad_norm: 4.1213 (4.3460) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-05 06:40:26 root] (utils.py 283): INFO Epoch: [4] [1200/2502] eta: 0:17:19 lr: 0.000020 loss_cls: 4.1025 (3.8577) grad_norm: 4.0440 (4.3444) time: 0.7829 data: 0.0002 max mem: 8421 +[2024-12-05 06:40:34 root] (utils.py 283): INFO Epoch: [4] [1210/2502] eta: 0:17:10 lr: 0.000020 loss_cls: 3.8837 (3.8569) grad_norm: 4.0343 (4.3441) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-05 06:40:42 root] (utils.py 283): INFO Epoch: [4] [1220/2502] eta: 0:17:02 lr: 0.000020 loss_cls: 3.8593 (3.8568) grad_norm: 4.1778 (4.3442) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 06:40:50 root] (utils.py 283): INFO Epoch: [4] [1230/2502] eta: 0:16:54 lr: 0.000020 loss_cls: 3.8875 (3.8567) grad_norm: 4.2754 (4.3447) time: 0.7845 data: 0.0002 max mem: 8421 +[2024-12-05 06:40:57 root] (utils.py 283): INFO Epoch: [4] [1240/2502] eta: 0:16:46 lr: 0.000020 loss_cls: 4.0394 (3.8570) grad_norm: 4.3456 (4.3459) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 06:41:05 root] (utils.py 283): INFO Epoch: [4] [1250/2502] eta: 0:16:37 lr: 0.000020 loss_cls: 3.7748 (3.8555) grad_norm: 4.1866 (4.3459) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 06:41:13 root] (utils.py 283): INFO Epoch: [4] [1260/2502] eta: 0:16:29 lr: 0.000020 loss_cls: 4.0501 (3.8565) grad_norm: 4.1678 (4.3453) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 06:41:20 root] (utils.py 283): INFO Epoch: [4] [1270/2502] eta: 0:16:21 lr: 0.000020 loss_cls: 3.9966 (3.8576) grad_norm: 4.2100 (4.3470) time: 0.7698 data: 0.0003 max mem: 8421 +[2024-12-05 06:41:28 root] (utils.py 283): INFO Epoch: [4] [1280/2502] eta: 0:16:13 lr: 0.000020 loss_cls: 4.1027 (3.8595) grad_norm: 4.1788 (4.3466) time: 0.7709 data: 0.0003 max mem: 8421 +[2024-12-05 06:41:36 root] (utils.py 283): INFO Epoch: [4] [1290/2502] eta: 0:16:04 lr: 0.000020 loss_cls: 4.1368 (3.8612) grad_norm: 4.3882 (4.3485) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-05 06:41:43 root] (utils.py 283): INFO Epoch: [4] [1300/2502] eta: 0:15:56 lr: 0.000020 loss_cls: 4.0908 (3.8616) grad_norm: 4.3530 (4.3473) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 06:41:51 root] (utils.py 283): INFO Epoch: [4] [1310/2502] eta: 0:15:48 lr: 0.000020 loss_cls: 3.7576 (3.8600) grad_norm: 4.3418 (4.3482) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 06:41:59 root] (utils.py 283): INFO Epoch: [4] [1320/2502] eta: 0:15:40 lr: 0.000020 loss_cls: 3.6037 (3.8577) grad_norm: 4.4147 (4.3489) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-05 06:42:06 root] (utils.py 283): INFO Epoch: [4] [1330/2502] eta: 0:15:31 lr: 0.000020 loss_cls: 3.9099 (3.8587) grad_norm: 4.3480 (4.3484) time: 0.7632 data: 0.0003 max mem: 8421 +[2024-12-05 06:42:14 root] (utils.py 283): INFO Epoch: [4] [1340/2502] eta: 0:15:23 lr: 0.000020 loss_cls: 4.1929 (3.8594) grad_norm: 4.0845 (4.3483) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 06:42:22 root] (utils.py 283): INFO Epoch: [4] [1350/2502] eta: 0:15:15 lr: 0.000020 loss_cls: 3.9197 (3.8593) grad_norm: 4.0436 (4.3480) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 06:42:29 root] (utils.py 283): INFO Epoch: [4] [1360/2502] eta: 0:15:07 lr: 0.000020 loss_cls: 3.9225 (3.8611) grad_norm: 4.3609 (4.3489) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 06:42:37 root] (utils.py 283): INFO Epoch: [4] [1370/2502] eta: 0:14:59 lr: 0.000020 loss_cls: 4.1037 (3.8622) grad_norm: 4.3609 (4.3496) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 06:42:45 root] (utils.py 283): INFO Epoch: [4] [1380/2502] eta: 0:14:51 lr: 0.000020 loss_cls: 4.0987 (3.8632) grad_norm: 4.2675 (4.3489) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 06:42:52 root] (utils.py 283): INFO Epoch: [4] [1390/2502] eta: 0:14:42 lr: 0.000020 loss_cls: 4.1061 (3.8640) grad_norm: 4.4689 (4.3506) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:00 root] (utils.py 283): INFO Epoch: [4] [1400/2502] eta: 0:14:34 lr: 0.000020 loss_cls: 4.1061 (3.8640) grad_norm: 4.4608 (4.3504) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:08 root] (utils.py 283): INFO Epoch: [4] [1410/2502] eta: 0:14:26 lr: 0.000020 loss_cls: 4.1342 (3.8665) grad_norm: 4.2836 (4.3508) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:15 root] (utils.py 283): INFO Epoch: [4] [1420/2502] eta: 0:14:18 lr: 0.000020 loss_cls: 3.9615 (3.8657) grad_norm: 4.1369 (4.3491) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:23 root] (utils.py 283): INFO Epoch: [4] [1430/2502] eta: 0:14:10 lr: 0.000020 loss_cls: 3.8695 (3.8660) grad_norm: 4.0742 (4.3493) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:31 root] (utils.py 283): INFO Epoch: [4] [1440/2502] eta: 0:14:02 lr: 0.000020 loss_cls: 3.9782 (3.8651) grad_norm: 4.1187 (4.3479) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:38 root] (utils.py 283): INFO Epoch: [4] [1450/2502] eta: 0:13:53 lr: 0.000020 loss_cls: 4.0836 (3.8657) grad_norm: 4.1974 (4.3496) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:46 root] (utils.py 283): INFO Epoch: [4] [1460/2502] eta: 0:13:45 lr: 0.000020 loss_cls: 4.2105 (3.8670) grad_norm: 4.2187 (4.3484) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 06:43:54 root] (utils.py 283): INFO Epoch: [4] [1470/2502] eta: 0:13:37 lr: 0.000020 loss_cls: 4.1331 (3.8663) grad_norm: 4.1644 (4.3478) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:01 root] (utils.py 283): INFO Epoch: [4] [1480/2502] eta: 0:13:29 lr: 0.000020 loss_cls: 3.6393 (3.8636) grad_norm: 4.1147 (4.3472) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:09 root] (utils.py 283): INFO Epoch: [4] [1490/2502] eta: 0:13:21 lr: 0.000020 loss_cls: 3.4903 (3.8598) grad_norm: 4.2044 (4.3464) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:16 root] (utils.py 283): INFO Epoch: [4] [1500/2502] eta: 0:13:13 lr: 0.000020 loss_cls: 3.5038 (3.8595) grad_norm: 4.2343 (4.3464) time: 0.7609 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:24 root] (utils.py 283): INFO Epoch: [4] [1510/2502] eta: 0:13:05 lr: 0.000020 loss_cls: 3.9479 (3.8598) grad_norm: 4.1415 (4.3458) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:32 root] (utils.py 283): INFO Epoch: [4] [1520/2502] eta: 0:12:57 lr: 0.000020 loss_cls: 3.9934 (3.8609) grad_norm: 4.2303 (4.3452) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:39 root] (utils.py 283): INFO Epoch: [4] [1530/2502] eta: 0:12:49 lr: 0.000020 loss_cls: 3.9973 (3.8605) grad_norm: 4.2980 (4.3456) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:47 root] (utils.py 283): INFO Epoch: [4] [1540/2502] eta: 0:12:40 lr: 0.000020 loss_cls: 3.9692 (3.8605) grad_norm: 4.3445 (4.3447) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 06:44:55 root] (utils.py 283): INFO Epoch: [4] [1550/2502] eta: 0:12:32 lr: 0.000020 loss_cls: 3.8216 (3.8592) grad_norm: 4.2853 (4.3468) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 06:45:02 root] (utils.py 283): INFO Epoch: [4] [1560/2502] eta: 0:12:24 lr: 0.000020 loss_cls: 3.7337 (3.8583) grad_norm: 4.3142 (4.3475) time: 0.7623 data: 0.0003 max mem: 8421 +[2024-12-05 06:45:10 root] (utils.py 283): INFO Epoch: [4] [1570/2502] eta: 0:12:16 lr: 0.000020 loss_cls: 3.7431 (3.8559) grad_norm: 4.3142 (4.3472) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 06:45:18 root] (utils.py 283): INFO Epoch: [4] [1580/2502] eta: 0:12:08 lr: 0.000020 loss_cls: 3.7521 (3.8556) grad_norm: 4.3470 (4.3489) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-05 06:45:25 root] (utils.py 283): INFO Epoch: [4] [1590/2502] eta: 0:12:00 lr: 0.000020 loss_cls: 4.0237 (3.8571) grad_norm: 4.4375 (4.3502) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 06:45:33 root] (utils.py 283): INFO Epoch: [4] [1600/2502] eta: 0:11:52 lr: 0.000020 loss_cls: 4.0364 (3.8587) grad_norm: 4.2961 (4.3501) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 06:45:41 root] (utils.py 283): INFO Epoch: [4] [1610/2502] eta: 0:11:44 lr: 0.000020 loss_cls: 4.0364 (3.8591) grad_norm: 4.1367 (4.3490) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 06:45:48 root] (utils.py 283): INFO Epoch: [4] [1620/2502] eta: 0:11:36 lr: 0.000020 loss_cls: 3.8114 (3.8595) grad_norm: 4.1990 (4.3488) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 06:45:56 root] (utils.py 283): INFO Epoch: [4] [1630/2502] eta: 0:11:28 lr: 0.000020 loss_cls: 3.9834 (3.8600) grad_norm: 4.2869 (4.3487) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 06:46:04 root] (utils.py 283): INFO Epoch: [4] [1640/2502] eta: 0:11:20 lr: 0.000020 loss_cls: 4.0626 (3.8597) grad_norm: 4.2402 (4.3485) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 06:46:11 root] (utils.py 283): INFO Epoch: [4] [1650/2502] eta: 0:11:12 lr: 0.000020 loss_cls: 3.7624 (3.8567) grad_norm: 4.1904 (4.3476) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 06:46:19 root] (utils.py 283): INFO Epoch: [4] [1660/2502] eta: 0:11:04 lr: 0.000020 loss_cls: 3.3533 (3.8542) grad_norm: 4.1751 (4.3467) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 06:46:27 root] (utils.py 283): INFO Epoch: [4] [1670/2502] eta: 0:10:56 lr: 0.000020 loss_cls: 3.5856 (3.8543) grad_norm: 4.3190 (4.3594) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-05 06:46:34 root] (utils.py 283): INFO Epoch: [4] [1680/2502] eta: 0:10:48 lr: 0.000020 loss_cls: 4.0190 (3.8538) grad_norm: 4.6805 (4.3622) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 06:46:42 root] (utils.py 283): INFO Epoch: [4] [1690/2502] eta: 0:10:40 lr: 0.000020 loss_cls: 3.9980 (3.8539) grad_norm: 4.3945 (4.3629) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 06:46:50 root] (utils.py 283): INFO Epoch: [4] [1700/2502] eta: 0:10:32 lr: 0.000020 loss_cls: 4.1826 (3.8564) grad_norm: 4.3732 (4.3649) time: 0.7695 data: 0.0003 max mem: 8421 +[2024-12-05 06:46:57 root] (utils.py 283): INFO Epoch: [4] [1710/2502] eta: 0:10:24 lr: 0.000020 loss_cls: 4.0998 (3.8562) grad_norm: 4.2484 (4.3644) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:05 root] (utils.py 283): INFO Epoch: [4] [1720/2502] eta: 0:10:16 lr: 0.000020 loss_cls: 3.7167 (3.8553) grad_norm: 4.1309 (4.3631) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:13 root] (utils.py 283): INFO Epoch: [4] [1730/2502] eta: 0:10:08 lr: 0.000020 loss_cls: 4.1144 (3.8567) grad_norm: 4.0536 (4.3625) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:20 root] (utils.py 283): INFO Epoch: [4] [1740/2502] eta: 0:10:00 lr: 0.000020 loss_cls: 4.0117 (3.8559) grad_norm: 4.1214 (4.3618) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:28 root] (utils.py 283): INFO Epoch: [4] [1750/2502] eta: 0:09:52 lr: 0.000020 loss_cls: 3.6067 (3.8550) grad_norm: 4.1564 (4.3613) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:36 root] (utils.py 283): INFO Epoch: [4] [1760/2502] eta: 0:09:44 lr: 0.000020 loss_cls: 3.5358 (3.8545) grad_norm: 4.2033 (4.3604) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:43 root] (utils.py 283): INFO Epoch: [4] [1770/2502] eta: 0:09:36 lr: 0.000020 loss_cls: 3.9984 (3.8552) grad_norm: 4.1622 (4.3601) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:51 root] (utils.py 283): INFO Epoch: [4] [1780/2502] eta: 0:09:28 lr: 0.000020 loss_cls: 4.0653 (3.8548) grad_norm: 4.1622 (4.3588) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 06:47:59 root] (utils.py 283): INFO Epoch: [4] [1790/2502] eta: 0:09:20 lr: 0.000020 loss_cls: 3.8037 (3.8537) grad_norm: 4.1569 (4.3585) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 06:48:06 root] (utils.py 283): INFO Epoch: [4] [1800/2502] eta: 0:09:12 lr: 0.000020 loss_cls: 3.8516 (3.8549) grad_norm: 4.1676 (4.3581) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 06:48:14 root] (utils.py 283): INFO Epoch: [4] [1810/2502] eta: 0:09:04 lr: 0.000020 loss_cls: 3.8712 (3.8542) grad_norm: 4.1676 (4.3570) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 06:48:22 root] (utils.py 283): INFO Epoch: [4] [1820/2502] eta: 0:08:56 lr: 0.000020 loss_cls: 3.8413 (3.8545) grad_norm: 4.0436 (4.3556) time: 0.7594 data: 0.0002 max mem: 8421 +[2024-12-05 06:48:29 root] (utils.py 283): INFO Epoch: [4] [1830/2502] eta: 0:08:48 lr: 0.000020 loss_cls: 4.0578 (3.8565) grad_norm: 4.1588 (4.3553) time: 0.7595 data: 0.0002 max mem: 8421 +[2024-12-05 06:48:37 root] (utils.py 283): INFO Epoch: [4] [1840/2502] eta: 0:08:40 lr: 0.000020 loss_cls: 4.0084 (3.8557) grad_norm: 4.1588 (4.3572) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 06:48:45 root] (utils.py 283): INFO Epoch: [4] [1850/2502] eta: 0:08:33 lr: 0.000020 loss_cls: 3.7685 (3.8547) grad_norm: 4.1026 (4.3562) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 06:48:52 root] (utils.py 283): INFO Epoch: [4] [1860/2502] eta: 0:08:25 lr: 0.000020 loss_cls: 3.7720 (3.8542) grad_norm: 4.1219 (4.3560) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:00 root] (utils.py 283): INFO Epoch: [4] [1870/2502] eta: 0:08:17 lr: 0.000020 loss_cls: 3.7766 (3.8530) grad_norm: 4.1297 (4.3557) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:08 root] (utils.py 283): INFO Epoch: [4] [1880/2502] eta: 0:08:09 lr: 0.000020 loss_cls: 4.1471 (3.8546) grad_norm: 4.2193 (4.3604) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:15 root] (utils.py 283): INFO Epoch: [4] [1890/2502] eta: 0:08:01 lr: 0.000020 loss_cls: 4.1471 (3.8538) grad_norm: 4.3814 (4.3609) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:23 root] (utils.py 283): INFO Epoch: [4] [1900/2502] eta: 0:07:53 lr: 0.000020 loss_cls: 3.7028 (3.8529) grad_norm: 4.3460 (4.3621) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:30 root] (utils.py 283): INFO Epoch: [4] [1910/2502] eta: 0:07:45 lr: 0.000020 loss_cls: 3.8203 (3.8533) grad_norm: 4.2794 (4.3636) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:38 root] (utils.py 283): INFO Epoch: [4] [1920/2502] eta: 0:07:37 lr: 0.000020 loss_cls: 3.6014 (3.8509) grad_norm: 4.4050 (4.3633) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:46 root] (utils.py 283): INFO Epoch: [4] [1930/2502] eta: 0:07:29 lr: 0.000020 loss_cls: 3.5984 (3.8505) grad_norm: 4.3649 (4.3631) time: 0.7748 data: 0.0002 max mem: 8421 +[2024-12-05 06:49:54 root] (utils.py 283): INFO Epoch: [4] [1940/2502] eta: 0:07:21 lr: 0.000020 loss_cls: 3.7725 (3.8498) grad_norm: 4.3069 (4.3631) time: 0.7748 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:01 root] (utils.py 283): INFO Epoch: [4] [1950/2502] eta: 0:07:13 lr: 0.000020 loss_cls: 3.7888 (3.8490) grad_norm: 4.2265 (4.3617) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:09 root] (utils.py 283): INFO Epoch: [4] [1960/2502] eta: 0:07:05 lr: 0.000020 loss_cls: 3.8531 (3.8480) grad_norm: 4.0592 (4.3598) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:17 root] (utils.py 283): INFO Epoch: [4] [1970/2502] eta: 0:06:58 lr: 0.000020 loss_cls: 3.8282 (3.8465) grad_norm: 4.0590 (4.3589) time: 0.7881 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:25 root] (utils.py 283): INFO Epoch: [4] [1980/2502] eta: 0:06:50 lr: 0.000020 loss_cls: 3.7036 (3.8462) grad_norm: 4.2613 (4.3594) time: 0.7970 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:33 root] (utils.py 283): INFO Epoch: [4] [1990/2502] eta: 0:06:42 lr: 0.000020 loss_cls: 4.0232 (3.8457) grad_norm: 4.2613 (4.3592) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:41 root] (utils.py 283): INFO Epoch: [4] [2000/2502] eta: 0:06:34 lr: 0.000020 loss_cls: 3.7674 (3.8451) grad_norm: 4.2815 (4.3656) time: 0.7877 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:49 root] (utils.py 283): INFO Epoch: [4] [2010/2502] eta: 0:06:26 lr: 0.000020 loss_cls: 4.0365 (3.8467) grad_norm: 4.3538 (4.3671) time: 0.7901 data: 0.0002 max mem: 8421 +[2024-12-05 06:50:56 root] (utils.py 283): INFO Epoch: [4] [2020/2502] eta: 0:06:18 lr: 0.000020 loss_cls: 4.0088 (3.8464) grad_norm: 4.3471 (4.3661) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-05 06:51:04 root] (utils.py 283): INFO Epoch: [4] [2030/2502] eta: 0:06:10 lr: 0.000020 loss_cls: 4.0088 (3.8472) grad_norm: 4.1854 (4.3652) time: 0.7876 data: 0.0002 max mem: 8421 +[2024-12-05 06:51:12 root] (utils.py 283): INFO Epoch: [4] [2040/2502] eta: 0:06:03 lr: 0.000020 loss_cls: 4.1634 (3.8479) grad_norm: 4.0897 (4.3642) time: 0.7906 data: 0.0002 max mem: 8421 +[2024-12-05 06:51:20 root] (utils.py 283): INFO Epoch: [4] [2050/2502] eta: 0:05:55 lr: 0.000020 loss_cls: 3.8901 (3.8470) grad_norm: 4.0897 (4.3632) time: 0.7897 data: 0.0002 max mem: 8421 +[2024-12-05 06:51:28 root] (utils.py 283): INFO Epoch: [4] [2060/2502] eta: 0:05:47 lr: 0.000020 loss_cls: 3.6320 (3.8466) grad_norm: 4.2291 (4.3645) time: 0.7895 data: 0.0002 max mem: 8421 +[2024-12-05 06:51:36 root] (utils.py 283): INFO Epoch: [4] [2070/2502] eta: 0:05:39 lr: 0.000020 loss_cls: 3.9905 (3.8472) grad_norm: 4.2304 (4.3634) time: 0.7909 data: 0.0002 max mem: 8421 +[2024-12-05 06:51:44 root] (utils.py 283): INFO Epoch: [4] [2080/2502] eta: 0:05:31 lr: 0.000020 loss_cls: 4.0255 (3.8467) grad_norm: 4.1354 (4.3627) time: 0.7917 data: 0.0002 max mem: 8421 +[2024-12-05 06:51:52 root] (utils.py 283): INFO Epoch: [4] [2090/2502] eta: 0:05:23 lr: 0.000020 loss_cls: 3.9109 (3.8478) grad_norm: 4.2158 (4.3629) time: 0.7917 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:00 root] (utils.py 283): INFO Epoch: [4] [2100/2502] eta: 0:05:16 lr: 0.000020 loss_cls: 4.1951 (3.8468) grad_norm: 4.2618 (4.3630) time: 0.7915 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:08 root] (utils.py 283): INFO Epoch: [4] [2110/2502] eta: 0:05:08 lr: 0.000020 loss_cls: 3.9950 (3.8475) grad_norm: 4.0347 (4.3620) time: 0.7913 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:15 root] (utils.py 283): INFO Epoch: [4] [2120/2502] eta: 0:05:00 lr: 0.000020 loss_cls: 4.0828 (3.8470) grad_norm: 4.0347 (4.3632) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:23 root] (utils.py 283): INFO Epoch: [4] [2130/2502] eta: 0:04:52 lr: 0.000020 loss_cls: 4.0356 (3.8473) grad_norm: 4.0280 (4.3619) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:31 root] (utils.py 283): INFO Epoch: [4] [2140/2502] eta: 0:04:44 lr: 0.000020 loss_cls: 4.0989 (3.8485) grad_norm: 4.0970 (4.3613) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:38 root] (utils.py 283): INFO Epoch: [4] [2150/2502] eta: 0:04:36 lr: 0.000020 loss_cls: 4.0618 (3.8482) grad_norm: 4.1288 (4.3603) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:46 root] (utils.py 283): INFO Epoch: [4] [2160/2502] eta: 0:04:28 lr: 0.000020 loss_cls: 4.0618 (3.8488) grad_norm: 4.1761 (4.3608) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 06:52:53 root] (utils.py 283): INFO Epoch: [4] [2170/2502] eta: 0:04:20 lr: 0.000020 loss_cls: 4.1309 (3.8497) grad_norm: 4.3764 (4.3609) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 06:53:01 root] (utils.py 283): INFO Epoch: [4] [2180/2502] eta: 0:04:12 lr: 0.000020 loss_cls: 3.9116 (3.8493) grad_norm: 4.2522 (4.3604) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 06:53:09 root] (utils.py 283): INFO Epoch: [4] [2190/2502] eta: 0:04:05 lr: 0.000020 loss_cls: 4.1610 (3.8506) grad_norm: 4.1106 (4.3616) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 06:53:16 root] (utils.py 283): INFO Epoch: [4] [2200/2502] eta: 0:03:57 lr: 0.000020 loss_cls: 3.9644 (3.8490) grad_norm: 4.0430 (4.3608) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-05 06:53:24 root] (utils.py 283): INFO Epoch: [4] [2210/2502] eta: 0:03:49 lr: 0.000020 loss_cls: 3.6353 (3.8487) grad_norm: 4.3162 (4.3621) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-05 06:53:32 root] (utils.py 283): INFO Epoch: [4] [2220/2502] eta: 0:03:41 lr: 0.000020 loss_cls: 3.8541 (3.8479) grad_norm: 4.2391 (4.3622) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 06:53:40 root] (utils.py 283): INFO Epoch: [4] [2230/2502] eta: 0:03:33 lr: 0.000020 loss_cls: 3.7777 (3.8471) grad_norm: 4.1866 (4.3621) time: 0.7740 data: 0.0002 max mem: 8421 +[2024-12-05 06:53:47 root] (utils.py 283): INFO Epoch: [4] [2240/2502] eta: 0:03:25 lr: 0.000020 loss_cls: 3.7588 (3.8464) grad_norm: 4.3308 (4.3624) time: 0.7841 data: 0.0002 max mem: 8421 +[2024-12-05 06:53:55 root] (utils.py 283): INFO Epoch: [4] [2250/2502] eta: 0:03:17 lr: 0.000020 loss_cls: 3.6619 (3.8454) grad_norm: 4.2787 (4.3619) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 06:54:03 root] (utils.py 283): INFO Epoch: [4] [2260/2502] eta: 0:03:10 lr: 0.000020 loss_cls: 3.4944 (3.8448) grad_norm: 4.1295 (4.3611) time: 0.7897 data: 0.0002 max mem: 8421 +[2024-12-05 06:54:11 root] (utils.py 283): INFO Epoch: [4] [2270/2502] eta: 0:03:02 lr: 0.000020 loss_cls: 3.8897 (3.8454) grad_norm: 4.2679 (4.3619) time: 0.7861 data: 0.0002 max mem: 8421 +[2024-12-05 06:54:19 root] (utils.py 283): INFO Epoch: [4] [2280/2502] eta: 0:02:54 lr: 0.000020 loss_cls: 4.0372 (3.8463) grad_norm: 4.3248 (4.3611) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-05 06:54:27 root] (utils.py 283): INFO Epoch: [4] [2290/2502] eta: 0:02:46 lr: 0.000020 loss_cls: 4.1676 (3.8466) grad_norm: 4.1199 (4.3609) time: 0.7725 data: 0.0002 max mem: 8421 +[2024-12-05 06:54:34 root] (utils.py 283): INFO Epoch: [4] [2300/2502] eta: 0:02:38 lr: 0.000020 loss_cls: 3.7130 (3.8460) grad_norm: 4.1323 (4.3611) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 06:54:42 root] (utils.py 283): INFO Epoch: [4] [2310/2502] eta: 0:02:30 lr: 0.000020 loss_cls: 3.6924 (3.8450) grad_norm: 4.1323 (4.3613) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 06:54:49 root] (utils.py 283): INFO Epoch: [4] [2320/2502] eta: 0:02:22 lr: 0.000020 loss_cls: 3.8310 (3.8457) grad_norm: 4.2220 (4.3611) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 06:54:57 root] (utils.py 283): INFO Epoch: [4] [2330/2502] eta: 0:02:14 lr: 0.000020 loss_cls: 4.0931 (3.8469) grad_norm: 4.2220 (4.3618) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 06:55:05 root] (utils.py 283): INFO Epoch: [4] [2340/2502] eta: 0:02:07 lr: 0.000020 loss_cls: 4.0931 (3.8468) grad_norm: 4.4576 (4.3642) time: 0.7600 data: 0.0002 max mem: 8421 +[2024-12-05 06:55:12 root] (utils.py 283): INFO Epoch: [4] [2350/2502] eta: 0:01:59 lr: 0.000020 loss_cls: 3.8779 (3.8470) grad_norm: 4.2821 (4.3637) time: 0.7609 data: 0.0002 max mem: 8421 +[2024-12-05 06:55:20 root] (utils.py 283): INFO Epoch: [4] [2360/2502] eta: 0:01:51 lr: 0.000020 loss_cls: 3.7731 (3.8464) grad_norm: 4.0892 (4.3627) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 06:55:28 root] (utils.py 283): INFO Epoch: [4] [2370/2502] eta: 0:01:43 lr: 0.000020 loss_cls: 3.7723 (3.8461) grad_norm: 4.1049 (4.3647) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 06:55:35 root] (utils.py 283): INFO Epoch: [4] [2380/2502] eta: 0:01:35 lr: 0.000020 loss_cls: 3.9763 (3.8468) grad_norm: 4.1958 (4.3636) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 06:55:43 root] (utils.py 283): INFO Epoch: [4] [2390/2502] eta: 0:01:27 lr: 0.000020 loss_cls: 3.9819 (3.8459) grad_norm: 4.0767 (4.3644) time: 0.7704 data: 0.0002 max mem: 8421 +[2024-12-05 06:55:51 root] (utils.py 283): INFO Epoch: [4] [2400/2502] eta: 0:01:19 lr: 0.000020 loss_cls: 3.7442 (3.8458) grad_norm: 4.1225 (4.3650) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 06:55:59 root] (utils.py 283): INFO Epoch: [4] [2410/2502] eta: 0:01:12 lr: 0.000020 loss_cls: 4.1651 (3.8466) grad_norm: 4.2275 (4.3649) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 06:56:06 root] (utils.py 283): INFO Epoch: [4] [2420/2502] eta: 0:01:04 lr: 0.000020 loss_cls: 3.9713 (3.8459) grad_norm: 4.3301 (4.3650) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 06:56:14 root] (utils.py 283): INFO Epoch: [4] [2430/2502] eta: 0:00:56 lr: 0.000020 loss_cls: 3.5624 (3.8459) grad_norm: 4.3647 (4.3649) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 06:56:21 root] (utils.py 283): INFO Epoch: [4] [2440/2502] eta: 0:00:48 lr: 0.000020 loss_cls: 4.0124 (3.8462) grad_norm: 4.3647 (4.3649) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 06:56:29 root] (utils.py 283): INFO Epoch: [4] [2450/2502] eta: 0:00:40 lr: 0.000020 loss_cls: 4.0124 (3.8471) grad_norm: 4.3180 (4.3653) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 06:56:37 root] (utils.py 283): INFO Epoch: [4] [2460/2502] eta: 0:00:32 lr: 0.000020 loss_cls: 4.0906 (3.8473) grad_norm: 4.2383 (4.3650) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 06:56:44 root] (utils.py 283): INFO Epoch: [4] [2470/2502] eta: 0:00:25 lr: 0.000020 loss_cls: 4.0906 (3.8465) grad_norm: 4.3493 (4.3656) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 06:56:52 root] (utils.py 283): INFO Epoch: [4] [2480/2502] eta: 0:00:17 lr: 0.000020 loss_cls: 3.4811 (3.8458) grad_norm: 4.3493 (4.3656) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-05 06:57:00 root] (utils.py 283): INFO Epoch: [4] [2490/2502] eta: 0:00:09 lr: 0.000020 loss_cls: 3.8396 (3.8466) grad_norm: 4.3249 (4.3657) time: 0.7860 data: 0.0216 max mem: 8421 +[2024-12-05 06:57:08 root] (utils.py 283): INFO Epoch: [4] [2500/2502] eta: 0:00:01 lr: 0.000020 loss_cls: 3.8560 (3.8462) grad_norm: 4.3190 (4.3651) time: 0.7894 data: 0.0216 max mem: 8421 +[2024-12-05 06:57:08 root] (utils.py 283): INFO Epoch: [4] [2501/2502] eta: 0:00:00 lr: 0.000020 loss_cls: 3.8560 (3.8457) grad_norm: 4.2267 (4.3649) time: 0.7902 data: 0.0216 max mem: 8421 +[2024-12-05 06:57:08 root] (utils.py 297): INFO Epoch: [4] Total time: 0:32:40 (0.7836 s / it) +[2024-12-05 06:57:08 root] (engine.py 178): INFO Averaged stats:lr: 0.000020 loss_cls: 3.8560 (3.8464) grad_norm: 4.2267 (4.3649) +[2024-12-05 06:57:09 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7108 (0.7108) acc1: 84.3750 (84.3750) acc3: 96.0938 (96.0938) acc5: 97.6562 (97.6562) time: 0.1431 data: 0.0005 max mem: 8421 +[2024-12-05 06:57:10 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8899 (0.9096) acc1: 82.8125 (80.9659) acc3: 92.1875 (92.0455) acc5: 96.0938 (94.8864) time: 0.1325 data: 0.0003 max mem: 8421 +[2024-12-05 06:57:12 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9170 (0.9503) acc1: 79.6875 (80.0223) acc3: 91.4062 (91.5923) acc5: 94.5312 (94.5685) time: 0.1315 data: 0.0004 max mem: 8421 +[2024-12-05 06:57:13 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0103 (0.9487) acc1: 78.1250 (79.2339) acc3: 91.4062 (92.3891) acc5: 95.3125 (94.9849) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 06:57:14 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8808 (0.9395) acc1: 79.6875 (79.7066) acc3: 92.9688 (92.6067) acc5: 96.0938 (95.1982) time: 0.1318 data: 0.0004 max mem: 8421 +[2024-12-05 06:57:16 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0749 (1.0370) acc1: 75.0000 (77.5888) acc3: 85.9375 (90.7935) acc5: 91.4062 (93.8113) time: 0.1323 data: 0.0004 max mem: 8421 +[2024-12-05 06:57:17 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3747 (1.0789) acc1: 70.3125 (76.7674) acc3: 85.1562 (90.0359) acc5: 89.0625 (93.1609) time: 0.1333 data: 0.0004 max mem: 8421 +[2024-12-05 06:57:18 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3279 (1.1230) acc1: 71.8750 (75.6932) acc3: 85.9375 (89.4366) acc5: 89.8438 (92.7487) time: 0.1394 data: 0.0067 max mem: 8421 +[2024-12-05 06:57:20 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3597 (1.1607) acc1: 69.5312 (74.8939) acc3: 85.1562 (88.8503) acc5: 89.0625 (92.1489) time: 0.1794 data: 0.0478 max mem: 8421 +[2024-12-05 06:57:22 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.4237 (1.1947) acc1: 69.5312 (74.0556) acc3: 85.1562 (88.3070) acc5: 88.2812 (91.6552) time: 0.1760 data: 0.0448 max mem: 8421 +[2024-12-05 06:57:23 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2025 (1.1845) acc1: 73.4375 (74.1680) acc3: 86.7188 (88.4880) acc5: 90.6250 (91.8480) time: 0.1833 data: 0.0447 max mem: 8421 +[2024-12-05 06:57:23 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1442 s / it) +[2024-12-05 06:57:24 root] (engine.py 263): INFO * Acc@1 73.932 Acc@3 88.412 Acc@5 91.918 loss 1.182 flops 1.285 layer_flops 1.251 +[2024-12-05 06:57:24 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 73.9% +[2024-12-05 06:57:24 root] (main.py 550): INFO Max accuracy: 73.93% +[2024-12-05 06:57:25 root] (utils.py 283): INFO Epoch: [5] [ 0/2502] eta: 0:39:48 lr: 0.000019 loss_cls: 3.8565 (3.8565) grad_norm: 4.5008 (4.5008) time: 0.9546 data: 0.0003 max mem: 8421 +[2024-12-05 06:57:33 root] (utils.py 283): INFO Epoch: [5] [ 10/2502] eta: 0:32:38 lr: 0.000019 loss_cls: 3.7791 (3.8803) grad_norm: 4.5008 (4.3940) time: 0.7861 data: 0.0002 max mem: 8421 +[2024-12-05 06:57:40 root] (utils.py 283): INFO Epoch: [5] [ 20/2502] eta: 0:32:07 lr: 0.000019 loss_cls: 3.9380 (3.9544) grad_norm: 4.3214 (4.5545) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-05 06:57:48 root] (utils.py 283): INFO Epoch: [5] [ 30/2502] eta: 0:32:06 lr: 0.000019 loss_cls: 4.1475 (4.0178) grad_norm: 4.6140 (4.6191) time: 0.7758 data: 0.0002 max mem: 8421 +[2024-12-05 06:57:56 root] (utils.py 283): INFO Epoch: [5] [ 40/2502] eta: 0:31:48 lr: 0.000019 loss_cls: 4.2067 (4.0165) grad_norm: 4.3991 (4.5501) time: 0.7740 data: 0.0002 max mem: 8421 +[2024-12-05 06:58:03 root] (utils.py 283): INFO Epoch: [5] [ 50/2502] eta: 0:31:38 lr: 0.000019 loss_cls: 4.1304 (4.0291) grad_norm: 4.1581 (4.4679) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 06:58:11 root] (utils.py 283): INFO Epoch: [5] [ 60/2502] eta: 0:31:27 lr: 0.000019 loss_cls: 4.2679 (4.0846) grad_norm: 4.0950 (4.4270) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 06:58:19 root] (utils.py 283): INFO Epoch: [5] [ 70/2502] eta: 0:31:17 lr: 0.000019 loss_cls: 4.3464 (4.0948) grad_norm: 4.1828 (4.4199) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-05 06:58:26 root] (utils.py 283): INFO Epoch: [5] [ 80/2502] eta: 0:31:07 lr: 0.000019 loss_cls: 4.1518 (4.0815) grad_norm: 4.3088 (4.4065) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 06:58:34 root] (utils.py 283): INFO Epoch: [5] [ 90/2502] eta: 0:30:59 lr: 0.000019 loss_cls: 4.0873 (4.0540) grad_norm: 4.1497 (4.4425) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 06:58:42 root] (utils.py 283): INFO Epoch: [5] [ 100/2502] eta: 0:30:49 lr: 0.000019 loss_cls: 4.0873 (4.0500) grad_norm: 4.1706 (4.4465) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 06:58:49 root] (utils.py 283): INFO Epoch: [5] [ 110/2502] eta: 0:30:42 lr: 0.000019 loss_cls: 3.9111 (4.0191) grad_norm: 4.2840 (4.4289) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 06:58:57 root] (utils.py 283): INFO Epoch: [5] [ 120/2502] eta: 0:30:38 lr: 0.000019 loss_cls: 3.9111 (4.0247) grad_norm: 4.1672 (4.4052) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 06:59:05 root] (utils.py 283): INFO Epoch: [5] [ 130/2502] eta: 0:30:33 lr: 0.000019 loss_cls: 4.0685 (4.0226) grad_norm: 4.2403 (4.4038) time: 0.7880 data: 0.0002 max mem: 8421 +[2024-12-05 06:59:13 root] (utils.py 283): INFO Epoch: [5] [ 140/2502] eta: 0:30:27 lr: 0.000019 loss_cls: 4.0860 (4.0314) grad_norm: 4.3189 (4.4072) time: 0.7865 data: 0.0002 max mem: 8421 +[2024-12-05 06:59:21 root] (utils.py 283): INFO Epoch: [5] [ 150/2502] eta: 0:30:21 lr: 0.000019 loss_cls: 4.1948 (4.0164) grad_norm: 4.3841 (4.4078) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-05 06:59:29 root] (utils.py 283): INFO Epoch: [5] [ 160/2502] eta: 0:30:15 lr: 0.000019 loss_cls: 4.1948 (4.0267) grad_norm: 4.2522 (4.3946) time: 0.7866 data: 0.0002 max mem: 8421 +[2024-12-05 06:59:37 root] (utils.py 283): INFO Epoch: [5] [ 170/2502] eta: 0:30:09 lr: 0.000019 loss_cls: 4.2077 (4.0224) grad_norm: 4.1145 (4.3735) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 06:59:44 root] (utils.py 283): INFO Epoch: [5] [ 180/2502] eta: 0:30:02 lr: 0.000019 loss_cls: 4.2311 (4.0295) grad_norm: 4.1423 (4.3679) time: 0.7830 data: 0.0002 max mem: 8421 +[2024-12-05 06:59:52 root] (utils.py 283): INFO Epoch: [5] [ 190/2502] eta: 0:29:55 lr: 0.000019 loss_cls: 4.2825 (4.0285) grad_norm: 4.3370 (4.4009) time: 0.7832 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:00 root] (utils.py 283): INFO Epoch: [5] [ 200/2502] eta: 0:29:48 lr: 0.000019 loss_cls: 4.1396 (4.0388) grad_norm: 4.1130 (4.3874) time: 0.7844 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:08 root] (utils.py 283): INFO Epoch: [5] [ 210/2502] eta: 0:29:41 lr: 0.000019 loss_cls: 4.1419 (4.0513) grad_norm: 4.2239 (4.3793) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:16 root] (utils.py 283): INFO Epoch: [5] [ 220/2502] eta: 0:29:34 lr: 0.000019 loss_cls: 4.1857 (4.0428) grad_norm: 4.2239 (4.3741) time: 0.7831 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:24 root] (utils.py 283): INFO Epoch: [5] [ 230/2502] eta: 0:29:27 lr: 0.000019 loss_cls: 4.1857 (4.0541) grad_norm: 4.3310 (4.3783) time: 0.7838 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:31 root] (utils.py 283): INFO Epoch: [5] [ 240/2502] eta: 0:29:20 lr: 0.000019 loss_cls: 3.9652 (4.0350) grad_norm: 4.3642 (4.3775) time: 0.7837 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:39 root] (utils.py 283): INFO Epoch: [5] [ 250/2502] eta: 0:29:13 lr: 0.000019 loss_cls: 3.7837 (4.0298) grad_norm: 4.2188 (4.3703) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:47 root] (utils.py 283): INFO Epoch: [5] [ 260/2502] eta: 0:29:05 lr: 0.000019 loss_cls: 3.9367 (4.0243) grad_norm: 4.1280 (4.3712) time: 0.7863 data: 0.0002 max mem: 8421 +[2024-12-05 07:00:55 root] (utils.py 283): INFO Epoch: [5] [ 270/2502] eta: 0:28:58 lr: 0.000019 loss_cls: 4.2895 (4.0305) grad_norm: 4.0994 (4.3600) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:03 root] (utils.py 283): INFO Epoch: [5] [ 280/2502] eta: 0:28:52 lr: 0.000019 loss_cls: 4.1684 (4.0260) grad_norm: 4.1053 (4.3566) time: 0.7907 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:11 root] (utils.py 283): INFO Epoch: [5] [ 290/2502] eta: 0:28:44 lr: 0.000019 loss_cls: 3.6492 (4.0100) grad_norm: 4.1570 (4.3466) time: 0.7897 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:19 root] (utils.py 283): INFO Epoch: [5] [ 300/2502] eta: 0:28:37 lr: 0.000019 loss_cls: 3.6492 (4.0079) grad_norm: 4.1810 (4.3469) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:27 root] (utils.py 283): INFO Epoch: [5] [ 310/2502] eta: 0:28:29 lr: 0.000019 loss_cls: 4.1207 (4.0077) grad_norm: 4.1780 (4.3377) time: 0.7841 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:34 root] (utils.py 283): INFO Epoch: [5] [ 320/2502] eta: 0:28:20 lr: 0.000019 loss_cls: 4.1119 (4.0056) grad_norm: 4.1780 (4.3375) time: 0.7706 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:42 root] (utils.py 283): INFO Epoch: [5] [ 330/2502] eta: 0:28:12 lr: 0.000019 loss_cls: 4.0746 (4.0029) grad_norm: 4.2281 (4.3433) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:50 root] (utils.py 283): INFO Epoch: [5] [ 340/2502] eta: 0:28:03 lr: 0.000019 loss_cls: 3.7923 (3.9977) grad_norm: 4.1534 (4.3407) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 07:01:57 root] (utils.py 283): INFO Epoch: [5] [ 350/2502] eta: 0:27:55 lr: 0.000019 loss_cls: 3.7629 (3.9936) grad_norm: 4.3051 (4.3460) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:05 root] (utils.py 283): INFO Epoch: [5] [ 360/2502] eta: 0:27:46 lr: 0.000019 loss_cls: 3.4563 (3.9744) grad_norm: 4.2551 (4.3449) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:13 root] (utils.py 283): INFO Epoch: [5] [ 370/2502] eta: 0:27:38 lr: 0.000019 loss_cls: 3.4055 (3.9726) grad_norm: 4.1413 (4.3374) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:20 root] (utils.py 283): INFO Epoch: [5] [ 380/2502] eta: 0:27:30 lr: 0.000019 loss_cls: 4.1209 (3.9710) grad_norm: 4.0784 (4.3374) time: 0.7712 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:28 root] (utils.py 283): INFO Epoch: [5] [ 390/2502] eta: 0:27:22 lr: 0.000019 loss_cls: 3.9660 (3.9643) grad_norm: 4.1136 (4.3329) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:36 root] (utils.py 283): INFO Epoch: [5] [ 400/2502] eta: 0:27:13 lr: 0.000019 loss_cls: 3.9660 (3.9644) grad_norm: 4.1674 (4.3310) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:43 root] (utils.py 283): INFO Epoch: [5] [ 410/2502] eta: 0:27:05 lr: 0.000019 loss_cls: 3.9970 (3.9620) grad_norm: 4.2959 (4.3319) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:51 root] (utils.py 283): INFO Epoch: [5] [ 420/2502] eta: 0:26:56 lr: 0.000019 loss_cls: 3.9499 (3.9629) grad_norm: 4.2362 (4.3303) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 07:02:59 root] (utils.py 283): INFO Epoch: [5] [ 430/2502] eta: 0:26:48 lr: 0.000019 loss_cls: 4.0936 (3.9662) grad_norm: 4.1842 (4.3330) time: 0.7616 data: 0.0003 max mem: 8421 +[2024-12-05 07:03:06 root] (utils.py 283): INFO Epoch: [5] [ 440/2502] eta: 0:26:39 lr: 0.000019 loss_cls: 4.1521 (3.9697) grad_norm: 4.1842 (4.3313) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-05 07:03:14 root] (utils.py 283): INFO Epoch: [5] [ 450/2502] eta: 0:26:31 lr: 0.000019 loss_cls: 3.9580 (3.9622) grad_norm: 4.2425 (4.3337) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 07:03:21 root] (utils.py 283): INFO Epoch: [5] [ 460/2502] eta: 0:26:23 lr: 0.000019 loss_cls: 3.7359 (3.9601) grad_norm: 4.3838 (4.3347) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 07:03:29 root] (utils.py 283): INFO Epoch: [5] [ 470/2502] eta: 0:26:15 lr: 0.000019 loss_cls: 3.9937 (3.9634) grad_norm: 4.3891 (4.3351) time: 0.7679 data: 0.0002 max mem: 8421 +[2024-12-05 07:03:37 root] (utils.py 283): INFO Epoch: [5] [ 480/2502] eta: 0:26:08 lr: 0.000019 loss_cls: 4.0668 (3.9614) grad_norm: 4.2337 (4.3322) time: 0.7740 data: 0.0002 max mem: 8421 +[2024-12-05 07:03:45 root] (utils.py 283): INFO Epoch: [5] [ 490/2502] eta: 0:26:00 lr: 0.000019 loss_cls: 4.0668 (3.9649) grad_norm: 4.1662 (4.3298) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-05 07:03:52 root] (utils.py 283): INFO Epoch: [5] [ 500/2502] eta: 0:25:52 lr: 0.000019 loss_cls: 4.1406 (3.9641) grad_norm: 4.1490 (4.3278) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-05 07:04:00 root] (utils.py 283): INFO Epoch: [5] [ 510/2502] eta: 0:25:44 lr: 0.000019 loss_cls: 3.9173 (3.9639) grad_norm: 4.1837 (4.3260) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 07:04:08 root] (utils.py 283): INFO Epoch: [5] [ 520/2502] eta: 0:25:35 lr: 0.000019 loss_cls: 3.9173 (3.9630) grad_norm: 4.3111 (4.3291) time: 0.7633 data: 0.0003 max mem: 8421 +[2024-12-05 07:04:15 root] (utils.py 283): INFO Epoch: [5] [ 530/2502] eta: 0:25:27 lr: 0.000019 loss_cls: 3.9897 (3.9615) grad_norm: 4.1901 (4.3282) time: 0.7603 data: 0.0002 max mem: 8421 +[2024-12-05 07:04:23 root] (utils.py 283): INFO Epoch: [5] [ 540/2502] eta: 0:25:19 lr: 0.000019 loss_cls: 4.0601 (3.9620) grad_norm: 4.1897 (4.3274) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 07:04:31 root] (utils.py 283): INFO Epoch: [5] [ 550/2502] eta: 0:25:12 lr: 0.000019 loss_cls: 4.0701 (3.9642) grad_norm: 4.2927 (4.3283) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 07:04:39 root] (utils.py 283): INFO Epoch: [5] [ 560/2502] eta: 0:25:04 lr: 0.000019 loss_cls: 4.1349 (3.9627) grad_norm: 4.1451 (4.3258) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 07:04:46 root] (utils.py 283): INFO Epoch: [5] [ 570/2502] eta: 0:24:56 lr: 0.000019 loss_cls: 3.8730 (3.9610) grad_norm: 4.1058 (4.3240) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 07:04:54 root] (utils.py 283): INFO Epoch: [5] [ 580/2502] eta: 0:24:48 lr: 0.000019 loss_cls: 3.8191 (3.9551) grad_norm: 4.0505 (4.3197) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 07:05:02 root] (utils.py 283): INFO Epoch: [5] [ 590/2502] eta: 0:24:41 lr: 0.000019 loss_cls: 3.8191 (3.9559) grad_norm: 4.0563 (4.3162) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 07:05:10 root] (utils.py 283): INFO Epoch: [5] [ 600/2502] eta: 0:24:33 lr: 0.000019 loss_cls: 4.0313 (3.9537) grad_norm: 4.2470 (4.3174) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 07:05:18 root] (utils.py 283): INFO Epoch: [5] [ 610/2502] eta: 0:24:26 lr: 0.000019 loss_cls: 4.0028 (3.9535) grad_norm: 4.1356 (4.3159) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 07:05:25 root] (utils.py 283): INFO Epoch: [5] [ 620/2502] eta: 0:24:18 lr: 0.000019 loss_cls: 3.9959 (3.9556) grad_norm: 4.1356 (4.3150) time: 0.7847 data: 0.0002 max mem: 8421 +[2024-12-05 07:05:33 root] (utils.py 283): INFO Epoch: [5] [ 630/2502] eta: 0:24:11 lr: 0.000019 loss_cls: 4.0121 (3.9557) grad_norm: 4.2045 (4.3157) time: 0.7928 data: 0.0003 max mem: 8421 +[2024-12-05 07:05:41 root] (utils.py 283): INFO Epoch: [5] [ 640/2502] eta: 0:24:04 lr: 0.000019 loss_cls: 3.7370 (3.9484) grad_norm: 4.2553 (4.3188) time: 0.7966 data: 0.0003 max mem: 8421 +[2024-12-05 07:05:49 root] (utils.py 283): INFO Epoch: [5] [ 650/2502] eta: 0:23:56 lr: 0.000019 loss_cls: 3.7370 (3.9499) grad_norm: 4.2621 (4.3174) time: 0.7848 data: 0.0002 max mem: 8421 +[2024-12-05 07:05:57 root] (utils.py 283): INFO Epoch: [5] [ 660/2502] eta: 0:23:48 lr: 0.000019 loss_cls: 4.0846 (3.9478) grad_norm: 4.1248 (4.3133) time: 0.7714 data: 0.0002 max mem: 8421 +[2024-12-05 07:06:04 root] (utils.py 283): INFO Epoch: [5] [ 670/2502] eta: 0:23:40 lr: 0.000019 loss_cls: 3.8945 (3.9472) grad_norm: 4.1248 (4.3121) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 07:06:12 root] (utils.py 283): INFO Epoch: [5] [ 680/2502] eta: 0:23:32 lr: 0.000019 loss_cls: 4.0372 (3.9475) grad_norm: 4.1083 (4.3155) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 07:06:20 root] (utils.py 283): INFO Epoch: [5] [ 690/2502] eta: 0:23:24 lr: 0.000019 loss_cls: 3.9388 (3.9444) grad_norm: 4.1277 (4.3153) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 07:06:27 root] (utils.py 283): INFO Epoch: [5] [ 700/2502] eta: 0:23:17 lr: 0.000019 loss_cls: 4.0154 (3.9460) grad_norm: 4.1486 (4.3137) time: 0.7700 data: 0.0003 max mem: 8421 +[2024-12-05 07:06:35 root] (utils.py 283): INFO Epoch: [5] [ 710/2502] eta: 0:23:09 lr: 0.000019 loss_cls: 4.2390 (3.9485) grad_norm: 4.0271 (4.3141) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 07:06:43 root] (utils.py 283): INFO Epoch: [5] [ 720/2502] eta: 0:23:00 lr: 0.000019 loss_cls: 4.2164 (3.9482) grad_norm: 4.0447 (4.3130) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 07:06:50 root] (utils.py 283): INFO Epoch: [5] [ 730/2502] eta: 0:22:53 lr: 0.000019 loss_cls: 4.2193 (3.9527) grad_norm: 4.0447 (4.3115) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 07:06:58 root] (utils.py 283): INFO Epoch: [5] [ 740/2502] eta: 0:22:45 lr: 0.000019 loss_cls: 4.3472 (3.9573) grad_norm: 4.1982 (4.3120) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:06 root] (utils.py 283): INFO Epoch: [5] [ 750/2502] eta: 0:22:37 lr: 0.000019 loss_cls: 4.2219 (3.9566) grad_norm: 4.4873 (4.3156) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:13 root] (utils.py 283): INFO Epoch: [5] [ 760/2502] eta: 0:22:29 lr: 0.000019 loss_cls: 3.8346 (3.9570) grad_norm: 4.4220 (4.3152) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:21 root] (utils.py 283): INFO Epoch: [5] [ 770/2502] eta: 0:22:21 lr: 0.000019 loss_cls: 3.8346 (3.9549) grad_norm: 4.1828 (4.3135) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:29 root] (utils.py 283): INFO Epoch: [5] [ 780/2502] eta: 0:22:13 lr: 0.000019 loss_cls: 3.8058 (3.9547) grad_norm: 4.1828 (4.3203) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:36 root] (utils.py 283): INFO Epoch: [5] [ 790/2502] eta: 0:22:05 lr: 0.000019 loss_cls: 3.9987 (3.9549) grad_norm: 4.3527 (4.3219) time: 0.7597 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:44 root] (utils.py 283): INFO Epoch: [5] [ 800/2502] eta: 0:21:57 lr: 0.000019 loss_cls: 4.0744 (3.9558) grad_norm: 4.3254 (4.3224) time: 0.7601 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:51 root] (utils.py 283): INFO Epoch: [5] [ 810/2502] eta: 0:21:49 lr: 0.000019 loss_cls: 4.2602 (3.9569) grad_norm: 4.2305 (4.3221) time: 0.7597 data: 0.0002 max mem: 8421 +[2024-12-05 07:07:59 root] (utils.py 283): INFO Epoch: [5] [ 820/2502] eta: 0:21:41 lr: 0.000019 loss_cls: 4.0301 (3.9574) grad_norm: 4.1047 (4.3204) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 07:08:07 root] (utils.py 283): INFO Epoch: [5] [ 830/2502] eta: 0:21:33 lr: 0.000019 loss_cls: 4.0301 (3.9550) grad_norm: 4.1642 (4.3220) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 07:08:14 root] (utils.py 283): INFO Epoch: [5] [ 840/2502] eta: 0:21:25 lr: 0.000019 loss_cls: 3.7647 (3.9516) grad_norm: 4.2186 (4.3231) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 07:08:22 root] (utils.py 283): INFO Epoch: [5] [ 850/2502] eta: 0:21:17 lr: 0.000019 loss_cls: 3.9344 (3.9541) grad_norm: 4.2602 (4.3331) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 07:08:30 root] (utils.py 283): INFO Epoch: [5] [ 860/2502] eta: 0:21:09 lr: 0.000019 loss_cls: 4.2109 (3.9543) grad_norm: 4.2581 (4.3327) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 07:08:37 root] (utils.py 283): INFO Epoch: [5] [ 870/2502] eta: 0:21:01 lr: 0.000019 loss_cls: 3.8223 (3.9522) grad_norm: 4.2206 (4.3316) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 07:08:45 root] (utils.py 283): INFO Epoch: [5] [ 880/2502] eta: 0:20:53 lr: 0.000019 loss_cls: 4.0986 (3.9532) grad_norm: 4.2655 (4.3357) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-05 07:08:53 root] (utils.py 283): INFO Epoch: [5] [ 890/2502] eta: 0:20:46 lr: 0.000019 loss_cls: 4.2472 (3.9556) grad_norm: 4.2029 (4.3344) time: 0.7824 data: 0.0002 max mem: 8421 +[2024-12-05 07:09:01 root] (utils.py 283): INFO Epoch: [5] [ 900/2502] eta: 0:20:38 lr: 0.000019 loss_cls: 4.2039 (3.9537) grad_norm: 4.1323 (4.3323) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-05 07:09:08 root] (utils.py 283): INFO Epoch: [5] [ 910/2502] eta: 0:20:30 lr: 0.000019 loss_cls: 3.5965 (3.9536) grad_norm: 4.0342 (4.3290) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 07:09:16 root] (utils.py 283): INFO Epoch: [5] [ 920/2502] eta: 0:20:22 lr: 0.000019 loss_cls: 4.2165 (3.9557) grad_norm: 3.9981 (4.3268) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 07:09:24 root] (utils.py 283): INFO Epoch: [5] [ 930/2502] eta: 0:20:15 lr: 0.000019 loss_cls: 4.0714 (3.9542) grad_norm: 4.2011 (4.3267) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 07:09:31 root] (utils.py 283): INFO Epoch: [5] [ 940/2502] eta: 0:20:07 lr: 0.000019 loss_cls: 3.6994 (3.9528) grad_norm: 4.2237 (4.3251) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 07:09:39 root] (utils.py 283): INFO Epoch: [5] [ 950/2502] eta: 0:19:59 lr: 0.000019 loss_cls: 3.8467 (3.9521) grad_norm: 4.0762 (4.3226) time: 0.7751 data: 0.0002 max mem: 8421 +[2024-12-05 07:09:47 root] (utils.py 283): INFO Epoch: [5] [ 960/2502] eta: 0:19:52 lr: 0.000019 loss_cls: 3.9438 (3.9529) grad_norm: 4.1652 (4.3218) time: 0.7834 data: 0.0002 max mem: 8421 +[2024-12-05 07:09:55 root] (utils.py 283): INFO Epoch: [5] [ 970/2502] eta: 0:19:44 lr: 0.000019 loss_cls: 3.9438 (3.9494) grad_norm: 4.2539 (4.3223) time: 0.7871 data: 0.0002 max mem: 8421 +[2024-12-05 07:10:03 root] (utils.py 283): INFO Epoch: [5] [ 980/2502] eta: 0:19:37 lr: 0.000019 loss_cls: 3.6439 (3.9483) grad_norm: 4.2314 (4.3252) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 07:10:11 root] (utils.py 283): INFO Epoch: [5] [ 990/2502] eta: 0:19:29 lr: 0.000019 loss_cls: 3.8866 (3.9494) grad_norm: 4.2314 (4.3256) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-05 07:10:19 root] (utils.py 283): INFO Epoch: [5] [1000/2502] eta: 0:19:22 lr: 0.000019 loss_cls: 3.9011 (3.9494) grad_norm: 4.2467 (4.3287) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-05 07:10:26 root] (utils.py 283): INFO Epoch: [5] [1010/2502] eta: 0:19:14 lr: 0.000019 loss_cls: 4.0902 (3.9503) grad_norm: 4.2933 (4.3327) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-05 07:10:34 root] (utils.py 283): INFO Epoch: [5] [1020/2502] eta: 0:19:07 lr: 0.000019 loss_cls: 3.9908 (3.9487) grad_norm: 4.3463 (4.3318) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-05 07:10:42 root] (utils.py 283): INFO Epoch: [5] [1030/2502] eta: 0:18:59 lr: 0.000019 loss_cls: 3.8901 (3.9497) grad_norm: 4.2084 (4.3306) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 07:10:50 root] (utils.py 283): INFO Epoch: [5] [1040/2502] eta: 0:18:52 lr: 0.000019 loss_cls: 3.8230 (3.9474) grad_norm: 4.1818 (4.3302) time: 0.7886 data: 0.0002 max mem: 8421 +[2024-12-05 07:10:58 root] (utils.py 283): INFO Epoch: [5] [1050/2502] eta: 0:18:44 lr: 0.000019 loss_cls: 3.8230 (3.9471) grad_norm: 4.1892 (4.3322) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-05 07:11:06 root] (utils.py 283): INFO Epoch: [5] [1060/2502] eta: 0:18:36 lr: 0.000019 loss_cls: 4.1766 (3.9507) grad_norm: 4.1731 (4.3308) time: 0.7895 data: 0.0002 max mem: 8421 +[2024-12-05 07:11:14 root] (utils.py 283): INFO Epoch: [5] [1070/2502] eta: 0:18:29 lr: 0.000019 loss_cls: 4.3481 (3.9514) grad_norm: 4.1567 (4.3300) time: 0.7990 data: 0.0003 max mem: 8421 +[2024-12-05 07:11:22 root] (utils.py 283): INFO Epoch: [5] [1080/2502] eta: 0:18:22 lr: 0.000019 loss_cls: 4.2332 (3.9528) grad_norm: 4.3129 (4.3306) time: 0.7994 data: 0.0003 max mem: 8421 +[2024-12-05 07:11:30 root] (utils.py 283): INFO Epoch: [5] [1090/2502] eta: 0:18:14 lr: 0.000019 loss_cls: 4.1120 (3.9540) grad_norm: 4.2397 (4.3305) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-05 07:11:37 root] (utils.py 283): INFO Epoch: [5] [1100/2502] eta: 0:18:06 lr: 0.000019 loss_cls: 4.0687 (3.9521) grad_norm: 4.2045 (4.3300) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 07:11:45 root] (utils.py 283): INFO Epoch: [5] [1110/2502] eta: 0:17:58 lr: 0.000019 loss_cls: 4.2121 (3.9548) grad_norm: 4.1669 (4.3287) time: 0.7733 data: 0.0002 max mem: 8421 +[2024-12-05 07:11:53 root] (utils.py 283): INFO Epoch: [5] [1120/2502] eta: 0:17:50 lr: 0.000019 loss_cls: 4.2195 (3.9559) grad_norm: 4.1070 (4.3269) time: 0.7730 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:00 root] (utils.py 283): INFO Epoch: [5] [1130/2502] eta: 0:17:42 lr: 0.000019 loss_cls: 4.1968 (3.9578) grad_norm: 4.1192 (4.3269) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:08 root] (utils.py 283): INFO Epoch: [5] [1140/2502] eta: 0:17:35 lr: 0.000019 loss_cls: 4.2130 (3.9587) grad_norm: 4.2195 (4.3300) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:16 root] (utils.py 283): INFO Epoch: [5] [1150/2502] eta: 0:17:27 lr: 0.000019 loss_cls: 4.1664 (3.9604) grad_norm: 4.1280 (4.3285) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:23 root] (utils.py 283): INFO Epoch: [5] [1160/2502] eta: 0:17:19 lr: 0.000019 loss_cls: 4.2124 (3.9603) grad_norm: 4.1678 (4.3277) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:31 root] (utils.py 283): INFO Epoch: [5] [1170/2502] eta: 0:17:11 lr: 0.000019 loss_cls: 4.0765 (3.9610) grad_norm: 4.2305 (4.3265) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:38 root] (utils.py 283): INFO Epoch: [5] [1180/2502] eta: 0:17:03 lr: 0.000019 loss_cls: 4.0985 (3.9622) grad_norm: 4.2597 (4.3267) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:46 root] (utils.py 283): INFO Epoch: [5] [1190/2502] eta: 0:16:55 lr: 0.000019 loss_cls: 4.0287 (3.9616) grad_norm: 4.0885 (4.3240) time: 0.7600 data: 0.0002 max mem: 8421 +[2024-12-05 07:12:54 root] (utils.py 283): INFO Epoch: [5] [1200/2502] eta: 0:16:47 lr: 0.000019 loss_cls: 4.0473 (3.9613) grad_norm: 4.0586 (4.3231) time: 0.7612 data: 0.0002 max mem: 8421 +[2024-12-05 07:13:01 root] (utils.py 283): INFO Epoch: [5] [1210/2502] eta: 0:16:39 lr: 0.000019 loss_cls: 4.0658 (3.9606) grad_norm: 4.4065 (4.3262) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 07:13:09 root] (utils.py 283): INFO Epoch: [5] [1220/2502] eta: 0:16:32 lr: 0.000019 loss_cls: 4.1278 (3.9619) grad_norm: 4.1858 (4.3256) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-05 07:13:17 root] (utils.py 283): INFO Epoch: [5] [1230/2502] eta: 0:16:24 lr: 0.000019 loss_cls: 4.2540 (3.9628) grad_norm: 4.1963 (4.3263) time: 0.7704 data: 0.0002 max mem: 8421 +[2024-12-05 07:13:24 root] (utils.py 283): INFO Epoch: [5] [1240/2502] eta: 0:16:16 lr: 0.000019 loss_cls: 4.1874 (3.9637) grad_norm: 4.2050 (4.3246) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 07:13:32 root] (utils.py 283): INFO Epoch: [5] [1250/2502] eta: 0:16:08 lr: 0.000019 loss_cls: 4.1629 (3.9619) grad_norm: 4.0659 (4.3234) time: 0.7615 data: 0.0003 max mem: 8421 +[2024-12-05 07:13:40 root] (utils.py 283): INFO Epoch: [5] [1260/2502] eta: 0:16:00 lr: 0.000019 loss_cls: 4.0333 (3.9631) grad_norm: 4.0100 (4.3208) time: 0.7640 data: 0.0003 max mem: 8421 +[2024-12-05 07:13:47 root] (utils.py 283): INFO Epoch: [5] [1270/2502] eta: 0:15:53 lr: 0.000019 loss_cls: 4.0944 (3.9627) grad_norm: 4.0944 (4.3219) time: 0.7718 data: 0.0003 max mem: 8421 +[2024-12-05 07:13:55 root] (utils.py 283): INFO Epoch: [5] [1280/2502] eta: 0:15:45 lr: 0.000019 loss_cls: 3.9798 (3.9621) grad_norm: 4.2707 (4.3231) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:03 root] (utils.py 283): INFO Epoch: [5] [1290/2502] eta: 0:15:37 lr: 0.000019 loss_cls: 4.0482 (3.9637) grad_norm: 4.2783 (4.3230) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:11 root] (utils.py 283): INFO Epoch: [5] [1300/2502] eta: 0:15:30 lr: 0.000019 loss_cls: 4.1101 (3.9645) grad_norm: 4.2783 (4.3234) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:18 root] (utils.py 283): INFO Epoch: [5] [1310/2502] eta: 0:15:22 lr: 0.000019 loss_cls: 3.9923 (3.9627) grad_norm: 4.2018 (4.3220) time: 0.7626 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:26 root] (utils.py 283): INFO Epoch: [5] [1320/2502] eta: 0:15:14 lr: 0.000019 loss_cls: 3.8527 (3.9628) grad_norm: 4.0202 (4.3204) time: 0.7604 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:34 root] (utils.py 283): INFO Epoch: [5] [1330/2502] eta: 0:15:06 lr: 0.000019 loss_cls: 4.3346 (3.9642) grad_norm: 4.0202 (4.3207) time: 0.7603 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:41 root] (utils.py 283): INFO Epoch: [5] [1340/2502] eta: 0:14:58 lr: 0.000019 loss_cls: 4.1288 (3.9623) grad_norm: 4.2051 (4.3202) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:49 root] (utils.py 283): INFO Epoch: [5] [1350/2502] eta: 0:14:50 lr: 0.000019 loss_cls: 4.0400 (3.9642) grad_norm: 4.2051 (4.3197) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 07:14:56 root] (utils.py 283): INFO Epoch: [5] [1360/2502] eta: 0:14:43 lr: 0.000019 loss_cls: 4.0028 (3.9619) grad_norm: 4.1637 (4.3187) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 07:15:04 root] (utils.py 283): INFO Epoch: [5] [1370/2502] eta: 0:14:35 lr: 0.000019 loss_cls: 3.7039 (3.9610) grad_norm: 3.9766 (4.3166) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 07:15:12 root] (utils.py 283): INFO Epoch: [5] [1380/2502] eta: 0:14:27 lr: 0.000019 loss_cls: 3.9278 (3.9601) grad_norm: 4.1628 (4.3182) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 07:15:19 root] (utils.py 283): INFO Epoch: [5] [1390/2502] eta: 0:14:19 lr: 0.000019 loss_cls: 3.9278 (3.9601) grad_norm: 4.1696 (4.3166) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 07:15:27 root] (utils.py 283): INFO Epoch: [5] [1400/2502] eta: 0:14:11 lr: 0.000019 loss_cls: 4.0975 (3.9592) grad_norm: 3.9760 (4.3149) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 07:15:35 root] (utils.py 283): INFO Epoch: [5] [1410/2502] eta: 0:14:04 lr: 0.000019 loss_cls: 3.9804 (3.9590) grad_norm: 4.1595 (4.3143) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 07:15:42 root] (utils.py 283): INFO Epoch: [5] [1420/2502] eta: 0:13:56 lr: 0.000019 loss_cls: 3.9130 (3.9590) grad_norm: 4.2339 (4.3136) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 07:15:50 root] (utils.py 283): INFO Epoch: [5] [1430/2502] eta: 0:13:48 lr: 0.000019 loss_cls: 3.9659 (3.9582) grad_norm: 4.1850 (4.3126) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 07:15:58 root] (utils.py 283): INFO Epoch: [5] [1440/2502] eta: 0:13:40 lr: 0.000019 loss_cls: 3.7796 (3.9571) grad_norm: 4.1359 (4.3117) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 07:16:05 root] (utils.py 283): INFO Epoch: [5] [1450/2502] eta: 0:13:32 lr: 0.000019 loss_cls: 3.7796 (3.9556) grad_norm: 4.1381 (4.3115) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 07:16:13 root] (utils.py 283): INFO Epoch: [5] [1460/2502] eta: 0:13:25 lr: 0.000019 loss_cls: 4.0596 (3.9567) grad_norm: 4.3071 (4.3130) time: 0.7612 data: 0.0003 max mem: 8421 +[2024-12-05 07:16:21 root] (utils.py 283): INFO Epoch: [5] [1470/2502] eta: 0:13:17 lr: 0.000019 loss_cls: 4.2153 (3.9591) grad_norm: 4.3402 (4.3128) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 07:16:28 root] (utils.py 283): INFO Epoch: [5] [1480/2502] eta: 0:13:09 lr: 0.000019 loss_cls: 4.1475 (3.9589) grad_norm: 4.3138 (4.3171) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 07:16:36 root] (utils.py 283): INFO Epoch: [5] [1490/2502] eta: 0:13:01 lr: 0.000019 loss_cls: 3.8448 (3.9587) grad_norm: 4.2551 (4.3161) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 07:16:44 root] (utils.py 283): INFO Epoch: [5] [1500/2502] eta: 0:12:54 lr: 0.000019 loss_cls: 3.8448 (3.9579) grad_norm: 4.1808 (4.3165) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 07:16:51 root] (utils.py 283): INFO Epoch: [5] [1510/2502] eta: 0:12:46 lr: 0.000019 loss_cls: 3.7698 (3.9569) grad_norm: 4.2503 (4.3198) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 07:16:59 root] (utils.py 283): INFO Epoch: [5] [1520/2502] eta: 0:12:38 lr: 0.000019 loss_cls: 4.1613 (3.9583) grad_norm: 4.2664 (4.3207) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 07:17:07 root] (utils.py 283): INFO Epoch: [5] [1530/2502] eta: 0:12:30 lr: 0.000019 loss_cls: 3.9770 (3.9577) grad_norm: 4.2951 (4.3210) time: 0.7784 data: 0.0002 max mem: 8421 +[2024-12-05 07:17:15 root] (utils.py 283): INFO Epoch: [5] [1540/2502] eta: 0:12:23 lr: 0.000019 loss_cls: 3.9770 (3.9596) grad_norm: 4.2598 (4.3237) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 07:17:22 root] (utils.py 283): INFO Epoch: [5] [1550/2502] eta: 0:12:15 lr: 0.000019 loss_cls: 4.3074 (3.9622) grad_norm: 4.3580 (4.3237) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 07:17:30 root] (utils.py 283): INFO Epoch: [5] [1560/2502] eta: 0:12:07 lr: 0.000019 loss_cls: 4.2777 (3.9635) grad_norm: 4.3322 (4.3248) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 07:17:37 root] (utils.py 283): INFO Epoch: [5] [1570/2502] eta: 0:11:59 lr: 0.000019 loss_cls: 3.7853 (3.9608) grad_norm: 4.2237 (4.3234) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 07:17:45 root] (utils.py 283): INFO Epoch: [5] [1580/2502] eta: 0:11:52 lr: 0.000019 loss_cls: 3.4587 (3.9584) grad_norm: 3.9756 (4.3217) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 07:17:53 root] (utils.py 283): INFO Epoch: [5] [1590/2502] eta: 0:11:44 lr: 0.000019 loss_cls: 3.5632 (3.9578) grad_norm: 4.2756 (4.3222) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:00 root] (utils.py 283): INFO Epoch: [5] [1600/2502] eta: 0:11:36 lr: 0.000019 loss_cls: 3.8502 (3.9560) grad_norm: 4.3851 (4.3227) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:08 root] (utils.py 283): INFO Epoch: [5] [1610/2502] eta: 0:11:28 lr: 0.000019 loss_cls: 4.0385 (3.9565) grad_norm: 4.0875 (4.3221) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:16 root] (utils.py 283): INFO Epoch: [5] [1620/2502] eta: 0:11:21 lr: 0.000019 loss_cls: 3.9455 (3.9554) grad_norm: 4.0875 (4.3214) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:23 root] (utils.py 283): INFO Epoch: [5] [1630/2502] eta: 0:11:13 lr: 0.000019 loss_cls: 3.9171 (3.9557) grad_norm: 4.2630 (4.3228) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:31 root] (utils.py 283): INFO Epoch: [5] [1640/2502] eta: 0:11:05 lr: 0.000019 loss_cls: 4.0728 (3.9568) grad_norm: 4.4329 (4.3243) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:39 root] (utils.py 283): INFO Epoch: [5] [1650/2502] eta: 0:10:57 lr: 0.000019 loss_cls: 4.0479 (3.9564) grad_norm: 4.4329 (4.3251) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:46 root] (utils.py 283): INFO Epoch: [5] [1660/2502] eta: 0:10:49 lr: 0.000019 loss_cls: 3.7682 (3.9551) grad_norm: 4.2647 (4.3266) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 07:18:54 root] (utils.py 283): INFO Epoch: [5] [1670/2502] eta: 0:10:42 lr: 0.000019 loss_cls: 4.0741 (3.9566) grad_norm: 4.1853 (4.3259) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 07:19:02 root] (utils.py 283): INFO Epoch: [5] [1680/2502] eta: 0:10:34 lr: 0.000019 loss_cls: 4.2457 (3.9577) grad_norm: 4.0456 (4.3246) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 07:19:09 root] (utils.py 283): INFO Epoch: [5] [1690/2502] eta: 0:10:26 lr: 0.000019 loss_cls: 4.0745 (3.9561) grad_norm: 4.0791 (4.3245) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 07:19:17 root] (utils.py 283): INFO Epoch: [5] [1700/2502] eta: 0:10:19 lr: 0.000019 loss_cls: 3.9092 (3.9563) grad_norm: 4.3432 (4.3256) time: 0.7752 data: 0.0003 max mem: 8421 +[2024-12-05 07:19:25 root] (utils.py 283): INFO Epoch: [5] [1710/2502] eta: 0:10:11 lr: 0.000019 loss_cls: 4.1164 (3.9565) grad_norm: 4.1617 (4.3239) time: 0.7760 data: 0.0002 max mem: 8421 +[2024-12-05 07:19:32 root] (utils.py 283): INFO Epoch: [5] [1720/2502] eta: 0:10:03 lr: 0.000019 loss_cls: 4.2256 (3.9572) grad_norm: 4.0330 (4.3220) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 07:19:40 root] (utils.py 283): INFO Epoch: [5] [1730/2502] eta: 0:09:55 lr: 0.000019 loss_cls: 4.2256 (3.9555) grad_norm: 4.1395 (4.3225) time: 0.7736 data: 0.0002 max mem: 8421 +[2024-12-05 07:19:48 root] (utils.py 283): INFO Epoch: [5] [1740/2502] eta: 0:09:48 lr: 0.000019 loss_cls: 3.8134 (3.9560) grad_norm: 4.2460 (4.3216) time: 0.7872 data: 0.0002 max mem: 8421 +[2024-12-05 07:19:56 root] (utils.py 283): INFO Epoch: [5] [1750/2502] eta: 0:09:40 lr: 0.000019 loss_cls: 4.1532 (3.9562) grad_norm: 4.1585 (4.3225) time: 0.7751 data: 0.0003 max mem: 8421 +[2024-12-05 07:20:03 root] (utils.py 283): INFO Epoch: [5] [1760/2502] eta: 0:09:32 lr: 0.000019 loss_cls: 4.1532 (3.9563) grad_norm: 4.5235 (4.3245) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 07:20:11 root] (utils.py 283): INFO Epoch: [5] [1770/2502] eta: 0:09:25 lr: 0.000019 loss_cls: 3.7527 (3.9549) grad_norm: 4.1321 (4.3244) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 07:20:19 root] (utils.py 283): INFO Epoch: [5] [1780/2502] eta: 0:09:17 lr: 0.000019 loss_cls: 3.9089 (3.9550) grad_norm: 4.1016 (4.3253) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 07:20:26 root] (utils.py 283): INFO Epoch: [5] [1790/2502] eta: 0:09:09 lr: 0.000019 loss_cls: 3.9089 (3.9544) grad_norm: 4.3807 (4.3252) time: 0.7679 data: 0.0002 max mem: 8421 +[2024-12-05 07:20:34 root] (utils.py 283): INFO Epoch: [5] [1800/2502] eta: 0:09:01 lr: 0.000019 loss_cls: 3.7487 (3.9530) grad_norm: 4.2757 (4.3248) time: 0.7683 data: 0.0003 max mem: 8421 +[2024-12-05 07:20:42 root] (utils.py 283): INFO Epoch: [5] [1810/2502] eta: 0:08:54 lr: 0.000019 loss_cls: 4.0336 (3.9543) grad_norm: 4.1832 (4.3246) time: 0.7746 data: 0.0003 max mem: 8421 +[2024-12-05 07:20:50 root] (utils.py 283): INFO Epoch: [5] [1820/2502] eta: 0:08:46 lr: 0.000019 loss_cls: 4.1614 (3.9547) grad_norm: 4.1832 (4.3246) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-05 07:20:58 root] (utils.py 283): INFO Epoch: [5] [1830/2502] eta: 0:08:38 lr: 0.000019 loss_cls: 4.1129 (3.9542) grad_norm: 4.1000 (4.3246) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 07:21:05 root] (utils.py 283): INFO Epoch: [5] [1840/2502] eta: 0:08:31 lr: 0.000019 loss_cls: 3.9818 (3.9526) grad_norm: 4.1395 (4.3242) time: 0.7729 data: 0.0002 max mem: 8421 +[2024-12-05 07:21:13 root] (utils.py 283): INFO Epoch: [5] [1850/2502] eta: 0:08:23 lr: 0.000019 loss_cls: 3.9719 (3.9518) grad_norm: 4.1395 (4.3232) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 07:21:21 root] (utils.py 283): INFO Epoch: [5] [1860/2502] eta: 0:08:15 lr: 0.000019 loss_cls: 4.0175 (3.9519) grad_norm: 4.1259 (4.3224) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 07:21:28 root] (utils.py 283): INFO Epoch: [5] [1870/2502] eta: 0:08:07 lr: 0.000019 loss_cls: 4.2167 (3.9530) grad_norm: 3.9690 (4.3202) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 07:21:36 root] (utils.py 283): INFO Epoch: [5] [1880/2502] eta: 0:08:00 lr: 0.000019 loss_cls: 4.1352 (3.9517) grad_norm: 3.9569 (4.3199) time: 0.7685 data: 0.0002 max mem: 8421 +[2024-12-05 07:21:44 root] (utils.py 283): INFO Epoch: [5] [1890/2502] eta: 0:07:52 lr: 0.000019 loss_cls: 4.0109 (3.9520) grad_norm: 4.0800 (4.3198) time: 0.7707 data: 0.0002 max mem: 8421 +[2024-12-05 07:21:51 root] (utils.py 283): INFO Epoch: [5] [1900/2502] eta: 0:07:44 lr: 0.000019 loss_cls: 4.0778 (3.9530) grad_norm: 4.0499 (4.3185) time: 0.7747 data: 0.0002 max mem: 8421 +[2024-12-05 07:21:59 root] (utils.py 283): INFO Epoch: [5] [1910/2502] eta: 0:07:36 lr: 0.000019 loss_cls: 4.1533 (3.9532) grad_norm: 4.1288 (4.3194) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-05 07:22:07 root] (utils.py 283): INFO Epoch: [5] [1920/2502] eta: 0:07:29 lr: 0.000019 loss_cls: 4.0480 (3.9525) grad_norm: 4.1720 (4.3203) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 07:22:14 root] (utils.py 283): INFO Epoch: [5] [1930/2502] eta: 0:07:21 lr: 0.000019 loss_cls: 3.8201 (3.9510) grad_norm: 4.1705 (4.3200) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 07:22:22 root] (utils.py 283): INFO Epoch: [5] [1940/2502] eta: 0:07:13 lr: 0.000019 loss_cls: 4.1070 (3.9514) grad_norm: 4.1565 (4.3219) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 07:22:30 root] (utils.py 283): INFO Epoch: [5] [1950/2502] eta: 0:07:06 lr: 0.000019 loss_cls: 4.1075 (3.9515) grad_norm: 4.3052 (4.3217) time: 0.7749 data: 0.0002 max mem: 8421 +[2024-12-05 07:22:38 root] (utils.py 283): INFO Epoch: [5] [1960/2502] eta: 0:06:58 lr: 0.000019 loss_cls: 4.1075 (3.9503) grad_norm: 4.1999 (4.3212) time: 0.7786 data: 0.0002 max mem: 8421 +[2024-12-05 07:22:45 root] (utils.py 283): INFO Epoch: [5] [1970/2502] eta: 0:06:50 lr: 0.000019 loss_cls: 4.2025 (3.9530) grad_norm: 4.2107 (4.3263) time: 0.7718 data: 0.0002 max mem: 8421 +[2024-12-05 07:22:53 root] (utils.py 283): INFO Epoch: [5] [1980/2502] eta: 0:06:42 lr: 0.000019 loss_cls: 4.4066 (3.9532) grad_norm: 4.2164 (4.3265) time: 0.7679 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:01 root] (utils.py 283): INFO Epoch: [5] [1990/2502] eta: 0:06:35 lr: 0.000019 loss_cls: 3.7140 (3.9508) grad_norm: 4.1914 (4.3279) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:08 root] (utils.py 283): INFO Epoch: [5] [2000/2502] eta: 0:06:27 lr: 0.000019 loss_cls: 3.8989 (3.9513) grad_norm: 4.1598 (4.3274) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:16 root] (utils.py 283): INFO Epoch: [5] [2010/2502] eta: 0:06:19 lr: 0.000019 loss_cls: 4.2921 (3.9520) grad_norm: 4.1665 (4.3276) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:24 root] (utils.py 283): INFO Epoch: [5] [2020/2502] eta: 0:06:11 lr: 0.000019 loss_cls: 4.2387 (3.9517) grad_norm: 4.1665 (4.3283) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:31 root] (utils.py 283): INFO Epoch: [5] [2030/2502] eta: 0:06:04 lr: 0.000019 loss_cls: 3.9788 (3.9522) grad_norm: 4.2732 (4.3291) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:39 root] (utils.py 283): INFO Epoch: [5] [2040/2502] eta: 0:05:56 lr: 0.000019 loss_cls: 3.9788 (3.9517) grad_norm: 4.2166 (4.3284) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:47 root] (utils.py 283): INFO Epoch: [5] [2050/2502] eta: 0:05:48 lr: 0.000019 loss_cls: 4.0281 (3.9518) grad_norm: 4.1527 (4.3285) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-05 07:23:54 root] (utils.py 283): INFO Epoch: [5] [2060/2502] eta: 0:05:41 lr: 0.000019 loss_cls: 4.1284 (3.9527) grad_norm: 4.0405 (4.3272) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 07:24:02 root] (utils.py 283): INFO Epoch: [5] [2070/2502] eta: 0:05:33 lr: 0.000019 loss_cls: 4.1991 (3.9524) grad_norm: 3.9679 (4.3266) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 07:24:10 root] (utils.py 283): INFO Epoch: [5] [2080/2502] eta: 0:05:25 lr: 0.000019 loss_cls: 4.1991 (3.9533) grad_norm: 4.2001 (4.3270) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 07:24:18 root] (utils.py 283): INFO Epoch: [5] [2090/2502] eta: 0:05:17 lr: 0.000019 loss_cls: 4.2019 (3.9540) grad_norm: 4.3471 (4.3286) time: 0.7869 data: 0.0002 max mem: 8421 +[2024-12-05 07:24:26 root] (utils.py 283): INFO Epoch: [5] [2100/2502] eta: 0:05:10 lr: 0.000019 loss_cls: 3.9033 (3.9541) grad_norm: 4.3061 (4.3282) time: 0.7924 data: 0.0002 max mem: 8421 +[2024-12-05 07:24:34 root] (utils.py 283): INFO Epoch: [5] [2110/2502] eta: 0:05:02 lr: 0.000019 loss_cls: 3.8205 (3.9532) grad_norm: 4.2080 (4.3281) time: 0.7899 data: 0.0002 max mem: 8421 +[2024-12-05 07:24:41 root] (utils.py 283): INFO Epoch: [5] [2120/2502] eta: 0:04:54 lr: 0.000019 loss_cls: 3.9392 (3.9541) grad_norm: 4.3994 (4.3284) time: 0.7875 data: 0.0003 max mem: 8421 +[2024-12-05 07:24:49 root] (utils.py 283): INFO Epoch: [5] [2130/2502] eta: 0:04:47 lr: 0.000019 loss_cls: 4.2522 (3.9539) grad_norm: 4.0688 (4.3273) time: 0.7753 data: 0.0002 max mem: 8421 +[2024-12-05 07:24:57 root] (utils.py 283): INFO Epoch: [5] [2140/2502] eta: 0:04:39 lr: 0.000019 loss_cls: 4.2446 (3.9551) grad_norm: 3.9544 (4.3270) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:04 root] (utils.py 283): INFO Epoch: [5] [2150/2502] eta: 0:04:31 lr: 0.000019 loss_cls: 4.2033 (3.9554) grad_norm: 4.2839 (4.3270) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:12 root] (utils.py 283): INFO Epoch: [5] [2160/2502] eta: 0:04:23 lr: 0.000019 loss_cls: 4.2014 (3.9570) grad_norm: 4.1497 (4.3259) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:20 root] (utils.py 283): INFO Epoch: [5] [2170/2502] eta: 0:04:16 lr: 0.000019 loss_cls: 4.1876 (3.9569) grad_norm: 3.9937 (4.3261) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:27 root] (utils.py 283): INFO Epoch: [5] [2180/2502] eta: 0:04:08 lr: 0.000019 loss_cls: 3.9833 (3.9570) grad_norm: 4.1910 (4.3258) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:35 root] (utils.py 283): INFO Epoch: [5] [2190/2502] eta: 0:04:00 lr: 0.000019 loss_cls: 4.2816 (3.9582) grad_norm: 4.1894 (4.3252) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:43 root] (utils.py 283): INFO Epoch: [5] [2200/2502] eta: 0:03:53 lr: 0.000019 loss_cls: 4.2816 (3.9593) grad_norm: 4.2027 (4.3267) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:50 root] (utils.py 283): INFO Epoch: [5] [2210/2502] eta: 0:03:45 lr: 0.000019 loss_cls: 4.0066 (3.9588) grad_norm: 4.3351 (4.3273) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 07:25:58 root] (utils.py 283): INFO Epoch: [5] [2220/2502] eta: 0:03:37 lr: 0.000019 loss_cls: 4.1328 (3.9599) grad_norm: 4.0567 (4.3270) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:05 root] (utils.py 283): INFO Epoch: [5] [2230/2502] eta: 0:03:29 lr: 0.000019 loss_cls: 4.1339 (3.9590) grad_norm: 4.0710 (4.3263) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:13 root] (utils.py 283): INFO Epoch: [5] [2240/2502] eta: 0:03:22 lr: 0.000019 loss_cls: 3.9993 (3.9592) grad_norm: 4.0710 (4.3257) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:21 root] (utils.py 283): INFO Epoch: [5] [2250/2502] eta: 0:03:14 lr: 0.000019 loss_cls: 3.9993 (3.9584) grad_norm: 4.1663 (4.3260) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:29 root] (utils.py 283): INFO Epoch: [5] [2260/2502] eta: 0:03:06 lr: 0.000019 loss_cls: 3.9847 (3.9583) grad_norm: 4.1663 (4.3245) time: 0.7774 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:36 root] (utils.py 283): INFO Epoch: [5] [2270/2502] eta: 0:02:58 lr: 0.000019 loss_cls: 3.9097 (3.9571) grad_norm: 3.9949 (4.3245) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:44 root] (utils.py 283): INFO Epoch: [5] [2280/2502] eta: 0:02:51 lr: 0.000019 loss_cls: 3.9097 (3.9569) grad_norm: 4.0096 (4.3236) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:52 root] (utils.py 283): INFO Epoch: [5] [2290/2502] eta: 0:02:43 lr: 0.000019 loss_cls: 4.0909 (3.9571) grad_norm: 4.0452 (4.3227) time: 0.7598 data: 0.0002 max mem: 8421 +[2024-12-05 07:26:59 root] (utils.py 283): INFO Epoch: [5] [2300/2502] eta: 0:02:35 lr: 0.000019 loss_cls: 4.1029 (3.9572) grad_norm: 4.0587 (4.3221) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 07:27:07 root] (utils.py 283): INFO Epoch: [5] [2310/2502] eta: 0:02:28 lr: 0.000019 loss_cls: 4.0492 (3.9569) grad_norm: 4.0606 (4.3211) time: 0.7606 data: 0.0002 max mem: 8421 +[2024-12-05 07:27:14 root] (utils.py 283): INFO Epoch: [5] [2320/2502] eta: 0:02:20 lr: 0.000019 loss_cls: 3.9244 (3.9559) grad_norm: 4.1124 (4.3215) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 07:27:22 root] (utils.py 283): INFO Epoch: [5] [2330/2502] eta: 0:02:12 lr: 0.000019 loss_cls: 4.0541 (3.9562) grad_norm: 4.1372 (4.3210) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 07:27:30 root] (utils.py 283): INFO Epoch: [5] [2340/2502] eta: 0:02:04 lr: 0.000019 loss_cls: 4.0921 (3.9564) grad_norm: 4.1479 (4.3212) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 07:27:37 root] (utils.py 283): INFO Epoch: [5] [2350/2502] eta: 0:01:57 lr: 0.000019 loss_cls: 4.0931 (3.9555) grad_norm: 4.1193 (4.3201) time: 0.7725 data: 0.0002 max mem: 8421 +[2024-12-05 07:27:45 root] (utils.py 283): INFO Epoch: [5] [2360/2502] eta: 0:01:49 lr: 0.000019 loss_cls: 3.6472 (3.9543) grad_norm: 4.1158 (4.3222) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 07:27:53 root] (utils.py 283): INFO Epoch: [5] [2370/2502] eta: 0:01:41 lr: 0.000019 loss_cls: 3.6120 (3.9526) grad_norm: 4.2110 (4.3221) time: 0.7590 data: 0.0002 max mem: 8421 +[2024-12-05 07:28:00 root] (utils.py 283): INFO Epoch: [5] [2380/2502] eta: 0:01:34 lr: 0.000019 loss_cls: 3.6952 (3.9528) grad_norm: 4.1007 (4.3205) time: 0.7628 data: 0.0003 max mem: 8421 +[2024-12-05 07:28:08 root] (utils.py 283): INFO Epoch: [5] [2390/2502] eta: 0:01:26 lr: 0.000019 loss_cls: 3.9628 (3.9523) grad_norm: 4.0666 (4.3232) time: 0.7627 data: 0.0003 max mem: 8421 +[2024-12-05 07:28:15 root] (utils.py 283): INFO Epoch: [5] [2400/2502] eta: 0:01:18 lr: 0.000019 loss_cls: 3.6704 (3.9518) grad_norm: 4.2908 (4.3240) time: 0.7614 data: 0.0003 max mem: 8421 +[2024-12-05 07:28:23 root] (utils.py 283): INFO Epoch: [5] [2410/2502] eta: 0:01:10 lr: 0.000019 loss_cls: 3.6289 (3.9515) grad_norm: 4.3100 (4.3236) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 07:28:31 root] (utils.py 283): INFO Epoch: [5] [2420/2502] eta: 0:01:03 lr: 0.000019 loss_cls: 4.1696 (3.9523) grad_norm: 4.2343 (4.3232) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 07:28:38 root] (utils.py 283): INFO Epoch: [5] [2430/2502] eta: 0:00:55 lr: 0.000019 loss_cls: 4.2341 (3.9527) grad_norm: 4.2194 (4.3230) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 07:28:46 root] (utils.py 283): INFO Epoch: [5] [2440/2502] eta: 0:00:47 lr: 0.000019 loss_cls: 4.2920 (3.9536) grad_norm: 4.2194 (4.3228) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-05 07:28:54 root] (utils.py 283): INFO Epoch: [5] [2450/2502] eta: 0:00:40 lr: 0.000019 loss_cls: 4.1395 (3.9540) grad_norm: 4.3749 (4.3236) time: 0.7623 data: 0.0003 max mem: 8421 +[2024-12-05 07:29:01 root] (utils.py 283): INFO Epoch: [5] [2460/2502] eta: 0:00:32 lr: 0.000019 loss_cls: 3.9600 (3.9531) grad_norm: 4.2966 (4.3234) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 07:29:09 root] (utils.py 283): INFO Epoch: [5] [2470/2502] eta: 0:00:24 lr: 0.000019 loss_cls: 3.8803 (3.9536) grad_norm: 4.2097 (4.3229) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 07:29:17 root] (utils.py 283): INFO Epoch: [5] [2480/2502] eta: 0:00:16 lr: 0.000019 loss_cls: 4.3464 (3.9551) grad_norm: 4.0665 (4.3220) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 07:29:25 root] (utils.py 283): INFO Epoch: [5] [2490/2502] eta: 0:00:09 lr: 0.000019 loss_cls: 4.1449 (3.9546) grad_norm: 4.0665 (4.3217) time: 0.7862 data: 0.0213 max mem: 8421 +[2024-12-05 07:29:32 root] (utils.py 283): INFO Epoch: [5] [2500/2502] eta: 0:00:01 lr: 0.000019 loss_cls: 4.0992 (3.9554) grad_norm: 4.1505 (4.3225) time: 0.7824 data: 0.0213 max mem: 8421 +[2024-12-05 07:29:33 root] (utils.py 283): INFO Epoch: [5] [2501/2502] eta: 0:00:00 lr: 0.000019 loss_cls: 4.0992 (3.9557) grad_norm: 4.2404 (4.3229) time: 0.7824 data: 0.0213 max mem: 8421 +[2024-12-05 07:29:33 root] (utils.py 297): INFO Epoch: [5] Total time: 0:32:09 (0.7711 s / it) +[2024-12-05 07:29:33 root] (engine.py 178): INFO Averaged stats:lr: 0.000019 loss_cls: 4.0992 (3.9561) grad_norm: 4.2404 (4.3229) +[2024-12-05 07:29:34 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7259 (0.7259) acc1: 84.3750 (84.3750) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1312 data: 0.0003 max mem: 8421 +[2024-12-05 07:29:35 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8112 (0.8829) acc1: 82.8125 (81.1080) acc3: 92.9688 (92.6136) acc5: 96.8750 (95.6676) time: 0.1313 data: 0.0003 max mem: 8421 +[2024-12-05 07:29:36 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8971 (0.9260) acc1: 78.9062 (79.8363) acc3: 92.9688 (92.2991) acc5: 96.0938 (95.0149) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 07:29:38 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9318 (0.9286) acc1: 78.9062 (79.0827) acc3: 92.1875 (92.6159) acc5: 94.5312 (95.0857) time: 0.1317 data: 0.0004 max mem: 8421 +[2024-12-05 07:29:39 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8574 (0.9165) acc1: 79.6875 (79.5351) acc3: 94.5312 (92.7782) acc5: 95.3125 (95.1982) time: 0.1606 data: 0.0294 max mem: 8421 +[2024-12-05 07:29:41 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0486 (1.0157) acc1: 75.7812 (77.3284) acc3: 88.2812 (91.0846) acc5: 92.1875 (93.9185) time: 0.1605 data: 0.0294 max mem: 8421 +[2024-12-05 07:29:43 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3355 (1.0595) acc1: 70.3125 (76.5625) acc3: 85.9375 (90.2792) acc5: 89.0625 (93.2121) time: 0.1547 data: 0.0235 max mem: 8421 +[2024-12-05 07:29:45 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3026 (1.1025) acc1: 71.8750 (75.4401) acc3: 85.9375 (89.7337) acc5: 89.8438 (92.7927) time: 0.1887 data: 0.0573 max mem: 8421 +[2024-12-05 07:29:46 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3229 (1.1408) acc1: 68.7500 (74.5853) acc3: 85.9375 (89.1397) acc5: 89.8438 (92.2936) time: 0.1781 data: 0.0467 max mem: 8421 +[2024-12-05 07:29:48 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.4001 (1.1724) acc1: 67.9688 (73.7809) acc3: 84.3750 (88.5388) acc5: 89.0625 (91.8269) time: 0.1670 data: 0.0359 max mem: 8421 +[2024-12-05 07:29:49 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2544 (1.1598) acc1: 68.7500 (73.9040) acc3: 87.5000 (88.6960) acc5: 90.6250 (92.0080) time: 0.1647 data: 0.0357 max mem: 8421 +[2024-12-05 07:29:49 root] (utils.py 297): INFO Test: Total time: 0:00:15 (0.1561 s / it) +[2024-12-05 07:29:49 root] (engine.py 263): INFO * Acc@1 74.064 Acc@3 88.470 Acc@5 91.924 loss 1.161 flops 1.285 layer_flops 1.251 +[2024-12-05 07:29:49 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.1% +[2024-12-05 07:29:49 root] (main.py 550): INFO Max accuracy: 74.06% +[2024-12-05 07:29:50 root] (utils.py 283): INFO Epoch: [6] [ 0/2502] eta: 0:32:50 lr: 0.000019 loss_cls: 4.3257 (4.3257) grad_norm: 3.8252 (3.8252) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-05 07:29:58 root] (utils.py 283): INFO Epoch: [6] [ 10/2502] eta: 0:32:41 lr: 0.000019 loss_cls: 4.0484 (3.9707) grad_norm: 4.0702 (4.0866) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 07:30:05 root] (utils.py 283): INFO Epoch: [6] [ 20/2502] eta: 0:32:25 lr: 0.000019 loss_cls: 4.0484 (3.9759) grad_norm: 4.0702 (4.1332) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-05 07:30:13 root] (utils.py 283): INFO Epoch: [6] [ 30/2502] eta: 0:32:06 lr: 0.000019 loss_cls: 3.8559 (3.8908) grad_norm: 4.1723 (4.2202) time: 0.7748 data: 0.0002 max mem: 8421 +[2024-12-05 07:30:21 root] (utils.py 283): INFO Epoch: [6] [ 40/2502] eta: 0:31:49 lr: 0.000019 loss_cls: 3.9435 (3.9145) grad_norm: 4.2096 (4.2322) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 07:30:28 root] (utils.py 283): INFO Epoch: [6] [ 50/2502] eta: 0:31:35 lr: 0.000019 loss_cls: 4.0478 (3.9202) grad_norm: 4.2096 (4.2446) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 07:30:36 root] (utils.py 283): INFO Epoch: [6] [ 60/2502] eta: 0:31:25 lr: 0.000019 loss_cls: 4.0928 (3.9518) grad_norm: 4.2250 (4.2432) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 07:30:44 root] (utils.py 283): INFO Epoch: [6] [ 70/2502] eta: 0:31:15 lr: 0.000019 loss_cls: 4.0395 (3.9266) grad_norm: 4.0638 (4.3531) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 07:30:51 root] (utils.py 283): INFO Epoch: [6] [ 80/2502] eta: 0:31:05 lr: 0.000019 loss_cls: 3.7996 (3.9140) grad_norm: 4.1034 (4.3347) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 07:30:59 root] (utils.py 283): INFO Epoch: [6] [ 90/2502] eta: 0:30:55 lr: 0.000019 loss_cls: 3.7529 (3.9037) grad_norm: 4.1062 (4.3181) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 07:31:07 root] (utils.py 283): INFO Epoch: [6] [ 100/2502] eta: 0:30:46 lr: 0.000019 loss_cls: 3.9359 (3.9308) grad_norm: 4.1062 (4.3405) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 07:31:14 root] (utils.py 283): INFO Epoch: [6] [ 110/2502] eta: 0:30:43 lr: 0.000019 loss_cls: 4.2896 (3.9261) grad_norm: 4.1048 (4.3289) time: 0.7754 data: 0.0002 max mem: 8421 +[2024-12-05 07:31:22 root] (utils.py 283): INFO Epoch: [6] [ 120/2502] eta: 0:30:33 lr: 0.000019 loss_cls: 3.8309 (3.9159) grad_norm: 4.0394 (4.3226) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 07:31:30 root] (utils.py 283): INFO Epoch: [6] [ 130/2502] eta: 0:30:25 lr: 0.000019 loss_cls: 3.8630 (3.9286) grad_norm: 4.1072 (4.3106) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 07:31:37 root] (utils.py 283): INFO Epoch: [6] [ 140/2502] eta: 0:30:17 lr: 0.000019 loss_cls: 4.1947 (3.9399) grad_norm: 4.1064 (4.3472) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 07:31:45 root] (utils.py 283): INFO Epoch: [6] [ 150/2502] eta: 0:30:08 lr: 0.000019 loss_cls: 4.0153 (3.9342) grad_norm: 4.1572 (4.3470) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 07:31:53 root] (utils.py 283): INFO Epoch: [6] [ 160/2502] eta: 0:29:58 lr: 0.000019 loss_cls: 3.9859 (3.9522) grad_norm: 4.1938 (4.3864) time: 0.7586 data: 0.0002 max mem: 8421 +[2024-12-05 07:32:00 root] (utils.py 283): INFO Epoch: [6] [ 170/2502] eta: 0:29:50 lr: 0.000019 loss_cls: 4.1033 (3.9433) grad_norm: 4.2371 (4.3815) time: 0.7586 data: 0.0002 max mem: 8421 +[2024-12-05 07:32:08 root] (utils.py 283): INFO Epoch: [6] [ 180/2502] eta: 0:29:42 lr: 0.000019 loss_cls: 4.1033 (3.9498) grad_norm: 4.2236 (4.3713) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 07:32:16 root] (utils.py 283): INFO Epoch: [6] [ 190/2502] eta: 0:29:34 lr: 0.000019 loss_cls: 3.7968 (3.9347) grad_norm: 3.9584 (4.3484) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 07:32:23 root] (utils.py 283): INFO Epoch: [6] [ 200/2502] eta: 0:29:26 lr: 0.000019 loss_cls: 4.1058 (3.9436) grad_norm: 3.9029 (4.3355) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 07:32:31 root] (utils.py 283): INFO Epoch: [6] [ 210/2502] eta: 0:29:18 lr: 0.000019 loss_cls: 4.1867 (3.9561) grad_norm: 4.2771 (4.3387) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 07:32:38 root] (utils.py 283): INFO Epoch: [6] [ 220/2502] eta: 0:29:09 lr: 0.000019 loss_cls: 4.1165 (3.9588) grad_norm: 4.3244 (4.3287) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-05 07:32:46 root] (utils.py 283): INFO Epoch: [6] [ 230/2502] eta: 0:29:01 lr: 0.000019 loss_cls: 4.0239 (3.9505) grad_norm: 4.0622 (4.3156) time: 0.7599 data: 0.0003 max mem: 8421 +[2024-12-05 07:32:54 root] (utils.py 283): INFO Epoch: [6] [ 240/2502] eta: 0:28:53 lr: 0.000019 loss_cls: 4.2312 (3.9663) grad_norm: 4.0648 (4.3292) time: 0.7596 data: 0.0002 max mem: 8421 +[2024-12-05 07:33:01 root] (utils.py 283): INFO Epoch: [6] [ 250/2502] eta: 0:28:45 lr: 0.000019 loss_cls: 4.3292 (3.9707) grad_norm: 4.1136 (4.3287) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 07:33:09 root] (utils.py 283): INFO Epoch: [6] [ 260/2502] eta: 0:28:37 lr: 0.000019 loss_cls: 4.1168 (3.9701) grad_norm: 4.0985 (4.3222) time: 0.7663 data: 0.0004 max mem: 8421 +[2024-12-05 07:33:17 root] (utils.py 283): INFO Epoch: [6] [ 270/2502] eta: 0:28:29 lr: 0.000019 loss_cls: 4.0440 (3.9720) grad_norm: 4.1068 (4.3144) time: 0.7603 data: 0.0004 max mem: 8421 +[2024-12-05 07:33:24 root] (utils.py 283): INFO Epoch: [6] [ 280/2502] eta: 0:28:21 lr: 0.000019 loss_cls: 4.1843 (3.9761) grad_norm: 4.1682 (4.3112) time: 0.7602 data: 0.0003 max mem: 8421 +[2024-12-05 07:33:32 root] (utils.py 283): INFO Epoch: [6] [ 290/2502] eta: 0:28:13 lr: 0.000019 loss_cls: 4.1940 (3.9733) grad_norm: 4.2205 (4.3093) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 07:33:39 root] (utils.py 283): INFO Epoch: [6] [ 300/2502] eta: 0:28:06 lr: 0.000019 loss_cls: 4.0816 (3.9779) grad_norm: 4.1756 (4.3191) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 07:33:47 root] (utils.py 283): INFO Epoch: [6] [ 310/2502] eta: 0:27:58 lr: 0.000019 loss_cls: 4.0659 (3.9708) grad_norm: 4.2531 (4.3219) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 07:33:55 root] (utils.py 283): INFO Epoch: [6] [ 320/2502] eta: 0:27:50 lr: 0.000019 loss_cls: 3.7878 (3.9700) grad_norm: 4.3419 (4.3204) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 07:34:02 root] (utils.py 283): INFO Epoch: [6] [ 330/2502] eta: 0:27:43 lr: 0.000019 loss_cls: 3.7878 (3.9641) grad_norm: 4.1526 (4.3125) time: 0.7672 data: 0.0003 max mem: 8421 +[2024-12-05 07:34:10 root] (utils.py 283): INFO Epoch: [6] [ 340/2502] eta: 0:27:35 lr: 0.000019 loss_cls: 4.1813 (3.9685) grad_norm: 4.2542 (4.3191) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-05 07:34:18 root] (utils.py 283): INFO Epoch: [6] [ 350/2502] eta: 0:27:28 lr: 0.000019 loss_cls: 4.2045 (3.9723) grad_norm: 4.3272 (4.3161) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-05 07:34:25 root] (utils.py 283): INFO Epoch: [6] [ 360/2502] eta: 0:27:20 lr: 0.000019 loss_cls: 4.0026 (3.9677) grad_norm: 4.2430 (4.3212) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 07:34:33 root] (utils.py 283): INFO Epoch: [6] [ 370/2502] eta: 0:27:13 lr: 0.000019 loss_cls: 3.8572 (3.9662) grad_norm: 4.2430 (4.3155) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 07:34:41 root] (utils.py 283): INFO Epoch: [6] [ 380/2502] eta: 0:27:05 lr: 0.000019 loss_cls: 3.8572 (3.9613) grad_norm: 4.1359 (4.3083) time: 0.7665 data: 0.0003 max mem: 8421 +[2024-12-05 07:34:48 root] (utils.py 283): INFO Epoch: [6] [ 390/2502] eta: 0:26:57 lr: 0.000019 loss_cls: 4.0181 (3.9620) grad_norm: 4.0914 (4.3036) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 07:34:56 root] (utils.py 283): INFO Epoch: [6] [ 400/2502] eta: 0:26:50 lr: 0.000019 loss_cls: 4.1159 (3.9655) grad_norm: 4.0760 (4.3028) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 07:35:04 root] (utils.py 283): INFO Epoch: [6] [ 410/2502] eta: 0:26:42 lr: 0.000019 loss_cls: 4.0601 (3.9683) grad_norm: 4.1928 (4.3095) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 07:35:11 root] (utils.py 283): INFO Epoch: [6] [ 420/2502] eta: 0:26:34 lr: 0.000019 loss_cls: 3.9557 (3.9595) grad_norm: 4.2023 (4.3090) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 07:35:19 root] (utils.py 283): INFO Epoch: [6] [ 430/2502] eta: 0:26:27 lr: 0.000019 loss_cls: 4.0359 (3.9596) grad_norm: 4.0564 (4.3038) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 07:35:27 root] (utils.py 283): INFO Epoch: [6] [ 440/2502] eta: 0:26:19 lr: 0.000019 loss_cls: 4.0735 (3.9612) grad_norm: 4.2022 (4.3016) time: 0.7669 data: 0.0003 max mem: 8421 +[2024-12-05 07:35:34 root] (utils.py 283): INFO Epoch: [6] [ 450/2502] eta: 0:26:11 lr: 0.000019 loss_cls: 4.0323 (3.9617) grad_norm: 4.2498 (4.3005) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 07:35:42 root] (utils.py 283): INFO Epoch: [6] [ 460/2502] eta: 0:26:04 lr: 0.000019 loss_cls: 4.1664 (3.9648) grad_norm: 4.0235 (4.2960) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 07:35:50 root] (utils.py 283): INFO Epoch: [6] [ 470/2502] eta: 0:25:56 lr: 0.000019 loss_cls: 4.2766 (3.9668) grad_norm: 4.0235 (4.2968) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-05 07:35:57 root] (utils.py 283): INFO Epoch: [6] [ 480/2502] eta: 0:25:48 lr: 0.000019 loss_cls: 4.0736 (3.9607) grad_norm: 4.1527 (4.2943) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 07:36:05 root] (utils.py 283): INFO Epoch: [6] [ 490/2502] eta: 0:25:41 lr: 0.000019 loss_cls: 4.0736 (3.9654) grad_norm: 4.0856 (4.2921) time: 0.7771 data: 0.0002 max mem: 8421 +[2024-12-05 07:36:13 root] (utils.py 283): INFO Epoch: [6] [ 500/2502] eta: 0:25:34 lr: 0.000019 loss_cls: 4.2344 (3.9662) grad_norm: 4.0856 (4.2862) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-05 07:36:21 root] (utils.py 283): INFO Epoch: [6] [ 510/2502] eta: 0:25:26 lr: 0.000019 loss_cls: 3.8291 (3.9619) grad_norm: 4.0311 (4.2851) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 07:36:28 root] (utils.py 283): INFO Epoch: [6] [ 520/2502] eta: 0:25:19 lr: 0.000019 loss_cls: 4.1033 (3.9654) grad_norm: 4.2055 (4.2890) time: 0.7692 data: 0.0002 max mem: 8421 +[2024-12-05 07:36:36 root] (utils.py 283): INFO Epoch: [6] [ 530/2502] eta: 0:25:11 lr: 0.000019 loss_cls: 4.0792 (3.9571) grad_norm: 4.0885 (4.2873) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 07:36:44 root] (utils.py 283): INFO Epoch: [6] [ 540/2502] eta: 0:25:03 lr: 0.000019 loss_cls: 3.8730 (3.9551) grad_norm: 4.1209 (4.2869) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 07:36:51 root] (utils.py 283): INFO Epoch: [6] [ 550/2502] eta: 0:24:55 lr: 0.000019 loss_cls: 4.0432 (3.9534) grad_norm: 4.1209 (4.2817) time: 0.7662 data: 0.0003 max mem: 8421 +[2024-12-05 07:36:59 root] (utils.py 283): INFO Epoch: [6] [ 560/2502] eta: 0:24:48 lr: 0.000019 loss_cls: 4.0900 (3.9540) grad_norm: 4.1223 (4.2817) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 07:37:07 root] (utils.py 283): INFO Epoch: [6] [ 570/2502] eta: 0:24:40 lr: 0.000019 loss_cls: 3.7401 (3.9513) grad_norm: 4.1929 (4.2834) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 07:37:14 root] (utils.py 283): INFO Epoch: [6] [ 580/2502] eta: 0:24:32 lr: 0.000019 loss_cls: 4.0952 (3.9527) grad_norm: 4.2004 (4.2835) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 07:37:22 root] (utils.py 283): INFO Epoch: [6] [ 590/2502] eta: 0:24:25 lr: 0.000019 loss_cls: 4.2033 (3.9557) grad_norm: 4.2004 (4.2833) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 07:37:30 root] (utils.py 283): INFO Epoch: [6] [ 600/2502] eta: 0:24:17 lr: 0.000019 loss_cls: 4.1783 (3.9513) grad_norm: 4.3284 (4.2866) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 07:37:37 root] (utils.py 283): INFO Epoch: [6] [ 610/2502] eta: 0:24:09 lr: 0.000019 loss_cls: 3.9706 (3.9516) grad_norm: 4.1887 (4.2824) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 07:37:45 root] (utils.py 283): INFO Epoch: [6] [ 620/2502] eta: 0:24:02 lr: 0.000019 loss_cls: 3.9706 (3.9519) grad_norm: 4.0473 (4.2833) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 07:37:53 root] (utils.py 283): INFO Epoch: [6] [ 630/2502] eta: 0:23:54 lr: 0.000019 loss_cls: 4.2140 (3.9575) grad_norm: 4.2267 (4.2841) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:00 root] (utils.py 283): INFO Epoch: [6] [ 640/2502] eta: 0:23:46 lr: 0.000019 loss_cls: 4.1757 (3.9550) grad_norm: 4.2503 (4.2838) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:08 root] (utils.py 283): INFO Epoch: [6] [ 650/2502] eta: 0:23:39 lr: 0.000019 loss_cls: 3.9280 (3.9540) grad_norm: 4.0483 (4.2811) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:15 root] (utils.py 283): INFO Epoch: [6] [ 660/2502] eta: 0:23:31 lr: 0.000019 loss_cls: 3.8167 (3.9527) grad_norm: 4.1732 (4.2901) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:23 root] (utils.py 283): INFO Epoch: [6] [ 670/2502] eta: 0:23:23 lr: 0.000019 loss_cls: 3.8167 (3.9526) grad_norm: 4.4289 (4.2924) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:31 root] (utils.py 283): INFO Epoch: [6] [ 680/2502] eta: 0:23:15 lr: 0.000019 loss_cls: 4.0572 (3.9540) grad_norm: 4.2717 (4.2910) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:38 root] (utils.py 283): INFO Epoch: [6] [ 690/2502] eta: 0:23:08 lr: 0.000019 loss_cls: 4.0742 (3.9554) grad_norm: 4.1844 (4.2917) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:46 root] (utils.py 283): INFO Epoch: [6] [ 700/2502] eta: 0:23:00 lr: 0.000019 loss_cls: 4.3023 (3.9573) grad_norm: 4.2039 (4.2998) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 07:38:53 root] (utils.py 283): INFO Epoch: [6] [ 710/2502] eta: 0:22:52 lr: 0.000019 loss_cls: 3.9804 (3.9541) grad_norm: 4.1265 (4.2990) time: 0.7593 data: 0.0002 max mem: 8421 +[2024-12-05 07:39:01 root] (utils.py 283): INFO Epoch: [6] [ 720/2502] eta: 0:22:44 lr: 0.000019 loss_cls: 3.7735 (3.9515) grad_norm: 4.1115 (4.3009) time: 0.7588 data: 0.0002 max mem: 8421 +[2024-12-05 07:39:09 root] (utils.py 283): INFO Epoch: [6] [ 730/2502] eta: 0:22:36 lr: 0.000019 loss_cls: 3.9514 (3.9502) grad_norm: 4.1130 (4.2987) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 07:39:16 root] (utils.py 283): INFO Epoch: [6] [ 740/2502] eta: 0:22:29 lr: 0.000019 loss_cls: 3.9632 (3.9497) grad_norm: 4.1073 (4.2965) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 07:39:24 root] (utils.py 283): INFO Epoch: [6] [ 750/2502] eta: 0:22:21 lr: 0.000019 loss_cls: 4.2124 (3.9516) grad_norm: 4.1349 (4.2947) time: 0.7602 data: 0.0003 max mem: 8421 +[2024-12-05 07:39:32 root] (utils.py 283): INFO Epoch: [6] [ 760/2502] eta: 0:22:13 lr: 0.000019 loss_cls: 4.1362 (3.9524) grad_norm: 4.2765 (4.2981) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 07:39:39 root] (utils.py 283): INFO Epoch: [6] [ 770/2502] eta: 0:22:05 lr: 0.000019 loss_cls: 4.0420 (3.9506) grad_norm: 4.3024 (4.2971) time: 0.7620 data: 0.0003 max mem: 8421 +[2024-12-05 07:39:47 root] (utils.py 283): INFO Epoch: [6] [ 780/2502] eta: 0:21:58 lr: 0.000019 loss_cls: 3.9607 (3.9504) grad_norm: 4.1675 (4.2978) time: 0.7621 data: 0.0003 max mem: 8421 +[2024-12-05 07:39:54 root] (utils.py 283): INFO Epoch: [6] [ 790/2502] eta: 0:21:50 lr: 0.000019 loss_cls: 3.9607 (3.9482) grad_norm: 4.2892 (4.2993) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 07:40:02 root] (utils.py 283): INFO Epoch: [6] [ 800/2502] eta: 0:21:42 lr: 0.000019 loss_cls: 3.8205 (3.9488) grad_norm: 4.2260 (4.2983) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-05 07:40:10 root] (utils.py 283): INFO Epoch: [6] [ 810/2502] eta: 0:21:35 lr: 0.000019 loss_cls: 3.8997 (3.9483) grad_norm: 4.1995 (4.2986) time: 0.7669 data: 0.0003 max mem: 8421 +[2024-12-05 07:40:17 root] (utils.py 283): INFO Epoch: [6] [ 820/2502] eta: 0:21:27 lr: 0.000019 loss_cls: 4.0839 (3.9503) grad_norm: 4.2901 (4.3023) time: 0.7669 data: 0.0003 max mem: 8421 +[2024-12-05 07:40:25 root] (utils.py 283): INFO Epoch: [6] [ 830/2502] eta: 0:21:19 lr: 0.000019 loss_cls: 4.0839 (3.9509) grad_norm: 4.3099 (4.3032) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 07:40:33 root] (utils.py 283): INFO Epoch: [6] [ 840/2502] eta: 0:21:12 lr: 0.000019 loss_cls: 4.0063 (3.9498) grad_norm: 4.2108 (4.3011) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-05 07:40:40 root] (utils.py 283): INFO Epoch: [6] [ 850/2502] eta: 0:21:04 lr: 0.000019 loss_cls: 4.0473 (3.9486) grad_norm: 4.0673 (4.2984) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-05 07:40:48 root] (utils.py 283): INFO Epoch: [6] [ 860/2502] eta: 0:20:56 lr: 0.000019 loss_cls: 4.1183 (3.9529) grad_norm: 4.1270 (4.2972) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 07:40:56 root] (utils.py 283): INFO Epoch: [6] [ 870/2502] eta: 0:20:49 lr: 0.000019 loss_cls: 4.2778 (3.9576) grad_norm: 4.1424 (4.2958) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 07:41:03 root] (utils.py 283): INFO Epoch: [6] [ 880/2502] eta: 0:20:41 lr: 0.000019 loss_cls: 4.1465 (3.9589) grad_norm: 3.9713 (4.2926) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-05 07:41:11 root] (utils.py 283): INFO Epoch: [6] [ 890/2502] eta: 0:20:33 lr: 0.000019 loss_cls: 4.1255 (3.9579) grad_norm: 4.0669 (4.2926) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-05 07:41:19 root] (utils.py 283): INFO Epoch: [6] [ 900/2502] eta: 0:20:26 lr: 0.000019 loss_cls: 3.7624 (3.9560) grad_norm: 4.2812 (4.2926) time: 0.7589 data: 0.0002 max mem: 8421 +[2024-12-05 07:41:26 root] (utils.py 283): INFO Epoch: [6] [ 910/2502] eta: 0:20:18 lr: 0.000019 loss_cls: 4.0138 (3.9564) grad_norm: 4.2718 (4.2930) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 07:41:34 root] (utils.py 283): INFO Epoch: [6] [ 920/2502] eta: 0:20:10 lr: 0.000019 loss_cls: 4.0711 (3.9566) grad_norm: 4.1907 (4.3001) time: 0.7715 data: 0.0002 max mem: 8421 +[2024-12-05 07:41:42 root] (utils.py 283): INFO Epoch: [6] [ 930/2502] eta: 0:20:03 lr: 0.000019 loss_cls: 4.2281 (3.9575) grad_norm: 4.1907 (4.3014) time: 0.7720 data: 0.0002 max mem: 8421 +[2024-12-05 07:41:49 root] (utils.py 283): INFO Epoch: [6] [ 940/2502] eta: 0:19:55 lr: 0.000019 loss_cls: 4.2281 (3.9601) grad_norm: 4.2453 (4.3014) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 07:41:57 root] (utils.py 283): INFO Epoch: [6] [ 950/2502] eta: 0:19:47 lr: 0.000019 loss_cls: 4.1259 (3.9583) grad_norm: 4.2453 (4.3001) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:05 root] (utils.py 283): INFO Epoch: [6] [ 960/2502] eta: 0:19:40 lr: 0.000019 loss_cls: 3.9850 (3.9565) grad_norm: 4.2401 (4.2991) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:12 root] (utils.py 283): INFO Epoch: [6] [ 970/2502] eta: 0:19:32 lr: 0.000019 loss_cls: 4.3237 (3.9589) grad_norm: 4.1319 (4.2967) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:20 root] (utils.py 283): INFO Epoch: [6] [ 980/2502] eta: 0:19:24 lr: 0.000019 loss_cls: 4.3237 (3.9599) grad_norm: 4.1811 (4.2986) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:28 root] (utils.py 283): INFO Epoch: [6] [ 990/2502] eta: 0:19:17 lr: 0.000019 loss_cls: 4.0294 (3.9606) grad_norm: 4.2143 (4.2972) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:35 root] (utils.py 283): INFO Epoch: [6] [1000/2502] eta: 0:19:09 lr: 0.000019 loss_cls: 3.9500 (3.9622) grad_norm: 4.0613 (4.2951) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:43 root] (utils.py 283): INFO Epoch: [6] [1010/2502] eta: 0:19:02 lr: 0.000019 loss_cls: 4.1713 (3.9642) grad_norm: 4.1105 (4.2943) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:51 root] (utils.py 283): INFO Epoch: [6] [1020/2502] eta: 0:18:54 lr: 0.000019 loss_cls: 4.1668 (3.9627) grad_norm: 4.3102 (4.2963) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 07:42:58 root] (utils.py 283): INFO Epoch: [6] [1030/2502] eta: 0:18:46 lr: 0.000019 loss_cls: 3.6696 (3.9598) grad_norm: 4.2999 (4.2959) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 07:43:06 root] (utils.py 283): INFO Epoch: [6] [1040/2502] eta: 0:18:39 lr: 0.000019 loss_cls: 3.8752 (3.9602) grad_norm: 4.0749 (4.2955) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-05 07:43:14 root] (utils.py 283): INFO Epoch: [6] [1050/2502] eta: 0:18:31 lr: 0.000019 loss_cls: 3.9078 (3.9598) grad_norm: 4.1064 (4.2954) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 07:43:21 root] (utils.py 283): INFO Epoch: [6] [1060/2502] eta: 0:18:23 lr: 0.000019 loss_cls: 4.0870 (3.9603) grad_norm: 4.1484 (4.2950) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 07:43:29 root] (utils.py 283): INFO Epoch: [6] [1070/2502] eta: 0:18:16 lr: 0.000019 loss_cls: 4.2178 (3.9621) grad_norm: 4.1455 (4.2927) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 07:43:36 root] (utils.py 283): INFO Epoch: [6] [1080/2502] eta: 0:18:08 lr: 0.000019 loss_cls: 4.1164 (3.9622) grad_norm: 4.1455 (4.3007) time: 0.7644 data: 0.0003 max mem: 8421 +[2024-12-05 07:43:44 root] (utils.py 283): INFO Epoch: [6] [1090/2502] eta: 0:18:00 lr: 0.000019 loss_cls: 4.1164 (3.9628) grad_norm: 4.3831 (4.3028) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 07:43:52 root] (utils.py 283): INFO Epoch: [6] [1100/2502] eta: 0:17:53 lr: 0.000019 loss_cls: 4.1670 (3.9628) grad_norm: 4.2822 (4.3025) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 07:43:59 root] (utils.py 283): INFO Epoch: [6] [1110/2502] eta: 0:17:45 lr: 0.000019 loss_cls: 3.6816 (3.9599) grad_norm: 4.1731 (4.3010) time: 0.7597 data: 0.0003 max mem: 8421 +[2024-12-05 07:44:07 root] (utils.py 283): INFO Epoch: [6] [1120/2502] eta: 0:17:37 lr: 0.000019 loss_cls: 3.6256 (3.9593) grad_norm: 4.1524 (4.3068) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 07:44:15 root] (utils.py 283): INFO Epoch: [6] [1130/2502] eta: 0:17:29 lr: 0.000019 loss_cls: 4.0184 (3.9601) grad_norm: 4.2085 (4.3068) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 07:44:22 root] (utils.py 283): INFO Epoch: [6] [1140/2502] eta: 0:17:22 lr: 0.000019 loss_cls: 3.7408 (3.9572) grad_norm: 4.1181 (4.3058) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 07:44:30 root] (utils.py 283): INFO Epoch: [6] [1150/2502] eta: 0:17:14 lr: 0.000019 loss_cls: 3.7008 (3.9571) grad_norm: 4.0607 (4.3038) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 07:44:38 root] (utils.py 283): INFO Epoch: [6] [1160/2502] eta: 0:17:07 lr: 0.000019 loss_cls: 4.0236 (3.9568) grad_norm: 4.0607 (4.3041) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 07:44:45 root] (utils.py 283): INFO Epoch: [6] [1170/2502] eta: 0:16:59 lr: 0.000019 loss_cls: 4.0236 (3.9596) grad_norm: 4.1178 (4.3021) time: 0.7742 data: 0.0002 max mem: 8421 +[2024-12-05 07:44:53 root] (utils.py 283): INFO Epoch: [6] [1180/2502] eta: 0:16:52 lr: 0.000019 loss_cls: 3.9984 (3.9586) grad_norm: 4.0484 (4.3022) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-05 07:45:01 root] (utils.py 283): INFO Epoch: [6] [1190/2502] eta: 0:16:44 lr: 0.000019 loss_cls: 4.0039 (3.9585) grad_norm: 4.3818 (4.3037) time: 0.7920 data: 0.0002 max mem: 8421 +[2024-12-05 07:45:09 root] (utils.py 283): INFO Epoch: [6] [1200/2502] eta: 0:16:37 lr: 0.000019 loss_cls: 4.0870 (3.9599) grad_norm: 4.3818 (4.3044) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 07:45:17 root] (utils.py 283): INFO Epoch: [6] [1210/2502] eta: 0:16:29 lr: 0.000019 loss_cls: 3.7635 (3.9564) grad_norm: 4.2951 (4.3046) time: 0.7880 data: 0.0002 max mem: 8421 +[2024-12-05 07:45:25 root] (utils.py 283): INFO Epoch: [6] [1220/2502] eta: 0:16:22 lr: 0.000019 loss_cls: 3.7635 (3.9571) grad_norm: 4.2951 (4.3041) time: 0.7864 data: 0.0002 max mem: 8421 +[2024-12-05 07:45:33 root] (utils.py 283): INFO Epoch: [6] [1230/2502] eta: 0:16:15 lr: 0.000019 loss_cls: 4.0173 (3.9558) grad_norm: 4.1964 (4.3032) time: 0.7876 data: 0.0002 max mem: 8421 +[2024-12-05 07:45:41 root] (utils.py 283): INFO Epoch: [6] [1240/2502] eta: 0:16:07 lr: 0.000019 loss_cls: 4.1576 (3.9576) grad_norm: 4.1726 (4.3099) time: 0.7901 data: 0.0002 max mem: 8421 +[2024-12-05 07:45:48 root] (utils.py 283): INFO Epoch: [6] [1250/2502] eta: 0:16:00 lr: 0.000019 loss_cls: 4.2287 (3.9599) grad_norm: 4.2537 (4.3116) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-05 07:45:56 root] (utils.py 283): INFO Epoch: [6] [1260/2502] eta: 0:15:52 lr: 0.000019 loss_cls: 4.2133 (3.9614) grad_norm: 4.2903 (4.3124) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-05 07:46:04 root] (utils.py 283): INFO Epoch: [6] [1270/2502] eta: 0:15:44 lr: 0.000019 loss_cls: 4.1003 (3.9618) grad_norm: 4.2903 (4.3133) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-05 07:46:12 root] (utils.py 283): INFO Epoch: [6] [1280/2502] eta: 0:15:37 lr: 0.000019 loss_cls: 4.1003 (3.9623) grad_norm: 4.2394 (4.3165) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 07:46:19 root] (utils.py 283): INFO Epoch: [6] [1290/2502] eta: 0:15:29 lr: 0.000019 loss_cls: 4.2170 (3.9650) grad_norm: 4.5360 (4.3195) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 07:46:27 root] (utils.py 283): INFO Epoch: [6] [1300/2502] eta: 0:15:21 lr: 0.000019 loss_cls: 4.2487 (3.9671) grad_norm: 4.4111 (4.3187) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 07:46:35 root] (utils.py 283): INFO Epoch: [6] [1310/2502] eta: 0:15:14 lr: 0.000019 loss_cls: 4.2487 (3.9680) grad_norm: 4.0701 (4.3178) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 07:46:42 root] (utils.py 283): INFO Epoch: [6] [1320/2502] eta: 0:15:06 lr: 0.000019 loss_cls: 4.2203 (3.9698) grad_norm: 4.1839 (4.3175) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 07:46:50 root] (utils.py 283): INFO Epoch: [6] [1330/2502] eta: 0:14:58 lr: 0.000019 loss_cls: 4.0892 (3.9678) grad_norm: 4.1839 (4.3168) time: 0.7603 data: 0.0002 max mem: 8421 +[2024-12-05 07:46:57 root] (utils.py 283): INFO Epoch: [6] [1340/2502] eta: 0:14:51 lr: 0.000019 loss_cls: 3.9014 (3.9679) grad_norm: 4.0952 (4.3163) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 07:47:05 root] (utils.py 283): INFO Epoch: [6] [1350/2502] eta: 0:14:43 lr: 0.000019 loss_cls: 4.1784 (3.9667) grad_norm: 4.2090 (4.3183) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 07:47:13 root] (utils.py 283): INFO Epoch: [6] [1360/2502] eta: 0:14:35 lr: 0.000019 loss_cls: 4.1229 (3.9667) grad_norm: 4.2125 (4.3170) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 07:47:21 root] (utils.py 283): INFO Epoch: [6] [1370/2502] eta: 0:14:28 lr: 0.000019 loss_cls: 4.1229 (3.9681) grad_norm: 4.1663 (4.3187) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 07:47:28 root] (utils.py 283): INFO Epoch: [6] [1380/2502] eta: 0:14:20 lr: 0.000019 loss_cls: 4.1181 (3.9682) grad_norm: 4.3821 (4.3197) time: 0.7921 data: 0.0003 max mem: 8421 +[2024-12-05 07:47:36 root] (utils.py 283): INFO Epoch: [6] [1390/2502] eta: 0:14:13 lr: 0.000019 loss_cls: 4.1583 (3.9689) grad_norm: 4.3247 (4.3189) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-05 07:47:44 root] (utils.py 283): INFO Epoch: [6] [1400/2502] eta: 0:14:05 lr: 0.000019 loss_cls: 4.1759 (3.9687) grad_norm: 4.0746 (4.3186) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 07:47:52 root] (utils.py 283): INFO Epoch: [6] [1410/2502] eta: 0:13:58 lr: 0.000019 loss_cls: 3.8406 (3.9684) grad_norm: 4.0289 (4.3172) time: 0.7879 data: 0.0002 max mem: 8421 +[2024-12-05 07:48:00 root] (utils.py 283): INFO Epoch: [6] [1420/2502] eta: 0:13:50 lr: 0.000019 loss_cls: 4.0600 (3.9697) grad_norm: 4.0222 (4.3170) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-05 07:48:08 root] (utils.py 283): INFO Epoch: [6] [1430/2502] eta: 0:13:43 lr: 0.000019 loss_cls: 3.7979 (3.9672) grad_norm: 4.0222 (4.3149) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-05 07:48:16 root] (utils.py 283): INFO Epoch: [6] [1440/2502] eta: 0:13:35 lr: 0.000019 loss_cls: 3.7979 (3.9672) grad_norm: 4.1822 (4.3178) time: 0.7925 data: 0.0003 max mem: 8421 +[2024-12-05 07:48:24 root] (utils.py 283): INFO Epoch: [6] [1450/2502] eta: 0:13:28 lr: 0.000019 loss_cls: 4.1705 (3.9662) grad_norm: 4.3150 (4.3166) time: 0.7920 data: 0.0003 max mem: 8421 +[2024-12-05 07:48:31 root] (utils.py 283): INFO Epoch: [6] [1460/2502] eta: 0:13:20 lr: 0.000019 loss_cls: 4.1667 (3.9652) grad_norm: 4.2719 (4.3167) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-05 07:48:39 root] (utils.py 283): INFO Epoch: [6] [1470/2502] eta: 0:13:12 lr: 0.000019 loss_cls: 3.4522 (3.9623) grad_norm: 4.2719 (4.3166) time: 0.7717 data: 0.0003 max mem: 8421 +[2024-12-05 07:48:47 root] (utils.py 283): INFO Epoch: [6] [1480/2502] eta: 0:13:05 lr: 0.000019 loss_cls: 3.6018 (3.9624) grad_norm: 4.2003 (4.3168) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-05 07:48:54 root] (utils.py 283): INFO Epoch: [6] [1490/2502] eta: 0:12:57 lr: 0.000019 loss_cls: 4.1316 (3.9630) grad_norm: 4.2553 (4.3168) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 07:49:02 root] (utils.py 283): INFO Epoch: [6] [1500/2502] eta: 0:12:49 lr: 0.000019 loss_cls: 4.1304 (3.9623) grad_norm: 4.1737 (4.3160) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 07:49:10 root] (utils.py 283): INFO Epoch: [6] [1510/2502] eta: 0:12:41 lr: 0.000019 loss_cls: 3.9486 (3.9626) grad_norm: 4.2722 (4.3163) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 07:49:17 root] (utils.py 283): INFO Epoch: [6] [1520/2502] eta: 0:12:34 lr: 0.000019 loss_cls: 3.9857 (3.9619) grad_norm: 4.1325 (4.3153) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 07:49:25 root] (utils.py 283): INFO Epoch: [6] [1530/2502] eta: 0:12:26 lr: 0.000019 loss_cls: 3.9905 (3.9618) grad_norm: 4.0549 (4.3157) time: 0.7657 data: 0.0003 max mem: 8421 +[2024-12-05 07:49:33 root] (utils.py 283): INFO Epoch: [6] [1540/2502] eta: 0:12:18 lr: 0.000019 loss_cls: 4.0627 (3.9631) grad_norm: 4.0915 (4.3153) time: 0.7639 data: 0.0003 max mem: 8421 +[2024-12-05 07:49:40 root] (utils.py 283): INFO Epoch: [6] [1550/2502] eta: 0:12:11 lr: 0.000019 loss_cls: 4.1077 (3.9640) grad_norm: 4.0800 (4.3143) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 07:49:48 root] (utils.py 283): INFO Epoch: [6] [1560/2502] eta: 0:12:03 lr: 0.000019 loss_cls: 4.0081 (3.9642) grad_norm: 4.0625 (4.3137) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 07:49:56 root] (utils.py 283): INFO Epoch: [6] [1570/2502] eta: 0:11:55 lr: 0.000019 loss_cls: 4.0813 (3.9650) grad_norm: 4.0878 (4.3125) time: 0.7711 data: 0.0003 max mem: 8421 +[2024-12-05 07:50:03 root] (utils.py 283): INFO Epoch: [6] [1580/2502] eta: 0:11:48 lr: 0.000019 loss_cls: 4.2366 (3.9646) grad_norm: 4.0761 (4.3111) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 07:50:11 root] (utils.py 283): INFO Epoch: [6] [1590/2502] eta: 0:11:40 lr: 0.000019 loss_cls: 4.0497 (3.9653) grad_norm: 4.0544 (4.3090) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 07:50:19 root] (utils.py 283): INFO Epoch: [6] [1600/2502] eta: 0:11:32 lr: 0.000019 loss_cls: 3.9764 (3.9641) grad_norm: 4.0864 (4.3084) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-05 07:50:27 root] (utils.py 283): INFO Epoch: [6] [1610/2502] eta: 0:11:25 lr: 0.000019 loss_cls: 3.7479 (3.9631) grad_norm: 4.2198 (4.3076) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 07:50:34 root] (utils.py 283): INFO Epoch: [6] [1620/2502] eta: 0:11:17 lr: 0.000019 loss_cls: 4.0838 (3.9645) grad_norm: 4.2586 (4.3134) time: 0.7755 data: 0.0003 max mem: 8421 +[2024-12-05 07:50:42 root] (utils.py 283): INFO Epoch: [6] [1630/2502] eta: 0:11:09 lr: 0.000019 loss_cls: 4.1812 (3.9640) grad_norm: 4.2537 (4.3127) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 07:50:50 root] (utils.py 283): INFO Epoch: [6] [1640/2502] eta: 0:11:02 lr: 0.000019 loss_cls: 4.2171 (3.9647) grad_norm: 4.2192 (4.3123) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-05 07:50:57 root] (utils.py 283): INFO Epoch: [6] [1650/2502] eta: 0:10:54 lr: 0.000019 loss_cls: 3.8458 (3.9628) grad_norm: 4.0775 (4.3108) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:05 root] (utils.py 283): INFO Epoch: [6] [1660/2502] eta: 0:10:46 lr: 0.000019 loss_cls: 3.8458 (3.9623) grad_norm: 3.9704 (4.3092) time: 0.7639 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:13 root] (utils.py 283): INFO Epoch: [6] [1670/2502] eta: 0:10:39 lr: 0.000019 loss_cls: 3.9371 (3.9610) grad_norm: 4.0070 (4.3087) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:20 root] (utils.py 283): INFO Epoch: [6] [1680/2502] eta: 0:10:31 lr: 0.000019 loss_cls: 3.5799 (3.9593) grad_norm: 4.0375 (4.3077) time: 0.7698 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:28 root] (utils.py 283): INFO Epoch: [6] [1690/2502] eta: 0:10:23 lr: 0.000019 loss_cls: 3.7154 (3.9583) grad_norm: 4.1070 (4.3071) time: 0.7713 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:36 root] (utils.py 283): INFO Epoch: [6] [1700/2502] eta: 0:10:16 lr: 0.000019 loss_cls: 4.1469 (3.9596) grad_norm: 4.1056 (4.3076) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:43 root] (utils.py 283): INFO Epoch: [6] [1710/2502] eta: 0:10:08 lr: 0.000019 loss_cls: 4.1411 (3.9601) grad_norm: 4.1010 (4.3060) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:51 root] (utils.py 283): INFO Epoch: [6] [1720/2502] eta: 0:10:00 lr: 0.000019 loss_cls: 4.1361 (3.9618) grad_norm: 4.0856 (4.3054) time: 0.7626 data: 0.0003 max mem: 8421 +[2024-12-05 07:51:59 root] (utils.py 283): INFO Epoch: [6] [1730/2502] eta: 0:09:52 lr: 0.000019 loss_cls: 4.1361 (3.9620) grad_norm: 4.1173 (4.3057) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 07:52:06 root] (utils.py 283): INFO Epoch: [6] [1740/2502] eta: 0:09:45 lr: 0.000019 loss_cls: 3.6130 (3.9600) grad_norm: 4.1091 (4.3047) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 07:52:14 root] (utils.py 283): INFO Epoch: [6] [1750/2502] eta: 0:09:37 lr: 0.000019 loss_cls: 3.5659 (3.9586) grad_norm: 4.1340 (4.3044) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 07:52:22 root] (utils.py 283): INFO Epoch: [6] [1760/2502] eta: 0:09:29 lr: 0.000019 loss_cls: 3.8137 (3.9578) grad_norm: 4.2661 (4.3046) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 07:52:29 root] (utils.py 283): INFO Epoch: [6] [1770/2502] eta: 0:09:22 lr: 0.000019 loss_cls: 3.6779 (3.9575) grad_norm: 4.4069 (4.3075) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 07:52:37 root] (utils.py 283): INFO Epoch: [6] [1780/2502] eta: 0:09:14 lr: 0.000019 loss_cls: 4.2096 (3.9580) grad_norm: 4.4069 (4.3070) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 07:52:45 root] (utils.py 283): INFO Epoch: [6] [1790/2502] eta: 0:09:06 lr: 0.000019 loss_cls: 4.1939 (3.9588) grad_norm: 4.0568 (4.3052) time: 0.7728 data: 0.0002 max mem: 8421 +[2024-12-05 07:52:52 root] (utils.py 283): INFO Epoch: [6] [1800/2502] eta: 0:08:59 lr: 0.000019 loss_cls: 4.0394 (3.9580) grad_norm: 3.9240 (4.3038) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-05 07:53:00 root] (utils.py 283): INFO Epoch: [6] [1810/2502] eta: 0:08:51 lr: 0.000019 loss_cls: 3.9855 (3.9575) grad_norm: 3.9427 (4.3026) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-05 07:53:08 root] (utils.py 283): INFO Epoch: [6] [1820/2502] eta: 0:08:43 lr: 0.000019 loss_cls: 3.9665 (3.9575) grad_norm: 4.1065 (4.3038) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 07:53:16 root] (utils.py 283): INFO Epoch: [6] [1830/2502] eta: 0:08:36 lr: 0.000019 loss_cls: 3.9423 (3.9571) grad_norm: 4.1087 (4.3028) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 07:53:23 root] (utils.py 283): INFO Epoch: [6] [1840/2502] eta: 0:08:28 lr: 0.000019 loss_cls: 3.8457 (3.9571) grad_norm: 4.0807 (4.3026) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-05 07:53:31 root] (utils.py 283): INFO Epoch: [6] [1850/2502] eta: 0:08:20 lr: 0.000019 loss_cls: 3.5072 (3.9546) grad_norm: 4.1724 (4.3027) time: 0.7702 data: 0.0002 max mem: 8421 +[2024-12-05 07:53:39 root] (utils.py 283): INFO Epoch: [6] [1860/2502] eta: 0:08:13 lr: 0.000019 loss_cls: 3.6767 (3.9542) grad_norm: 4.2145 (4.3029) time: 0.7685 data: 0.0002 max mem: 8421 +[2024-12-05 07:53:47 root] (utils.py 283): INFO Epoch: [6] [1870/2502] eta: 0:08:05 lr: 0.000019 loss_cls: 3.9769 (3.9550) grad_norm: 4.2496 (4.3048) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-05 07:53:54 root] (utils.py 283): INFO Epoch: [6] [1880/2502] eta: 0:07:57 lr: 0.000019 loss_cls: 3.9769 (3.9542) grad_norm: 4.0517 (4.3032) time: 0.7802 data: 0.0002 max mem: 8421 +[2024-12-05 07:54:02 root] (utils.py 283): INFO Epoch: [6] [1890/2502] eta: 0:07:50 lr: 0.000019 loss_cls: 3.8962 (3.9542) grad_norm: 4.0806 (4.3020) time: 0.7747 data: 0.0002 max mem: 8421 +[2024-12-05 07:54:10 root] (utils.py 283): INFO Epoch: [6] [1900/2502] eta: 0:07:42 lr: 0.000019 loss_cls: 3.9819 (3.9534) grad_norm: 4.1825 (4.3016) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-05 07:54:17 root] (utils.py 283): INFO Epoch: [6] [1910/2502] eta: 0:07:34 lr: 0.000019 loss_cls: 3.9654 (3.9530) grad_norm: 4.2629 (4.3021) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 07:54:25 root] (utils.py 283): INFO Epoch: [6] [1920/2502] eta: 0:07:27 lr: 0.000019 loss_cls: 3.9066 (3.9527) grad_norm: 4.1862 (4.3019) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 07:54:33 root] (utils.py 283): INFO Epoch: [6] [1930/2502] eta: 0:07:19 lr: 0.000019 loss_cls: 3.9825 (3.9536) grad_norm: 4.1410 (4.3017) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 07:54:41 root] (utils.py 283): INFO Epoch: [6] [1940/2502] eta: 0:07:11 lr: 0.000019 loss_cls: 3.8991 (3.9526) grad_norm: 4.1998 (4.3019) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-05 07:54:49 root] (utils.py 283): INFO Epoch: [6] [1950/2502] eta: 0:07:04 lr: 0.000019 loss_cls: 3.6362 (3.9511) grad_norm: 4.1998 (4.3011) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-05 07:54:57 root] (utils.py 283): INFO Epoch: [6] [1960/2502] eta: 0:06:56 lr: 0.000019 loss_cls: 4.0322 (3.9519) grad_norm: 4.1967 (4.3015) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-05 07:55:04 root] (utils.py 283): INFO Epoch: [6] [1970/2502] eta: 0:06:49 lr: 0.000019 loss_cls: 4.0322 (3.9512) grad_norm: 4.2230 (4.3011) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-05 07:55:12 root] (utils.py 283): INFO Epoch: [6] [1980/2502] eta: 0:06:41 lr: 0.000019 loss_cls: 4.0395 (3.9515) grad_norm: 4.0681 (4.3000) time: 0.7903 data: 0.0003 max mem: 8421 +[2024-12-05 07:55:20 root] (utils.py 283): INFO Epoch: [6] [1990/2502] eta: 0:06:33 lr: 0.000019 loss_cls: 3.9911 (3.9512) grad_norm: 4.0094 (4.2997) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-05 07:55:28 root] (utils.py 283): INFO Epoch: [6] [2000/2502] eta: 0:06:26 lr: 0.000019 loss_cls: 3.6033 (3.9490) grad_norm: 4.1084 (4.2986) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-05 07:55:36 root] (utils.py 283): INFO Epoch: [6] [2010/2502] eta: 0:06:18 lr: 0.000019 loss_cls: 3.5494 (3.9486) grad_norm: 4.0769 (4.2982) time: 0.7927 data: 0.0003 max mem: 8421 +[2024-12-05 07:55:44 root] (utils.py 283): INFO Epoch: [6] [2020/2502] eta: 0:06:10 lr: 0.000019 loss_cls: 4.1061 (3.9481) grad_norm: 4.0625 (4.2970) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 07:55:51 root] (utils.py 283): INFO Epoch: [6] [2030/2502] eta: 0:06:03 lr: 0.000019 loss_cls: 4.1116 (3.9483) grad_norm: 4.0172 (4.2957) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 07:55:59 root] (utils.py 283): INFO Epoch: [6] [2040/2502] eta: 0:05:55 lr: 0.000019 loss_cls: 4.0137 (3.9472) grad_norm: 4.0732 (4.2956) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-05 07:56:07 root] (utils.py 283): INFO Epoch: [6] [2050/2502] eta: 0:05:47 lr: 0.000019 loss_cls: 4.0137 (3.9470) grad_norm: 4.0929 (4.2948) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-05 07:56:15 root] (utils.py 283): INFO Epoch: [6] [2060/2502] eta: 0:05:40 lr: 0.000019 loss_cls: 4.2060 (3.9483) grad_norm: 4.0061 (4.2934) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-05 07:56:23 root] (utils.py 283): INFO Epoch: [6] [2070/2502] eta: 0:05:32 lr: 0.000019 loss_cls: 4.1188 (3.9474) grad_norm: 3.9954 (4.2975) time: 0.7860 data: 0.0003 max mem: 8421 +[2024-12-05 07:56:31 root] (utils.py 283): INFO Epoch: [6] [2080/2502] eta: 0:05:24 lr: 0.000019 loss_cls: 3.8125 (3.9457) grad_norm: 4.2114 (4.2981) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-05 07:56:38 root] (utils.py 283): INFO Epoch: [6] [2090/2502] eta: 0:05:17 lr: 0.000019 loss_cls: 3.8359 (3.9461) grad_norm: 4.3363 (4.2997) time: 0.7719 data: 0.0003 max mem: 8421 +[2024-12-05 07:56:46 root] (utils.py 283): INFO Epoch: [6] [2100/2502] eta: 0:05:09 lr: 0.000019 loss_cls: 4.0369 (3.9465) grad_norm: 4.0937 (4.2987) time: 0.7627 data: 0.0003 max mem: 8421 +[2024-12-05 07:56:54 root] (utils.py 283): INFO Epoch: [6] [2110/2502] eta: 0:05:01 lr: 0.000019 loss_cls: 3.7975 (3.9455) grad_norm: 4.0103 (4.2976) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-05 07:57:01 root] (utils.py 283): INFO Epoch: [6] [2120/2502] eta: 0:04:53 lr: 0.000019 loss_cls: 3.5400 (3.9448) grad_norm: 4.1568 (4.2976) time: 0.7737 data: 0.0003 max mem: 8421 +[2024-12-05 07:57:09 root] (utils.py 283): INFO Epoch: [6] [2130/2502] eta: 0:04:46 lr: 0.000019 loss_cls: 3.8688 (3.9450) grad_norm: 4.1249 (4.2970) time: 0.7607 data: 0.0003 max mem: 8421 +[2024-12-05 07:57:17 root] (utils.py 283): INFO Epoch: [6] [2140/2502] eta: 0:04:38 lr: 0.000019 loss_cls: 3.9434 (3.9450) grad_norm: 4.1249 (4.2965) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 07:57:24 root] (utils.py 283): INFO Epoch: [6] [2150/2502] eta: 0:04:30 lr: 0.000019 loss_cls: 4.3452 (3.9475) grad_norm: 4.2440 (4.2975) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 07:57:32 root] (utils.py 283): INFO Epoch: [6] [2160/2502] eta: 0:04:23 lr: 0.000019 loss_cls: 4.4775 (3.9487) grad_norm: 4.4051 (4.2974) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 07:57:40 root] (utils.py 283): INFO Epoch: [6] [2170/2502] eta: 0:04:15 lr: 0.000019 loss_cls: 4.2867 (3.9491) grad_norm: 4.1114 (4.2988) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 07:57:47 root] (utils.py 283): INFO Epoch: [6] [2180/2502] eta: 0:04:07 lr: 0.000019 loss_cls: 4.1322 (3.9479) grad_norm: 4.1114 (4.2985) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 07:57:55 root] (utils.py 283): INFO Epoch: [6] [2190/2502] eta: 0:04:00 lr: 0.000019 loss_cls: 4.0497 (3.9475) grad_norm: 4.1762 (4.2998) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:03 root] (utils.py 283): INFO Epoch: [6] [2200/2502] eta: 0:03:52 lr: 0.000019 loss_cls: 3.9588 (3.9485) grad_norm: 4.3001 (4.3002) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:10 root] (utils.py 283): INFO Epoch: [6] [2210/2502] eta: 0:03:44 lr: 0.000019 loss_cls: 3.9418 (3.9478) grad_norm: 4.3001 (4.2999) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:18 root] (utils.py 283): INFO Epoch: [6] [2220/2502] eta: 0:03:36 lr: 0.000019 loss_cls: 4.0009 (3.9482) grad_norm: 4.1602 (4.2995) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:26 root] (utils.py 283): INFO Epoch: [6] [2230/2502] eta: 0:03:29 lr: 0.000019 loss_cls: 4.1474 (3.9482) grad_norm: 4.2698 (4.3024) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:33 root] (utils.py 283): INFO Epoch: [6] [2240/2502] eta: 0:03:21 lr: 0.000019 loss_cls: 4.1168 (3.9487) grad_norm: 4.2698 (4.3021) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:41 root] (utils.py 283): INFO Epoch: [6] [2250/2502] eta: 0:03:13 lr: 0.000019 loss_cls: 4.1028 (3.9478) grad_norm: 4.1475 (4.3049) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:49 root] (utils.py 283): INFO Epoch: [6] [2260/2502] eta: 0:03:06 lr: 0.000019 loss_cls: 4.0966 (3.9490) grad_norm: 4.3945 (4.3053) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 07:58:56 root] (utils.py 283): INFO Epoch: [6] [2270/2502] eta: 0:02:58 lr: 0.000019 loss_cls: 4.2993 (3.9497) grad_norm: 4.2430 (4.3053) time: 0.7759 data: 0.0002 max mem: 8421 +[2024-12-05 07:59:04 root] (utils.py 283): INFO Epoch: [6] [2280/2502] eta: 0:02:50 lr: 0.000019 loss_cls: 3.6039 (3.9480) grad_norm: 4.2430 (4.3052) time: 0.7947 data: 0.0002 max mem: 8421 +[2024-12-05 07:59:12 root] (utils.py 283): INFO Epoch: [6] [2290/2502] eta: 0:02:43 lr: 0.000019 loss_cls: 3.7119 (3.9477) grad_norm: 4.2250 (4.3047) time: 0.7962 data: 0.0003 max mem: 8421 +[2024-12-05 07:59:20 root] (utils.py 283): INFO Epoch: [6] [2300/2502] eta: 0:02:35 lr: 0.000019 loss_cls: 4.0148 (3.9483) grad_norm: 4.1296 (4.3057) time: 0.7896 data: 0.0003 max mem: 8421 +[2024-12-05 07:59:28 root] (utils.py 283): INFO Epoch: [6] [2310/2502] eta: 0:02:27 lr: 0.000019 loss_cls: 4.0724 (3.9488) grad_norm: 4.1296 (4.3056) time: 0.7901 data: 0.0002 max mem: 8421 +[2024-12-05 07:59:36 root] (utils.py 283): INFO Epoch: [6] [2320/2502] eta: 0:02:20 lr: 0.000019 loss_cls: 3.8186 (3.9482) grad_norm: 4.1607 (4.3057) time: 0.7892 data: 0.0003 max mem: 8421 +[2024-12-05 07:59:44 root] (utils.py 283): INFO Epoch: [6] [2330/2502] eta: 0:02:12 lr: 0.000019 loss_cls: 3.6875 (3.9467) grad_norm: 4.1607 (4.3051) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 07:59:51 root] (utils.py 283): INFO Epoch: [6] [2340/2502] eta: 0:02:04 lr: 0.000019 loss_cls: 4.0213 (3.9481) grad_norm: 4.0325 (4.3051) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 07:59:59 root] (utils.py 283): INFO Epoch: [6] [2350/2502] eta: 0:01:57 lr: 0.000019 loss_cls: 4.1650 (3.9483) grad_norm: 4.2325 (4.3047) time: 0.7580 data: 0.0002 max mem: 8421 +[2024-12-05 08:00:07 root] (utils.py 283): INFO Epoch: [6] [2360/2502] eta: 0:01:49 lr: 0.000019 loss_cls: 4.1718 (3.9484) grad_norm: 4.2076 (4.3047) time: 0.7576 data: 0.0002 max mem: 8421 +[2024-12-05 08:00:14 root] (utils.py 283): INFO Epoch: [6] [2370/2502] eta: 0:01:41 lr: 0.000019 loss_cls: 4.0057 (3.9477) grad_norm: 4.2076 (4.3050) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 08:00:22 root] (utils.py 283): INFO Epoch: [6] [2380/2502] eta: 0:01:33 lr: 0.000019 loss_cls: 3.8938 (3.9478) grad_norm: 4.2564 (4.3046) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 08:00:30 root] (utils.py 283): INFO Epoch: [6] [2390/2502] eta: 0:01:26 lr: 0.000019 loss_cls: 4.1798 (3.9486) grad_norm: 4.1358 (4.3067) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 08:00:37 root] (utils.py 283): INFO Epoch: [6] [2400/2502] eta: 0:01:18 lr: 0.000019 loss_cls: 3.9305 (3.9468) grad_norm: 4.1016 (4.3064) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 08:00:45 root] (utils.py 283): INFO Epoch: [6] [2410/2502] eta: 0:01:10 lr: 0.000019 loss_cls: 3.6979 (3.9469) grad_norm: 4.1177 (4.3060) time: 0.7725 data: 0.0002 max mem: 8421 +[2024-12-05 08:00:53 root] (utils.py 283): INFO Epoch: [6] [2420/2502] eta: 0:01:03 lr: 0.000019 loss_cls: 4.0945 (3.9479) grad_norm: 4.1908 (4.3061) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:00 root] (utils.py 283): INFO Epoch: [6] [2430/2502] eta: 0:00:55 lr: 0.000019 loss_cls: 3.9728 (3.9474) grad_norm: 4.1834 (4.3057) time: 0.7665 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:08 root] (utils.py 283): INFO Epoch: [6] [2440/2502] eta: 0:00:47 lr: 0.000019 loss_cls: 3.7013 (3.9461) grad_norm: 4.2205 (4.3069) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:16 root] (utils.py 283): INFO Epoch: [6] [2450/2502] eta: 0:00:40 lr: 0.000019 loss_cls: 3.7963 (3.9463) grad_norm: 4.3367 (4.3075) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:23 root] (utils.py 283): INFO Epoch: [6] [2460/2502] eta: 0:00:32 lr: 0.000019 loss_cls: 4.2081 (3.9469) grad_norm: 4.2215 (4.3075) time: 0.7627 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:31 root] (utils.py 283): INFO Epoch: [6] [2470/2502] eta: 0:00:24 lr: 0.000019 loss_cls: 4.2081 (3.9473) grad_norm: 4.1799 (4.3071) time: 0.7614 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:38 root] (utils.py 283): INFO Epoch: [6] [2480/2502] eta: 0:00:16 lr: 0.000019 loss_cls: 3.9425 (3.9474) grad_norm: 4.1099 (4.3066) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:46 root] (utils.py 283): INFO Epoch: [6] [2490/2502] eta: 0:00:09 lr: 0.000019 loss_cls: 3.9176 (3.9468) grad_norm: 4.2210 (4.3064) time: 0.7836 data: 0.0220 max mem: 8421 +[2024-12-05 08:01:54 root] (utils.py 283): INFO Epoch: [6] [2500/2502] eta: 0:00:01 lr: 0.000019 loss_cls: 4.1546 (3.9465) grad_norm: 4.2848 (4.3067) time: 0.7812 data: 0.0220 max mem: 8421 +[2024-12-05 08:01:55 root] (utils.py 283): INFO Epoch: [6] [2501/2502] eta: 0:00:00 lr: 0.000019 loss_cls: 4.1546 (3.9465) grad_norm: 4.2962 (4.3068) time: 0.7813 data: 0.0220 max mem: 8421 +[2024-12-05 08:01:55 root] (utils.py 297): INFO Epoch: [6] Total time: 0:32:05 (0.7698 s / it) +[2024-12-05 08:01:55 root] (engine.py 178): INFO Averaged stats:lr: 0.000019 loss_cls: 4.1546 (3.9402) grad_norm: 4.2962 (4.3068) +[2024-12-05 08:01:55 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7399 (0.7399) acc1: 86.7188 (86.7188) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1311 data: 0.0003 max mem: 8421 +[2024-12-05 08:01:57 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8550 (0.9077) acc1: 83.5938 (81.1790) acc3: 91.4062 (92.2585) acc5: 95.3125 (94.8864) time: 0.1316 data: 0.0006 max mem: 8421 +[2024-12-05 08:01:58 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9248 (0.9444) acc1: 79.6875 (79.8363) acc3: 90.6250 (92.0015) acc5: 94.5312 (94.8661) time: 0.1317 data: 0.0005 max mem: 8421 +[2024-12-05 08:01:59 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9815 (0.9452) acc1: 79.6875 (79.4355) acc3: 91.4062 (92.3891) acc5: 96.0938 (95.2117) time: 0.1325 data: 0.0005 max mem: 8421 +[2024-12-05 08:02:01 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8901 (0.9374) acc1: 80.4688 (79.7828) acc3: 93.7500 (92.5686) acc5: 96.0938 (95.2172) time: 0.1342 data: 0.0004 max mem: 8421 +[2024-12-05 08:02:02 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0605 (1.0330) acc1: 73.4375 (77.5429) acc3: 87.5000 (90.7782) acc5: 91.4062 (94.0257) time: 0.1336 data: 0.0004 max mem: 8421 +[2024-12-05 08:02:03 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3894 (1.0753) acc1: 69.5312 (76.8186) acc3: 85.1562 (90.0871) acc5: 89.0625 (93.3658) time: 0.1360 data: 0.0045 max mem: 8421 +[2024-12-05 08:02:05 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3127 (1.1216) acc1: 71.0938 (75.5612) acc3: 85.1562 (89.4916) acc5: 89.8438 (92.8697) time: 0.1726 data: 0.0411 max mem: 8421 +[2024-12-05 08:02:07 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3593 (1.1599) acc1: 70.3125 (74.8167) acc3: 85.1562 (88.8021) acc5: 89.8438 (92.4093) time: 0.1883 data: 0.0569 max mem: 8421 +[2024-12-05 08:02:08 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.4140 (1.1914) acc1: 70.3125 (73.9354) acc3: 83.5938 (88.2812) acc5: 89.8438 (91.9557) time: 0.1518 data: 0.0203 max mem: 8421 +[2024-12-05 08:02:09 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2421 (1.1787) acc1: 70.3125 (74.1680) acc3: 87.5000 (88.4880) acc5: 90.6250 (92.1520) time: 0.1299 data: 0.0007 max mem: 8421 +[2024-12-05 08:02:09 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1444 s / it) +[2024-12-05 08:02:09 root] (engine.py 263): INFO * Acc@1 74.026 Acc@3 88.444 Acc@5 91.928 loss 1.178 flops 1.285 layer_flops 1.251 +[2024-12-05 08:02:09 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.0% +[2024-12-05 08:02:09 root] (main.py 550): INFO Max accuracy: 74.06% +[2024-12-05 08:02:10 root] (utils.py 283): INFO Epoch: [7] [ 0/2502] eta: 0:32:03 lr: 0.000018 loss_cls: 3.1505 (3.1505) grad_norm: 4.1050 (4.1050) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 08:02:18 root] (utils.py 283): INFO Epoch: [7] [ 10/2502] eta: 0:31:46 lr: 0.000018 loss_cls: 3.8138 (3.7413) grad_norm: 4.1505 (4.3456) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 08:02:25 root] (utils.py 283): INFO Epoch: [7] [ 20/2502] eta: 0:31:43 lr: 0.000018 loss_cls: 3.9319 (3.7851) grad_norm: 4.0988 (4.2902) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 08:02:33 root] (utils.py 283): INFO Epoch: [7] [ 30/2502] eta: 0:31:44 lr: 0.000018 loss_cls: 3.9057 (3.8110) grad_norm: 4.1762 (4.3737) time: 0.7736 data: 0.0002 max mem: 8421 +[2024-12-05 08:02:41 root] (utils.py 283): INFO Epoch: [7] [ 40/2502] eta: 0:31:46 lr: 0.000018 loss_cls: 3.9536 (3.8608) grad_norm: 4.2813 (4.3247) time: 0.7822 data: 0.0002 max mem: 8421 +[2024-12-05 08:02:49 root] (utils.py 283): INFO Epoch: [7] [ 50/2502] eta: 0:31:43 lr: 0.000018 loss_cls: 4.0484 (3.8245) grad_norm: 4.1430 (4.2898) time: 0.7856 data: 0.0002 max mem: 8421 +[2024-12-05 08:02:57 root] (utils.py 283): INFO Epoch: [7] [ 60/2502] eta: 0:31:39 lr: 0.000018 loss_cls: 3.4912 (3.7701) grad_norm: 4.0901 (4.2655) time: 0.7854 data: 0.0002 max mem: 8421 +[2024-12-05 08:03:05 root] (utils.py 283): INFO Epoch: [7] [ 70/2502] eta: 0:31:38 lr: 0.000018 loss_cls: 3.8321 (3.7935) grad_norm: 4.0067 (4.2326) time: 0.7916 data: 0.0003 max mem: 8421 +[2024-12-05 08:03:13 root] (utils.py 283): INFO Epoch: [7] [ 80/2502] eta: 0:31:32 lr: 0.000018 loss_cls: 4.0814 (3.8342) grad_norm: 4.0921 (4.2727) time: 0.7925 data: 0.0002 max mem: 8421 +[2024-12-05 08:03:20 root] (utils.py 283): INFO Epoch: [7] [ 90/2502] eta: 0:31:25 lr: 0.000018 loss_cls: 4.2103 (3.8332) grad_norm: 4.0681 (4.2496) time: 0.7855 data: 0.0002 max mem: 8421 +[2024-12-05 08:03:28 root] (utils.py 283): INFO Epoch: [7] [ 100/2502] eta: 0:31:16 lr: 0.000018 loss_cls: 4.2136 (3.8509) grad_norm: 4.0292 (4.2342) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 08:03:36 root] (utils.py 283): INFO Epoch: [7] [ 110/2502] eta: 0:31:08 lr: 0.000018 loss_cls: 3.9485 (3.8382) grad_norm: 4.1067 (4.2253) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 08:03:44 root] (utils.py 283): INFO Epoch: [7] [ 120/2502] eta: 0:30:56 lr: 0.000018 loss_cls: 3.9485 (3.8481) grad_norm: 4.0586 (4.2178) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-05 08:03:51 root] (utils.py 283): INFO Epoch: [7] [ 130/2502] eta: 0:30:46 lr: 0.000018 loss_cls: 4.1174 (3.8744) grad_norm: 4.0627 (4.2162) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 08:03:59 root] (utils.py 283): INFO Epoch: [7] [ 140/2502] eta: 0:30:36 lr: 0.000018 loss_cls: 4.0486 (3.8790) grad_norm: 4.1885 (4.2184) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 08:04:07 root] (utils.py 283): INFO Epoch: [7] [ 150/2502] eta: 0:30:32 lr: 0.000018 loss_cls: 4.0171 (3.8839) grad_norm: 4.1469 (4.2085) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-05 08:04:15 root] (utils.py 283): INFO Epoch: [7] [ 160/2502] eta: 0:30:22 lr: 0.000018 loss_cls: 4.0279 (3.9001) grad_norm: 3.9511 (4.2196) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-05 08:04:22 root] (utils.py 283): INFO Epoch: [7] [ 170/2502] eta: 0:30:13 lr: 0.000018 loss_cls: 4.0279 (3.8905) grad_norm: 4.0095 (4.2210) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-05 08:04:30 root] (utils.py 283): INFO Epoch: [7] [ 180/2502] eta: 0:30:04 lr: 0.000018 loss_cls: 3.8208 (3.8929) grad_norm: 4.0875 (4.2145) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 08:04:38 root] (utils.py 283): INFO Epoch: [7] [ 190/2502] eta: 0:29:55 lr: 0.000018 loss_cls: 4.1169 (3.9032) grad_norm: 4.0430 (4.2107) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 08:04:45 root] (utils.py 283): INFO Epoch: [7] [ 200/2502] eta: 0:29:46 lr: 0.000018 loss_cls: 4.2435 (3.9197) grad_norm: 4.0430 (4.2078) time: 0.7685 data: 0.0003 max mem: 8421 +[2024-12-05 08:04:53 root] (utils.py 283): INFO Epoch: [7] [ 210/2502] eta: 0:29:37 lr: 0.000018 loss_cls: 4.1475 (3.9144) grad_norm: 4.1203 (4.2085) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 08:05:01 root] (utils.py 283): INFO Epoch: [7] [ 220/2502] eta: 0:29:28 lr: 0.000018 loss_cls: 3.7032 (3.8884) grad_norm: 4.0312 (4.1985) time: 0.7605 data: 0.0003 max mem: 8421 +[2024-12-05 08:05:08 root] (utils.py 283): INFO Epoch: [7] [ 230/2502] eta: 0:29:18 lr: 0.000018 loss_cls: 3.7381 (3.8940) grad_norm: 4.1259 (4.2081) time: 0.7588 data: 0.0003 max mem: 8421 +[2024-12-05 08:05:16 root] (utils.py 283): INFO Epoch: [7] [ 240/2502] eta: 0:29:10 lr: 0.000018 loss_cls: 3.8746 (3.8929) grad_norm: 4.1809 (4.2051) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-05 08:05:23 root] (utils.py 283): INFO Epoch: [7] [ 250/2502] eta: 0:29:01 lr: 0.000018 loss_cls: 3.8343 (3.8911) grad_norm: 4.0650 (4.2055) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 08:05:31 root] (utils.py 283): INFO Epoch: [7] [ 260/2502] eta: 0:28:55 lr: 0.000018 loss_cls: 3.8514 (3.8871) grad_norm: 4.0648 (4.1981) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 08:05:39 root] (utils.py 283): INFO Epoch: [7] [ 270/2502] eta: 0:28:46 lr: 0.000018 loss_cls: 3.9192 (3.8890) grad_norm: 4.1341 (4.2225) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 08:05:47 root] (utils.py 283): INFO Epoch: [7] [ 280/2502] eta: 0:28:40 lr: 0.000018 loss_cls: 3.8034 (3.8864) grad_norm: 4.1515 (4.2242) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-05 08:05:55 root] (utils.py 283): INFO Epoch: [7] [ 290/2502] eta: 0:28:33 lr: 0.000018 loss_cls: 3.8127 (3.8927) grad_norm: 4.1515 (4.2299) time: 0.7897 data: 0.0002 max mem: 8421 +[2024-12-05 08:06:03 root] (utils.py 283): INFO Epoch: [7] [ 300/2502] eta: 0:28:26 lr: 0.000018 loss_cls: 3.9728 (3.8937) grad_norm: 4.2923 (4.2267) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-05 08:06:10 root] (utils.py 283): INFO Epoch: [7] [ 310/2502] eta: 0:28:18 lr: 0.000018 loss_cls: 4.0574 (3.8975) grad_norm: 4.3657 (4.3056) time: 0.7736 data: 0.0002 max mem: 8421 +[2024-12-05 08:06:18 root] (utils.py 283): INFO Epoch: [7] [ 320/2502] eta: 0:28:10 lr: 0.000018 loss_cls: 3.9496 (3.8927) grad_norm: 4.1992 (4.3015) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 08:06:26 root] (utils.py 283): INFO Epoch: [7] [ 330/2502] eta: 0:28:03 lr: 0.000018 loss_cls: 4.0505 (3.8981) grad_norm: 4.1231 (4.3021) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 08:06:34 root] (utils.py 283): INFO Epoch: [7] [ 340/2502] eta: 0:27:56 lr: 0.000018 loss_cls: 4.2091 (3.8989) grad_norm: 4.1853 (4.3029) time: 0.7824 data: 0.0002 max mem: 8421 +[2024-12-05 08:06:41 root] (utils.py 283): INFO Epoch: [7] [ 350/2502] eta: 0:27:47 lr: 0.000018 loss_cls: 3.9530 (3.8937) grad_norm: 4.1986 (4.3152) time: 0.7736 data: 0.0002 max mem: 8421 +[2024-12-05 08:06:49 root] (utils.py 283): INFO Epoch: [7] [ 360/2502] eta: 0:27:39 lr: 0.000018 loss_cls: 4.0982 (3.9022) grad_norm: 4.2558 (4.3213) time: 0.7626 data: 0.0003 max mem: 8421 +[2024-12-05 08:06:57 root] (utils.py 283): INFO Epoch: [7] [ 370/2502] eta: 0:27:30 lr: 0.000018 loss_cls: 4.2470 (3.9075) grad_norm: 4.2024 (4.3184) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 08:07:04 root] (utils.py 283): INFO Epoch: [7] [ 380/2502] eta: 0:27:22 lr: 0.000018 loss_cls: 3.9865 (3.9033) grad_norm: 4.0827 (4.3207) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-05 08:07:12 root] (utils.py 283): INFO Epoch: [7] [ 390/2502] eta: 0:27:14 lr: 0.000018 loss_cls: 3.9865 (3.9076) grad_norm: 4.1455 (4.3277) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 08:07:20 root] (utils.py 283): INFO Epoch: [7] [ 400/2502] eta: 0:27:06 lr: 0.000018 loss_cls: 4.2162 (3.9161) grad_norm: 4.3553 (4.3256) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 08:07:27 root] (utils.py 283): INFO Epoch: [7] [ 410/2502] eta: 0:26:57 lr: 0.000018 loss_cls: 3.9052 (3.9142) grad_norm: 4.2261 (4.3216) time: 0.7614 data: 0.0002 max mem: 8421 +[2024-12-05 08:07:35 root] (utils.py 283): INFO Epoch: [7] [ 420/2502] eta: 0:26:49 lr: 0.000018 loss_cls: 3.8801 (3.9143) grad_norm: 4.0971 (4.3261) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 08:07:43 root] (utils.py 283): INFO Epoch: [7] [ 430/2502] eta: 0:26:41 lr: 0.000018 loss_cls: 3.9790 (3.9119) grad_norm: 4.0676 (4.3204) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 08:07:50 root] (utils.py 283): INFO Epoch: [7] [ 440/2502] eta: 0:26:33 lr: 0.000018 loss_cls: 3.9655 (3.9152) grad_norm: 4.0643 (4.3160) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 08:07:58 root] (utils.py 283): INFO Epoch: [7] [ 450/2502] eta: 0:26:25 lr: 0.000018 loss_cls: 3.8827 (3.9100) grad_norm: 4.0560 (4.3100) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:06 root] (utils.py 283): INFO Epoch: [7] [ 460/2502] eta: 0:26:17 lr: 0.000018 loss_cls: 3.6808 (3.9074) grad_norm: 3.9623 (4.3054) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:13 root] (utils.py 283): INFO Epoch: [7] [ 470/2502] eta: 0:26:09 lr: 0.000018 loss_cls: 4.1672 (3.9092) grad_norm: 4.0602 (4.3093) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:21 root] (utils.py 283): INFO Epoch: [7] [ 480/2502] eta: 0:26:01 lr: 0.000018 loss_cls: 3.8532 (3.9087) grad_norm: 4.0063 (4.3029) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:28 root] (utils.py 283): INFO Epoch: [7] [ 490/2502] eta: 0:25:53 lr: 0.000018 loss_cls: 3.7904 (3.9093) grad_norm: 4.0063 (4.3023) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:36 root] (utils.py 283): INFO Epoch: [7] [ 500/2502] eta: 0:25:45 lr: 0.000018 loss_cls: 3.8980 (3.9094) grad_norm: 4.1419 (4.2970) time: 0.7606 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:44 root] (utils.py 283): INFO Epoch: [7] [ 510/2502] eta: 0:25:36 lr: 0.000018 loss_cls: 3.8980 (3.9079) grad_norm: 4.0372 (4.2923) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:51 root] (utils.py 283): INFO Epoch: [7] [ 520/2502] eta: 0:25:28 lr: 0.000018 loss_cls: 3.7648 (3.9041) grad_norm: 4.0487 (4.2880) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 08:08:59 root] (utils.py 283): INFO Epoch: [7] [ 530/2502] eta: 0:25:20 lr: 0.000018 loss_cls: 3.8569 (3.9082) grad_norm: 4.0903 (4.2877) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 08:09:07 root] (utils.py 283): INFO Epoch: [7] [ 540/2502] eta: 0:25:13 lr: 0.000018 loss_cls: 4.0670 (3.9064) grad_norm: 4.1465 (4.2863) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-05 08:09:14 root] (utils.py 283): INFO Epoch: [7] [ 550/2502] eta: 0:25:05 lr: 0.000018 loss_cls: 3.7657 (3.8975) grad_norm: 4.1465 (4.2852) time: 0.7751 data: 0.0002 max mem: 8421 +[2024-12-05 08:09:22 root] (utils.py 283): INFO Epoch: [7] [ 560/2502] eta: 0:24:57 lr: 0.000018 loss_cls: 3.9400 (3.8974) grad_norm: 3.9851 (4.2778) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 08:09:30 root] (utils.py 283): INFO Epoch: [7] [ 570/2502] eta: 0:24:50 lr: 0.000018 loss_cls: 4.2445 (3.9030) grad_norm: 4.0091 (4.2899) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 08:09:38 root] (utils.py 283): INFO Epoch: [7] [ 580/2502] eta: 0:24:42 lr: 0.000018 loss_cls: 4.1340 (3.9032) grad_norm: 4.2575 (4.3038) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 08:09:45 root] (utils.py 283): INFO Epoch: [7] [ 590/2502] eta: 0:24:34 lr: 0.000018 loss_cls: 4.1340 (3.9108) grad_norm: 4.2575 (4.3051) time: 0.7708 data: 0.0002 max mem: 8421 +[2024-12-05 08:09:53 root] (utils.py 283): INFO Epoch: [7] [ 600/2502] eta: 0:24:27 lr: 0.000018 loss_cls: 4.1830 (3.9070) grad_norm: 4.2860 (4.3022) time: 0.7756 data: 0.0003 max mem: 8421 +[2024-12-05 08:10:01 root] (utils.py 283): INFO Epoch: [7] [ 610/2502] eta: 0:24:19 lr: 0.000018 loss_cls: 3.5782 (3.9046) grad_norm: 4.1057 (4.2983) time: 0.7751 data: 0.0003 max mem: 8421 +[2024-12-05 08:10:08 root] (utils.py 283): INFO Epoch: [7] [ 620/2502] eta: 0:24:11 lr: 0.000018 loss_cls: 4.0467 (3.9087) grad_norm: 3.9598 (4.2948) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 08:10:16 root] (utils.py 283): INFO Epoch: [7] [ 630/2502] eta: 0:24:03 lr: 0.000018 loss_cls: 4.0170 (3.9077) grad_norm: 4.1781 (4.2969) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-05 08:10:24 root] (utils.py 283): INFO Epoch: [7] [ 640/2502] eta: 0:23:56 lr: 0.000018 loss_cls: 3.9789 (3.9098) grad_norm: 4.1315 (4.2937) time: 0.7719 data: 0.0003 max mem: 8421 +[2024-12-05 08:10:31 root] (utils.py 283): INFO Epoch: [7] [ 650/2502] eta: 0:23:48 lr: 0.000018 loss_cls: 4.1417 (3.9115) grad_norm: 4.1315 (4.2921) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-05 08:10:39 root] (utils.py 283): INFO Epoch: [7] [ 660/2502] eta: 0:23:40 lr: 0.000018 loss_cls: 3.9315 (3.9082) grad_norm: 4.0269 (4.2881) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 08:10:47 root] (utils.py 283): INFO Epoch: [7] [ 670/2502] eta: 0:23:32 lr: 0.000018 loss_cls: 4.0930 (3.9114) grad_norm: 4.0269 (4.2974) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 08:10:55 root] (utils.py 283): INFO Epoch: [7] [ 680/2502] eta: 0:23:25 lr: 0.000018 loss_cls: 4.1510 (3.9136) grad_norm: 4.1877 (4.2979) time: 0.7729 data: 0.0002 max mem: 8421 +[2024-12-05 08:11:02 root] (utils.py 283): INFO Epoch: [7] [ 690/2502] eta: 0:23:17 lr: 0.000018 loss_cls: 3.9997 (3.9099) grad_norm: 4.1843 (4.2962) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-05 08:11:10 root] (utils.py 283): INFO Epoch: [7] [ 700/2502] eta: 0:23:09 lr: 0.000018 loss_cls: 3.9637 (3.9121) grad_norm: 4.0687 (4.2934) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 08:11:18 root] (utils.py 283): INFO Epoch: [7] [ 710/2502] eta: 0:23:01 lr: 0.000018 loss_cls: 4.0105 (3.9111) grad_norm: 4.1728 (4.2963) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-05 08:11:25 root] (utils.py 283): INFO Epoch: [7] [ 720/2502] eta: 0:22:53 lr: 0.000018 loss_cls: 3.6852 (3.9075) grad_norm: 4.2327 (4.2946) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 08:11:33 root] (utils.py 283): INFO Epoch: [7] [ 730/2502] eta: 0:22:45 lr: 0.000018 loss_cls: 3.5094 (3.9045) grad_norm: 4.1923 (4.2947) time: 0.7614 data: 0.0002 max mem: 8421 +[2024-12-05 08:11:40 root] (utils.py 283): INFO Epoch: [7] [ 740/2502] eta: 0:22:37 lr: 0.000018 loss_cls: 4.1726 (3.9063) grad_norm: 4.1373 (4.2914) time: 0.7626 data: 0.0003 max mem: 8421 +[2024-12-05 08:11:48 root] (utils.py 283): INFO Epoch: [7] [ 750/2502] eta: 0:22:29 lr: 0.000018 loss_cls: 4.2237 (3.9079) grad_norm: 4.1797 (4.2907) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-05 08:11:56 root] (utils.py 283): INFO Epoch: [7] [ 760/2502] eta: 0:22:22 lr: 0.000018 loss_cls: 4.2237 (3.9120) grad_norm: 4.1797 (4.2878) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 08:12:03 root] (utils.py 283): INFO Epoch: [7] [ 770/2502] eta: 0:22:14 lr: 0.000018 loss_cls: 4.1808 (3.9135) grad_norm: 4.0008 (4.2846) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 08:12:11 root] (utils.py 283): INFO Epoch: [7] [ 780/2502] eta: 0:22:06 lr: 0.000018 loss_cls: 3.9295 (3.9149) grad_norm: 4.0008 (4.2842) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 08:12:19 root] (utils.py 283): INFO Epoch: [7] [ 790/2502] eta: 0:21:58 lr: 0.000018 loss_cls: 3.9295 (3.9155) grad_norm: 4.1952 (4.2849) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 08:12:26 root] (utils.py 283): INFO Epoch: [7] [ 800/2502] eta: 0:21:51 lr: 0.000018 loss_cls: 3.8999 (3.9127) grad_norm: 4.2244 (4.2836) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 08:12:34 root] (utils.py 283): INFO Epoch: [7] [ 810/2502] eta: 0:21:43 lr: 0.000018 loss_cls: 3.6878 (3.9119) grad_norm: 4.2348 (4.3039) time: 0.7702 data: 0.0002 max mem: 8421 +[2024-12-05 08:12:42 root] (utils.py 283): INFO Epoch: [7] [ 820/2502] eta: 0:21:35 lr: 0.000018 loss_cls: 3.9075 (3.9120) grad_norm: 4.1891 (4.3041) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 08:12:49 root] (utils.py 283): INFO Epoch: [7] [ 830/2502] eta: 0:21:27 lr: 0.000018 loss_cls: 4.1641 (3.9146) grad_norm: 4.0754 (4.3008) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 08:12:57 root] (utils.py 283): INFO Epoch: [7] [ 840/2502] eta: 0:21:20 lr: 0.000018 loss_cls: 3.9122 (3.9125) grad_norm: 4.0369 (4.2989) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-05 08:13:05 root] (utils.py 283): INFO Epoch: [7] [ 850/2502] eta: 0:21:12 lr: 0.000018 loss_cls: 3.8913 (3.9135) grad_norm: 4.1364 (4.2969) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 08:13:13 root] (utils.py 283): INFO Epoch: [7] [ 860/2502] eta: 0:21:04 lr: 0.000018 loss_cls: 4.0325 (3.9137) grad_norm: 4.1894 (4.2953) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 08:13:20 root] (utils.py 283): INFO Epoch: [7] [ 870/2502] eta: 0:20:56 lr: 0.000018 loss_cls: 4.1393 (3.9149) grad_norm: 4.1645 (4.2936) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 08:13:28 root] (utils.py 283): INFO Epoch: [7] [ 880/2502] eta: 0:20:49 lr: 0.000018 loss_cls: 4.0890 (3.9161) grad_norm: 4.1645 (4.2950) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 08:13:35 root] (utils.py 283): INFO Epoch: [7] [ 890/2502] eta: 0:20:41 lr: 0.000018 loss_cls: 4.0188 (3.9181) grad_norm: 4.1767 (4.2975) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 08:13:43 root] (utils.py 283): INFO Epoch: [7] [ 900/2502] eta: 0:20:33 lr: 0.000018 loss_cls: 4.0799 (3.9182) grad_norm: 4.1483 (4.2947) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 08:13:51 root] (utils.py 283): INFO Epoch: [7] [ 910/2502] eta: 0:20:25 lr: 0.000018 loss_cls: 3.9983 (3.9152) grad_norm: 4.2206 (4.2999) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 08:13:58 root] (utils.py 283): INFO Epoch: [7] [ 920/2502] eta: 0:20:17 lr: 0.000018 loss_cls: 3.9453 (3.9155) grad_norm: 4.1054 (4.2977) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 08:14:06 root] (utils.py 283): INFO Epoch: [7] [ 930/2502] eta: 0:20:10 lr: 0.000018 loss_cls: 4.0461 (3.9151) grad_norm: 4.1054 (4.3006) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 08:14:14 root] (utils.py 283): INFO Epoch: [7] [ 940/2502] eta: 0:20:02 lr: 0.000018 loss_cls: 4.1109 (3.9181) grad_norm: 4.2902 (4.3059) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 08:14:21 root] (utils.py 283): INFO Epoch: [7] [ 950/2502] eta: 0:19:54 lr: 0.000018 loss_cls: 4.2456 (3.9201) grad_norm: 4.1539 (4.3056) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-05 08:14:29 root] (utils.py 283): INFO Epoch: [7] [ 960/2502] eta: 0:19:46 lr: 0.000018 loss_cls: 4.1369 (3.9203) grad_norm: 4.0873 (4.3048) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 08:14:37 root] (utils.py 283): INFO Epoch: [7] [ 970/2502] eta: 0:19:38 lr: 0.000018 loss_cls: 3.7000 (3.9179) grad_norm: 4.0911 (4.3047) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 08:14:44 root] (utils.py 283): INFO Epoch: [7] [ 980/2502] eta: 0:19:31 lr: 0.000018 loss_cls: 3.6021 (3.9155) grad_norm: 4.2623 (4.3043) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 08:14:52 root] (utils.py 283): INFO Epoch: [7] [ 990/2502] eta: 0:19:23 lr: 0.000018 loss_cls: 3.8644 (3.9192) grad_norm: 4.2493 (4.3046) time: 0.7639 data: 0.0003 max mem: 8421 +[2024-12-05 08:15:00 root] (utils.py 283): INFO Epoch: [7] [1000/2502] eta: 0:19:15 lr: 0.000018 loss_cls: 4.2170 (3.9217) grad_norm: 4.1808 (4.3050) time: 0.7665 data: 0.0003 max mem: 8421 +[2024-12-05 08:15:07 root] (utils.py 283): INFO Epoch: [7] [1010/2502] eta: 0:19:08 lr: 0.000018 loss_cls: 4.0418 (3.9204) grad_norm: 4.1808 (4.3046) time: 0.7708 data: 0.0003 max mem: 8421 +[2024-12-05 08:15:15 root] (utils.py 283): INFO Epoch: [7] [1020/2502] eta: 0:19:00 lr: 0.000018 loss_cls: 3.9183 (3.9202) grad_norm: 4.1439 (4.3039) time: 0.7692 data: 0.0002 max mem: 8421 +[2024-12-05 08:15:23 root] (utils.py 283): INFO Epoch: [7] [1030/2502] eta: 0:18:52 lr: 0.000018 loss_cls: 4.0719 (3.9223) grad_norm: 4.0401 (4.3011) time: 0.7706 data: 0.0002 max mem: 8421 +[2024-12-05 08:15:30 root] (utils.py 283): INFO Epoch: [7] [1040/2502] eta: 0:18:44 lr: 0.000018 loss_cls: 4.2146 (3.9244) grad_norm: 4.1360 (4.3016) time: 0.7700 data: 0.0002 max mem: 8421 +[2024-12-05 08:15:38 root] (utils.py 283): INFO Epoch: [7] [1050/2502] eta: 0:18:37 lr: 0.000018 loss_cls: 4.2313 (3.9267) grad_norm: 4.1554 (4.2997) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 08:15:46 root] (utils.py 283): INFO Epoch: [7] [1060/2502] eta: 0:18:29 lr: 0.000018 loss_cls: 4.1550 (3.9277) grad_norm: 4.0169 (4.2984) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 08:15:53 root] (utils.py 283): INFO Epoch: [7] [1070/2502] eta: 0:18:21 lr: 0.000018 loss_cls: 3.9988 (3.9272) grad_norm: 4.1142 (4.2980) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 08:16:01 root] (utils.py 283): INFO Epoch: [7] [1080/2502] eta: 0:18:13 lr: 0.000018 loss_cls: 3.8049 (3.9263) grad_norm: 4.1142 (4.2968) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 08:16:09 root] (utils.py 283): INFO Epoch: [7] [1090/2502] eta: 0:18:06 lr: 0.000018 loss_cls: 4.2890 (3.9291) grad_norm: 4.0357 (4.2949) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 08:16:16 root] (utils.py 283): INFO Epoch: [7] [1100/2502] eta: 0:17:58 lr: 0.000018 loss_cls: 4.1767 (3.9297) grad_norm: 4.0111 (4.2926) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 08:16:24 root] (utils.py 283): INFO Epoch: [7] [1110/2502] eta: 0:17:50 lr: 0.000018 loss_cls: 4.0870 (3.9302) grad_norm: 4.0111 (4.2910) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 08:16:32 root] (utils.py 283): INFO Epoch: [7] [1120/2502] eta: 0:17:43 lr: 0.000018 loss_cls: 4.0870 (3.9314) grad_norm: 4.2152 (4.2928) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-05 08:16:40 root] (utils.py 283): INFO Epoch: [7] [1130/2502] eta: 0:17:35 lr: 0.000018 loss_cls: 3.8605 (3.9284) grad_norm: 4.3245 (4.2934) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-05 08:16:47 root] (utils.py 283): INFO Epoch: [7] [1140/2502] eta: 0:17:27 lr: 0.000018 loss_cls: 3.9262 (3.9306) grad_norm: 4.4123 (4.2951) time: 0.7588 data: 0.0002 max mem: 8421 +[2024-12-05 08:16:55 root] (utils.py 283): INFO Epoch: [7] [1150/2502] eta: 0:17:19 lr: 0.000018 loss_cls: 4.2511 (3.9294) grad_norm: 4.3822 (4.2958) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 08:17:02 root] (utils.py 283): INFO Epoch: [7] [1160/2502] eta: 0:17:12 lr: 0.000018 loss_cls: 3.8314 (3.9294) grad_norm: 4.4086 (4.2978) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 08:17:10 root] (utils.py 283): INFO Epoch: [7] [1170/2502] eta: 0:17:04 lr: 0.000018 loss_cls: 4.0162 (3.9281) grad_norm: 4.0931 (4.2950) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 08:17:18 root] (utils.py 283): INFO Epoch: [7] [1180/2502] eta: 0:16:56 lr: 0.000018 loss_cls: 3.8131 (3.9257) grad_norm: 4.0722 (4.2964) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 08:17:25 root] (utils.py 283): INFO Epoch: [7] [1190/2502] eta: 0:16:48 lr: 0.000018 loss_cls: 3.7693 (3.9249) grad_norm: 4.2106 (4.3033) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 08:17:33 root] (utils.py 283): INFO Epoch: [7] [1200/2502] eta: 0:16:41 lr: 0.000018 loss_cls: 3.9572 (3.9253) grad_norm: 4.2106 (4.3009) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 08:17:41 root] (utils.py 283): INFO Epoch: [7] [1210/2502] eta: 0:16:33 lr: 0.000018 loss_cls: 3.9572 (3.9252) grad_norm: 3.9716 (4.2990) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 08:17:48 root] (utils.py 283): INFO Epoch: [7] [1220/2502] eta: 0:16:25 lr: 0.000018 loss_cls: 3.8003 (3.9254) grad_norm: 4.1114 (4.2980) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 08:17:56 root] (utils.py 283): INFO Epoch: [7] [1230/2502] eta: 0:16:18 lr: 0.000018 loss_cls: 4.2215 (3.9273) grad_norm: 4.1397 (4.3005) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:04 root] (utils.py 283): INFO Epoch: [7] [1240/2502] eta: 0:16:10 lr: 0.000018 loss_cls: 4.2215 (3.9284) grad_norm: 4.1985 (4.2991) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:11 root] (utils.py 283): INFO Epoch: [7] [1250/2502] eta: 0:16:02 lr: 0.000018 loss_cls: 4.0974 (3.9298) grad_norm: 4.0383 (4.2980) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:19 root] (utils.py 283): INFO Epoch: [7] [1260/2502] eta: 0:15:54 lr: 0.000018 loss_cls: 4.0515 (3.9305) grad_norm: 4.0484 (4.2979) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:27 root] (utils.py 283): INFO Epoch: [7] [1270/2502] eta: 0:15:47 lr: 0.000018 loss_cls: 3.9778 (3.9310) grad_norm: 4.0389 (4.2957) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:34 root] (utils.py 283): INFO Epoch: [7] [1280/2502] eta: 0:15:39 lr: 0.000018 loss_cls: 3.9260 (3.9316) grad_norm: 4.0514 (4.2947) time: 0.7585 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:42 root] (utils.py 283): INFO Epoch: [7] [1290/2502] eta: 0:15:31 lr: 0.000018 loss_cls: 4.1703 (3.9323) grad_norm: 4.0514 (4.2988) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:50 root] (utils.py 283): INFO Epoch: [7] [1300/2502] eta: 0:15:23 lr: 0.000018 loss_cls: 3.8062 (3.9302) grad_norm: 4.0600 (4.2977) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 08:18:57 root] (utils.py 283): INFO Epoch: [7] [1310/2502] eta: 0:15:16 lr: 0.000018 loss_cls: 3.7245 (3.9299) grad_norm: 4.2483 (4.2978) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:05 root] (utils.py 283): INFO Epoch: [7] [1320/2502] eta: 0:15:08 lr: 0.000018 loss_cls: 3.8332 (3.9306) grad_norm: 4.1252 (4.2962) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:13 root] (utils.py 283): INFO Epoch: [7] [1330/2502] eta: 0:15:00 lr: 0.000018 loss_cls: 3.8994 (3.9304) grad_norm: 4.0537 (4.2948) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:20 root] (utils.py 283): INFO Epoch: [7] [1340/2502] eta: 0:14:53 lr: 0.000018 loss_cls: 4.0405 (3.9319) grad_norm: 4.0351 (4.2927) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:28 root] (utils.py 283): INFO Epoch: [7] [1350/2502] eta: 0:14:45 lr: 0.000018 loss_cls: 4.1050 (3.9318) grad_norm: 4.1246 (4.2925) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:35 root] (utils.py 283): INFO Epoch: [7] [1360/2502] eta: 0:14:37 lr: 0.000018 loss_cls: 3.7447 (3.9308) grad_norm: 4.2162 (4.2923) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:43 root] (utils.py 283): INFO Epoch: [7] [1370/2502] eta: 0:14:29 lr: 0.000018 loss_cls: 4.2501 (3.9312) grad_norm: 4.1799 (4.2963) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:51 root] (utils.py 283): INFO Epoch: [7] [1380/2502] eta: 0:14:22 lr: 0.000018 loss_cls: 4.1412 (3.9311) grad_norm: 4.1005 (4.2969) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 08:19:58 root] (utils.py 283): INFO Epoch: [7] [1390/2502] eta: 0:14:14 lr: 0.000018 loss_cls: 4.0603 (3.9293) grad_norm: 4.0806 (4.2980) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 08:20:06 root] (utils.py 283): INFO Epoch: [7] [1400/2502] eta: 0:14:06 lr: 0.000018 loss_cls: 3.7953 (3.9283) grad_norm: 4.1364 (4.2969) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 08:20:14 root] (utils.py 283): INFO Epoch: [7] [1410/2502] eta: 0:13:59 lr: 0.000018 loss_cls: 4.0443 (3.9291) grad_norm: 4.1375 (4.2963) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 08:20:21 root] (utils.py 283): INFO Epoch: [7] [1420/2502] eta: 0:13:51 lr: 0.000018 loss_cls: 4.1013 (3.9301) grad_norm: 3.9255 (4.2941) time: 0.7627 data: 0.0003 max mem: 8421 +[2024-12-05 08:20:29 root] (utils.py 283): INFO Epoch: [7] [1430/2502] eta: 0:13:43 lr: 0.000018 loss_cls: 4.1001 (3.9317) grad_norm: 3.9640 (4.2952) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 08:20:37 root] (utils.py 283): INFO Epoch: [7] [1440/2502] eta: 0:13:36 lr: 0.000018 loss_cls: 3.9011 (3.9304) grad_norm: 4.1550 (4.2943) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 08:20:45 root] (utils.py 283): INFO Epoch: [7] [1450/2502] eta: 0:13:28 lr: 0.000018 loss_cls: 3.6283 (3.9298) grad_norm: 4.2030 (4.2954) time: 0.7816 data: 0.0002 max mem: 8421 +[2024-12-05 08:20:52 root] (utils.py 283): INFO Epoch: [7] [1460/2502] eta: 0:13:20 lr: 0.000018 loss_cls: 4.2792 (3.9328) grad_norm: 4.2197 (4.2948) time: 0.7776 data: 0.0002 max mem: 8421 +[2024-12-05 08:21:00 root] (utils.py 283): INFO Epoch: [7] [1470/2502] eta: 0:13:13 lr: 0.000018 loss_cls: 4.2533 (3.9321) grad_norm: 4.1324 (4.2983) time: 0.7759 data: 0.0002 max mem: 8421 +[2024-12-05 08:21:08 root] (utils.py 283): INFO Epoch: [7] [1480/2502] eta: 0:13:05 lr: 0.000018 loss_cls: 4.1098 (3.9329) grad_norm: 4.1552 (4.2984) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 08:21:16 root] (utils.py 283): INFO Epoch: [7] [1490/2502] eta: 0:12:58 lr: 0.000018 loss_cls: 3.9969 (3.9321) grad_norm: 4.1181 (4.2981) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 08:21:23 root] (utils.py 283): INFO Epoch: [7] [1500/2502] eta: 0:12:50 lr: 0.000018 loss_cls: 3.7675 (3.9294) grad_norm: 4.0887 (4.2983) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-05 08:21:31 root] (utils.py 283): INFO Epoch: [7] [1510/2502] eta: 0:12:42 lr: 0.000018 loss_cls: 3.7675 (3.9286) grad_norm: 3.9744 (4.2977) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 08:21:39 root] (utils.py 283): INFO Epoch: [7] [1520/2502] eta: 0:12:34 lr: 0.000018 loss_cls: 3.9690 (3.9297) grad_norm: 4.0631 (4.2972) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 08:21:46 root] (utils.py 283): INFO Epoch: [7] [1530/2502] eta: 0:12:27 lr: 0.000018 loss_cls: 4.0930 (3.9295) grad_norm: 4.1757 (4.2970) time: 0.7723 data: 0.0002 max mem: 8421 +[2024-12-05 08:21:54 root] (utils.py 283): INFO Epoch: [7] [1540/2502] eta: 0:12:19 lr: 0.000018 loss_cls: 3.7080 (3.9282) grad_norm: 4.1757 (4.2965) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-05 08:22:02 root] (utils.py 283): INFO Epoch: [7] [1550/2502] eta: 0:12:11 lr: 0.000018 loss_cls: 3.7080 (3.9279) grad_norm: 4.2518 (4.2972) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-05 08:22:10 root] (utils.py 283): INFO Epoch: [7] [1560/2502] eta: 0:12:04 lr: 0.000018 loss_cls: 4.0693 (3.9271) grad_norm: 4.3728 (4.2968) time: 0.7640 data: 0.0003 max mem: 8421 +[2024-12-05 08:22:17 root] (utils.py 283): INFO Epoch: [7] [1570/2502] eta: 0:11:56 lr: 0.000018 loss_cls: 4.0722 (3.9274) grad_norm: 4.1748 (4.2959) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 08:22:25 root] (utils.py 283): INFO Epoch: [7] [1580/2502] eta: 0:11:48 lr: 0.000018 loss_cls: 4.0722 (3.9269) grad_norm: 4.0585 (4.2947) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 08:22:33 root] (utils.py 283): INFO Epoch: [7] [1590/2502] eta: 0:11:41 lr: 0.000018 loss_cls: 3.8172 (3.9260) grad_norm: 4.0811 (4.2954) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 08:22:40 root] (utils.py 283): INFO Epoch: [7] [1600/2502] eta: 0:11:33 lr: 0.000018 loss_cls: 3.9911 (3.9270) grad_norm: 4.2118 (4.2967) time: 0.7755 data: 0.0003 max mem: 8421 +[2024-12-05 08:22:48 root] (utils.py 283): INFO Epoch: [7] [1610/2502] eta: 0:11:25 lr: 0.000018 loss_cls: 4.0730 (3.9264) grad_norm: 4.2118 (4.2972) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 08:22:56 root] (utils.py 283): INFO Epoch: [7] [1620/2502] eta: 0:11:18 lr: 0.000018 loss_cls: 3.9040 (3.9254) grad_norm: 4.2355 (4.3039) time: 0.7727 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:04 root] (utils.py 283): INFO Epoch: [7] [1630/2502] eta: 0:11:10 lr: 0.000018 loss_cls: 3.6618 (3.9257) grad_norm: 4.2883 (4.3040) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:11 root] (utils.py 283): INFO Epoch: [7] [1640/2502] eta: 0:11:02 lr: 0.000018 loss_cls: 4.0886 (3.9261) grad_norm: 4.2560 (4.3041) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:19 root] (utils.py 283): INFO Epoch: [7] [1650/2502] eta: 0:10:55 lr: 0.000018 loss_cls: 3.7894 (3.9255) grad_norm: 4.1500 (4.3045) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:27 root] (utils.py 283): INFO Epoch: [7] [1660/2502] eta: 0:10:47 lr: 0.000018 loss_cls: 3.7494 (3.9261) grad_norm: 4.2355 (4.3070) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:34 root] (utils.py 283): INFO Epoch: [7] [1670/2502] eta: 0:10:39 lr: 0.000018 loss_cls: 4.0345 (3.9244) grad_norm: 4.2553 (4.3064) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:42 root] (utils.py 283): INFO Epoch: [7] [1680/2502] eta: 0:10:32 lr: 0.000018 loss_cls: 3.6885 (3.9232) grad_norm: 4.2553 (4.3083) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:50 root] (utils.py 283): INFO Epoch: [7] [1690/2502] eta: 0:10:24 lr: 0.000018 loss_cls: 3.8716 (3.9236) grad_norm: 4.2066 (4.3076) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 08:23:57 root] (utils.py 283): INFO Epoch: [7] [1700/2502] eta: 0:10:16 lr: 0.000018 loss_cls: 3.9758 (3.9226) grad_norm: 4.0751 (4.3076) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 08:24:05 root] (utils.py 283): INFO Epoch: [7] [1710/2502] eta: 0:10:08 lr: 0.000018 loss_cls: 4.2271 (3.9242) grad_norm: 4.1080 (4.3072) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 08:24:13 root] (utils.py 283): INFO Epoch: [7] [1720/2502] eta: 0:10:01 lr: 0.000018 loss_cls: 4.3357 (3.9252) grad_norm: 4.1142 (4.3059) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 08:24:20 root] (utils.py 283): INFO Epoch: [7] [1730/2502] eta: 0:09:53 lr: 0.000018 loss_cls: 3.9701 (3.9243) grad_norm: 4.1169 (4.3053) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 08:24:28 root] (utils.py 283): INFO Epoch: [7] [1740/2502] eta: 0:09:45 lr: 0.000018 loss_cls: 3.6507 (3.9232) grad_norm: 4.1169 (4.3045) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-05 08:24:36 root] (utils.py 283): INFO Epoch: [7] [1750/2502] eta: 0:09:38 lr: 0.000018 loss_cls: 3.9615 (3.9236) grad_norm: 3.9668 (4.3034) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 08:24:43 root] (utils.py 283): INFO Epoch: [7] [1760/2502] eta: 0:09:30 lr: 0.000018 loss_cls: 4.0676 (3.9246) grad_norm: 4.0501 (4.3026) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 08:24:51 root] (utils.py 283): INFO Epoch: [7] [1770/2502] eta: 0:09:22 lr: 0.000018 loss_cls: 4.0676 (3.9249) grad_norm: 4.0953 (4.3021) time: 0.7742 data: 0.0002 max mem: 8421 +[2024-12-05 08:24:59 root] (utils.py 283): INFO Epoch: [7] [1780/2502] eta: 0:09:15 lr: 0.000018 loss_cls: 4.1483 (3.9256) grad_norm: 4.3132 (4.3023) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-05 08:25:07 root] (utils.py 283): INFO Epoch: [7] [1790/2502] eta: 0:09:07 lr: 0.000018 loss_cls: 4.0723 (3.9248) grad_norm: 4.3305 (4.3044) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-05 08:25:14 root] (utils.py 283): INFO Epoch: [7] [1800/2502] eta: 0:08:59 lr: 0.000018 loss_cls: 4.0723 (3.9257) grad_norm: 4.4459 (4.3048) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 08:25:22 root] (utils.py 283): INFO Epoch: [7] [1810/2502] eta: 0:08:52 lr: 0.000018 loss_cls: 4.0904 (3.9242) grad_norm: 4.4459 (4.3061) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 08:25:29 root] (utils.py 283): INFO Epoch: [7] [1820/2502] eta: 0:08:44 lr: 0.000018 loss_cls: 3.6809 (3.9240) grad_norm: 4.0945 (4.3047) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 08:25:37 root] (utils.py 283): INFO Epoch: [7] [1830/2502] eta: 0:08:36 lr: 0.000018 loss_cls: 3.6809 (3.9226) grad_norm: 4.0878 (4.3034) time: 0.7665 data: 0.0003 max mem: 8421 +[2024-12-05 08:25:45 root] (utils.py 283): INFO Epoch: [7] [1840/2502] eta: 0:08:28 lr: 0.000018 loss_cls: 3.6923 (3.9218) grad_norm: 4.0720 (4.3041) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 08:25:52 root] (utils.py 283): INFO Epoch: [7] [1850/2502] eta: 0:08:21 lr: 0.000018 loss_cls: 3.9059 (3.9216) grad_norm: 4.0785 (4.3023) time: 0.7600 data: 0.0002 max mem: 8421 +[2024-12-05 08:26:00 root] (utils.py 283): INFO Epoch: [7] [1860/2502] eta: 0:08:13 lr: 0.000018 loss_cls: 4.1398 (3.9233) grad_norm: 4.0448 (4.3010) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-05 08:26:08 root] (utils.py 283): INFO Epoch: [7] [1870/2502] eta: 0:08:05 lr: 0.000018 loss_cls: 3.9864 (3.9222) grad_norm: 4.1111 (4.3010) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-05 08:26:15 root] (utils.py 283): INFO Epoch: [7] [1880/2502] eta: 0:07:58 lr: 0.000018 loss_cls: 3.8520 (3.9231) grad_norm: 4.1812 (4.3015) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 08:26:23 root] (utils.py 283): INFO Epoch: [7] [1890/2502] eta: 0:07:50 lr: 0.000018 loss_cls: 4.1953 (3.9238) grad_norm: 4.1341 (4.3005) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-05 08:26:31 root] (utils.py 283): INFO Epoch: [7] [1900/2502] eta: 0:07:42 lr: 0.000018 loss_cls: 4.1650 (3.9233) grad_norm: 4.0995 (4.3000) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-05 08:26:39 root] (utils.py 283): INFO Epoch: [7] [1910/2502] eta: 0:07:35 lr: 0.000018 loss_cls: 4.1919 (3.9246) grad_norm: 4.0995 (4.2993) time: 0.7734 data: 0.0003 max mem: 8421 +[2024-12-05 08:26:46 root] (utils.py 283): INFO Epoch: [7] [1920/2502] eta: 0:07:27 lr: 0.000018 loss_cls: 4.3413 (3.9266) grad_norm: 4.1296 (4.2989) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-05 08:26:54 root] (utils.py 283): INFO Epoch: [7] [1930/2502] eta: 0:07:19 lr: 0.000018 loss_cls: 4.2058 (3.9264) grad_norm: 4.1575 (4.2987) time: 0.7741 data: 0.0002 max mem: 8421 +[2024-12-05 08:27:02 root] (utils.py 283): INFO Epoch: [7] [1940/2502] eta: 0:07:12 lr: 0.000018 loss_cls: 4.1268 (3.9272) grad_norm: 4.1614 (4.2985) time: 0.7837 data: 0.0002 max mem: 8421 +[2024-12-05 08:27:10 root] (utils.py 283): INFO Epoch: [7] [1950/2502] eta: 0:07:04 lr: 0.000018 loss_cls: 3.9828 (3.9260) grad_norm: 4.1094 (4.2975) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-05 08:27:18 root] (utils.py 283): INFO Epoch: [7] [1960/2502] eta: 0:06:56 lr: 0.000018 loss_cls: 3.9316 (3.9270) grad_norm: 3.9807 (4.2966) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-05 08:27:25 root] (utils.py 283): INFO Epoch: [7] [1970/2502] eta: 0:06:49 lr: 0.000018 loss_cls: 4.1392 (3.9273) grad_norm: 4.2021 (4.2977) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 08:27:33 root] (utils.py 283): INFO Epoch: [7] [1980/2502] eta: 0:06:41 lr: 0.000018 loss_cls: 4.2207 (3.9285) grad_norm: 4.0848 (4.2978) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 08:27:41 root] (utils.py 283): INFO Epoch: [7] [1990/2502] eta: 0:06:33 lr: 0.000018 loss_cls: 4.2873 (3.9291) grad_norm: 4.0848 (4.2973) time: 0.7717 data: 0.0003 max mem: 8421 +[2024-12-05 08:27:48 root] (utils.py 283): INFO Epoch: [7] [2000/2502] eta: 0:06:26 lr: 0.000018 loss_cls: 3.4728 (3.9260) grad_norm: 4.1610 (4.2986) time: 0.7747 data: 0.0002 max mem: 8421 +[2024-12-05 08:27:56 root] (utils.py 283): INFO Epoch: [7] [2010/2502] eta: 0:06:18 lr: 0.000018 loss_cls: 3.4645 (3.9251) grad_norm: 4.2091 (4.2984) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-05 08:28:04 root] (utils.py 283): INFO Epoch: [7] [2020/2502] eta: 0:06:10 lr: 0.000018 loss_cls: 3.7981 (3.9255) grad_norm: 4.0634 (4.2973) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 08:28:11 root] (utils.py 283): INFO Epoch: [7] [2030/2502] eta: 0:06:02 lr: 0.000018 loss_cls: 4.1904 (3.9269) grad_norm: 4.0634 (4.2973) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 08:28:19 root] (utils.py 283): INFO Epoch: [7] [2040/2502] eta: 0:05:55 lr: 0.000018 loss_cls: 4.0531 (3.9263) grad_norm: 4.1689 (4.2971) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 08:28:27 root] (utils.py 283): INFO Epoch: [7] [2050/2502] eta: 0:05:47 lr: 0.000018 loss_cls: 3.8575 (3.9263) grad_norm: 4.1942 (4.3019) time: 0.7734 data: 0.0003 max mem: 8421 +[2024-12-05 08:28:35 root] (utils.py 283): INFO Epoch: [7] [2060/2502] eta: 0:05:39 lr: 0.000018 loss_cls: 4.0393 (3.9270) grad_norm: 4.2298 (4.3016) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-05 08:28:43 root] (utils.py 283): INFO Epoch: [7] [2070/2502] eta: 0:05:32 lr: 0.000018 loss_cls: 3.9439 (3.9270) grad_norm: 4.1851 (4.3010) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 08:28:50 root] (utils.py 283): INFO Epoch: [7] [2080/2502] eta: 0:05:24 lr: 0.000018 loss_cls: 3.8405 (3.9270) grad_norm: 4.1837 (4.3009) time: 0.7752 data: 0.0002 max mem: 8421 +[2024-12-05 08:28:58 root] (utils.py 283): INFO Epoch: [7] [2090/2502] eta: 0:05:16 lr: 0.000018 loss_cls: 3.7702 (3.9255) grad_norm: 4.2733 (4.3008) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 08:29:05 root] (utils.py 283): INFO Epoch: [7] [2100/2502] eta: 0:05:09 lr: 0.000018 loss_cls: 3.7849 (3.9255) grad_norm: 4.1909 (4.3022) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 08:29:13 root] (utils.py 283): INFO Epoch: [7] [2110/2502] eta: 0:05:01 lr: 0.000018 loss_cls: 3.8693 (3.9249) grad_norm: 4.1650 (4.3015) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 08:29:21 root] (utils.py 283): INFO Epoch: [7] [2120/2502] eta: 0:04:53 lr: 0.000018 loss_cls: 4.1007 (3.9262) grad_norm: 4.1679 (4.3023) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 08:29:29 root] (utils.py 283): INFO Epoch: [7] [2130/2502] eta: 0:04:46 lr: 0.000018 loss_cls: 4.0742 (3.9256) grad_norm: 4.1814 (4.3015) time: 0.7707 data: 0.0002 max mem: 8421 +[2024-12-05 08:29:36 root] (utils.py 283): INFO Epoch: [7] [2140/2502] eta: 0:04:38 lr: 0.000018 loss_cls: 3.7305 (3.9242) grad_norm: 4.0147 (4.3008) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 08:29:44 root] (utils.py 283): INFO Epoch: [7] [2150/2502] eta: 0:04:30 lr: 0.000018 loss_cls: 3.8655 (3.9250) grad_norm: 4.1272 (4.3008) time: 0.7821 data: 0.0002 max mem: 8421 +[2024-12-05 08:29:52 root] (utils.py 283): INFO Epoch: [7] [2160/2502] eta: 0:04:23 lr: 0.000018 loss_cls: 4.1642 (3.9259) grad_norm: 4.3905 (4.3011) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 08:30:00 root] (utils.py 283): INFO Epoch: [7] [2170/2502] eta: 0:04:15 lr: 0.000018 loss_cls: 4.2035 (3.9271) grad_norm: 4.1294 (4.3005) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-05 08:30:07 root] (utils.py 283): INFO Epoch: [7] [2180/2502] eta: 0:04:07 lr: 0.000018 loss_cls: 4.1249 (3.9275) grad_norm: 4.1215 (4.2999) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 08:30:15 root] (utils.py 283): INFO Epoch: [7] [2190/2502] eta: 0:03:59 lr: 0.000018 loss_cls: 3.9853 (3.9276) grad_norm: 4.0147 (4.2986) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 08:30:23 root] (utils.py 283): INFO Epoch: [7] [2200/2502] eta: 0:03:52 lr: 0.000018 loss_cls: 3.9924 (3.9289) grad_norm: 3.9889 (4.2987) time: 0.7716 data: 0.0002 max mem: 8421 +[2024-12-05 08:30:30 root] (utils.py 283): INFO Epoch: [7] [2210/2502] eta: 0:03:44 lr: 0.000018 loss_cls: 4.1014 (3.9292) grad_norm: 4.0876 (4.2982) time: 0.7711 data: 0.0002 max mem: 8421 +[2024-12-05 08:30:38 root] (utils.py 283): INFO Epoch: [7] [2220/2502] eta: 0:03:36 lr: 0.000018 loss_cls: 3.9951 (3.9287) grad_norm: 4.0897 (4.2978) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 08:30:46 root] (utils.py 283): INFO Epoch: [7] [2230/2502] eta: 0:03:29 lr: 0.000018 loss_cls: 3.9433 (3.9282) grad_norm: 4.0607 (4.2963) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 08:30:53 root] (utils.py 283): INFO Epoch: [7] [2240/2502] eta: 0:03:21 lr: 0.000018 loss_cls: 3.9278 (3.9284) grad_norm: 4.0741 (4.2960) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 08:31:01 root] (utils.py 283): INFO Epoch: [7] [2250/2502] eta: 0:03:13 lr: 0.000018 loss_cls: 3.9278 (3.9278) grad_norm: 4.1832 (4.2954) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 08:31:09 root] (utils.py 283): INFO Epoch: [7] [2260/2502] eta: 0:03:06 lr: 0.000018 loss_cls: 3.8198 (3.9272) grad_norm: 4.0781 (4.2945) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 08:31:16 root] (utils.py 283): INFO Epoch: [7] [2270/2502] eta: 0:02:58 lr: 0.000018 loss_cls: 3.9742 (3.9270) grad_norm: 4.2246 (4.2953) time: 0.7639 data: 0.0003 max mem: 8421 +[2024-12-05 08:31:24 root] (utils.py 283): INFO Epoch: [7] [2280/2502] eta: 0:02:50 lr: 0.000018 loss_cls: 3.9709 (3.9277) grad_norm: 4.2246 (4.2951) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 08:31:31 root] (utils.py 283): INFO Epoch: [7] [2290/2502] eta: 0:02:43 lr: 0.000018 loss_cls: 3.9467 (3.9277) grad_norm: 4.0543 (4.2945) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 08:31:39 root] (utils.py 283): INFO Epoch: [7] [2300/2502] eta: 0:02:35 lr: 0.000018 loss_cls: 4.0155 (3.9280) grad_norm: 4.0918 (4.2936) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-05 08:31:47 root] (utils.py 283): INFO Epoch: [7] [2310/2502] eta: 0:02:27 lr: 0.000018 loss_cls: 4.0520 (3.9274) grad_norm: 4.1377 (4.2940) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 08:31:54 root] (utils.py 283): INFO Epoch: [7] [2320/2502] eta: 0:02:19 lr: 0.000018 loss_cls: 4.0569 (3.9275) grad_norm: 4.2847 (4.2947) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 08:32:02 root] (utils.py 283): INFO Epoch: [7] [2330/2502] eta: 0:02:12 lr: 0.000018 loss_cls: 4.0569 (3.9273) grad_norm: 4.3092 (4.2948) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 08:32:10 root] (utils.py 283): INFO Epoch: [7] [2340/2502] eta: 0:02:04 lr: 0.000018 loss_cls: 3.9274 (3.9269) grad_norm: 4.3092 (4.2947) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 08:32:17 root] (utils.py 283): INFO Epoch: [7] [2350/2502] eta: 0:01:56 lr: 0.000018 loss_cls: 3.8091 (3.9257) grad_norm: 4.0816 (4.2979) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 08:32:25 root] (utils.py 283): INFO Epoch: [7] [2360/2502] eta: 0:01:49 lr: 0.000018 loss_cls: 3.8384 (3.9258) grad_norm: 4.2139 (4.2982) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 08:32:33 root] (utils.py 283): INFO Epoch: [7] [2370/2502] eta: 0:01:41 lr: 0.000018 loss_cls: 3.8384 (3.9255) grad_norm: 4.2644 (4.2978) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 08:32:40 root] (utils.py 283): INFO Epoch: [7] [2380/2502] eta: 0:01:33 lr: 0.000018 loss_cls: 4.0426 (3.9258) grad_norm: 4.2196 (4.2976) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 08:32:48 root] (utils.py 283): INFO Epoch: [7] [2390/2502] eta: 0:01:26 lr: 0.000018 loss_cls: 3.9426 (3.9254) grad_norm: 4.0782 (4.2977) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 08:32:56 root] (utils.py 283): INFO Epoch: [7] [2400/2502] eta: 0:01:18 lr: 0.000018 loss_cls: 3.7083 (3.9247) grad_norm: 4.0716 (4.2969) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 08:33:04 root] (utils.py 283): INFO Epoch: [7] [2410/2502] eta: 0:01:10 lr: 0.000018 loss_cls: 3.9708 (3.9253) grad_norm: 3.9907 (4.2957) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 08:33:12 root] (utils.py 283): INFO Epoch: [7] [2420/2502] eta: 0:01:03 lr: 0.000018 loss_cls: 4.0883 (3.9247) grad_norm: 3.9907 (4.2947) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 08:33:20 root] (utils.py 283): INFO Epoch: [7] [2430/2502] eta: 0:00:55 lr: 0.000018 loss_cls: 4.0992 (3.9248) grad_norm: 4.0051 (4.2953) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-05 08:33:27 root] (utils.py 283): INFO Epoch: [7] [2440/2502] eta: 0:00:47 lr: 0.000018 loss_cls: 4.1502 (3.9252) grad_norm: 4.2790 (4.2982) time: 0.7896 data: 0.0003 max mem: 8421 +[2024-12-05 08:33:35 root] (utils.py 283): INFO Epoch: [7] [2450/2502] eta: 0:00:40 lr: 0.000018 loss_cls: 3.9643 (3.9246) grad_norm: 4.2820 (4.2981) time: 0.7750 data: 0.0003 max mem: 8421 +[2024-12-05 08:33:43 root] (utils.py 283): INFO Epoch: [7] [2460/2502] eta: 0:00:32 lr: 0.000018 loss_cls: 3.5793 (3.9239) grad_norm: 4.0999 (4.2976) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-05 08:33:50 root] (utils.py 283): INFO Epoch: [7] [2470/2502] eta: 0:00:24 lr: 0.000018 loss_cls: 4.0085 (3.9242) grad_norm: 4.1997 (4.2976) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 08:33:58 root] (utils.py 283): INFO Epoch: [7] [2480/2502] eta: 0:00:16 lr: 0.000018 loss_cls: 4.0085 (3.9238) grad_norm: 4.1407 (4.2972) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 08:34:06 root] (utils.py 283): INFO Epoch: [7] [2490/2502] eta: 0:00:09 lr: 0.000018 loss_cls: 3.8730 (3.9230) grad_norm: 4.0186 (4.2968) time: 0.7845 data: 0.0211 max mem: 8421 +[2024-12-05 08:34:14 root] (utils.py 283): INFO Epoch: [7] [2500/2502] eta: 0:00:01 lr: 0.000018 loss_cls: 3.9178 (3.9237) grad_norm: 4.1470 (4.2969) time: 0.7909 data: 0.0212 max mem: 8421 +[2024-12-05 08:34:14 root] (utils.py 283): INFO Epoch: [7] [2501/2502] eta: 0:00:00 lr: 0.000018 loss_cls: 3.8741 (3.9235) grad_norm: 4.2043 (4.2969) time: 0.7908 data: 0.0212 max mem: 8421 +[2024-12-05 08:34:14 root] (utils.py 297): INFO Epoch: [7] Total time: 0:32:05 (0.7694 s / it) +[2024-12-05 08:34:14 root] (engine.py 178): INFO Averaged stats:lr: 0.000018 loss_cls: 3.8741 (3.9230) grad_norm: 4.2043 (4.2969) +[2024-12-05 08:34:15 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7435 (0.7435) acc1: 86.7188 (86.7188) acc3: 94.5312 (94.5312) acc5: 96.8750 (96.8750) time: 0.1310 data: 0.0003 max mem: 8421 +[2024-12-05 08:34:16 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8427 (0.8761) acc1: 84.3750 (82.0312) acc3: 91.4062 (92.2585) acc5: 96.0938 (95.5256) time: 0.1312 data: 0.0004 max mem: 8421 +[2024-12-05 08:34:18 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8931 (0.9388) acc1: 79.6875 (80.2083) acc3: 90.6250 (91.9643) acc5: 95.3125 (95.0521) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 08:34:19 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9528 (0.9343) acc1: 80.4688 (79.8639) acc3: 92.1875 (92.4143) acc5: 95.3125 (95.2621) time: 0.1319 data: 0.0004 max mem: 8421 +[2024-12-05 08:34:20 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8382 (0.9238) acc1: 80.4688 (80.0686) acc3: 94.5312 (92.7401) acc5: 96.0938 (95.3697) time: 0.1320 data: 0.0005 max mem: 8421 +[2024-12-05 08:34:22 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0764 (1.0228) acc1: 73.4375 (77.6654) acc3: 88.2812 (91.1152) acc5: 91.4062 (94.1176) time: 0.1348 data: 0.0005 max mem: 8421 +[2024-12-05 08:34:23 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3547 (1.0671) acc1: 70.3125 (76.8571) acc3: 84.3750 (90.2280) acc5: 89.0625 (93.3658) time: 0.1346 data: 0.0005 max mem: 8421 +[2024-12-05 08:34:24 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2725 (1.1132) acc1: 71.8750 (75.6932) acc3: 85.9375 (89.6347) acc5: 89.8438 (92.9357) time: 0.1385 data: 0.0071 max mem: 8421 +[2024-12-05 08:34:26 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3329 (1.1509) acc1: 71.0938 (75.0675) acc3: 85.1562 (89.0239) acc5: 89.0625 (92.3997) time: 0.1621 data: 0.0298 max mem: 8421 +[2024-12-05 08:34:28 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3605 (1.1853) acc1: 71.0938 (74.3132) acc3: 84.3750 (88.4014) acc5: 88.2812 (91.8698) time: 0.1588 data: 0.0251 max mem: 8421 +[2024-12-05 08:34:28 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2617 (1.1740) acc1: 73.4375 (74.4800) acc3: 85.9375 (88.6080) acc5: 91.4062 (92.0720) time: 0.1352 data: 0.0026 max mem: 8421 +[2024-12-05 08:34:28 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1393 s / it) +[2024-12-05 08:34:29 root] (engine.py 263): INFO * Acc@1 74.140 Acc@3 88.588 Acc@5 92.018 loss 1.170 flops 1.285 layer_flops 1.251 +[2024-12-05 08:34:29 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.1% +[2024-12-05 08:34:29 root] (main.py 550): INFO Max accuracy: 74.14% +[2024-12-05 08:34:30 root] (utils.py 283): INFO Epoch: [8] [ 0/2502] eta: 0:32:04 lr: 0.000018 loss_cls: 4.0049 (4.0049) grad_norm: 4.3023 (4.3023) time: 0.7691 data: 0.0004 max mem: 8421 +[2024-12-05 08:34:37 root] (utils.py 283): INFO Epoch: [8] [ 10/2502] eta: 0:31:59 lr: 0.000018 loss_cls: 4.2030 (4.1460) grad_norm: 4.3023 (4.4112) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-05 08:34:45 root] (utils.py 283): INFO Epoch: [8] [ 20/2502] eta: 0:31:55 lr: 0.000018 loss_cls: 4.2797 (4.1808) grad_norm: 4.3493 (4.4347) time: 0.7717 data: 0.0003 max mem: 8421 +[2024-12-05 08:34:53 root] (utils.py 283): INFO Epoch: [8] [ 30/2502] eta: 0:31:40 lr: 0.000018 loss_cls: 4.3041 (4.1902) grad_norm: 4.3494 (4.4016) time: 0.7679 data: 0.0003 max mem: 8421 +[2024-12-05 08:35:00 root] (utils.py 283): INFO Epoch: [8] [ 40/2502] eta: 0:31:29 lr: 0.000018 loss_cls: 3.9706 (4.0551) grad_norm: 4.1720 (4.3157) time: 0.7630 data: 0.0003 max mem: 8421 +[2024-12-05 08:35:08 root] (utils.py 283): INFO Epoch: [8] [ 50/2502] eta: 0:31:27 lr: 0.000018 loss_cls: 4.0462 (4.1113) grad_norm: 4.0596 (4.2772) time: 0.7714 data: 0.0003 max mem: 8421 +[2024-12-05 08:35:16 root] (utils.py 283): INFO Epoch: [8] [ 60/2502] eta: 0:31:16 lr: 0.000018 loss_cls: 4.0932 (4.0891) grad_norm: 4.1644 (4.2579) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-05 08:35:23 root] (utils.py 283): INFO Epoch: [8] [ 70/2502] eta: 0:31:07 lr: 0.000018 loss_cls: 4.0075 (4.0876) grad_norm: 4.2470 (4.2893) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 08:35:31 root] (utils.py 283): INFO Epoch: [8] [ 80/2502] eta: 0:30:58 lr: 0.000018 loss_cls: 4.0075 (4.0698) grad_norm: 4.1699 (4.2716) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-05 08:35:39 root] (utils.py 283): INFO Epoch: [8] [ 90/2502] eta: 0:30:49 lr: 0.000018 loss_cls: 4.1083 (4.0683) grad_norm: 3.9610 (4.2959) time: 0.7622 data: 0.0002 max mem: 8421 +[2024-12-05 08:35:46 root] (utils.py 283): INFO Epoch: [8] [ 100/2502] eta: 0:30:44 lr: 0.000018 loss_cls: 3.9916 (4.0347) grad_norm: 4.2698 (4.2955) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 08:35:54 root] (utils.py 283): INFO Epoch: [8] [ 110/2502] eta: 0:30:34 lr: 0.000018 loss_cls: 3.9044 (4.0364) grad_norm: 4.2021 (4.2809) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 08:36:02 root] (utils.py 283): INFO Epoch: [8] [ 120/2502] eta: 0:30:26 lr: 0.000018 loss_cls: 3.9929 (4.0296) grad_norm: 4.1382 (4.2886) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 08:36:09 root] (utils.py 283): INFO Epoch: [8] [ 130/2502] eta: 0:30:19 lr: 0.000018 loss_cls: 4.1029 (4.0281) grad_norm: 4.0867 (4.2645) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 08:36:17 root] (utils.py 283): INFO Epoch: [8] [ 140/2502] eta: 0:30:10 lr: 0.000018 loss_cls: 3.9165 (3.9996) grad_norm: 4.0687 (4.2627) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-05 08:36:25 root] (utils.py 283): INFO Epoch: [8] [ 150/2502] eta: 0:30:04 lr: 0.000018 loss_cls: 3.7462 (3.9893) grad_norm: 4.2788 (4.2829) time: 0.7692 data: 0.0003 max mem: 8421 +[2024-12-05 08:36:32 root] (utils.py 283): INFO Epoch: [8] [ 160/2502] eta: 0:29:57 lr: 0.000018 loss_cls: 3.9978 (3.9918) grad_norm: 4.3038 (4.2921) time: 0.7732 data: 0.0003 max mem: 8421 +[2024-12-05 08:36:40 root] (utils.py 283): INFO Epoch: [8] [ 170/2502] eta: 0:29:50 lr: 0.000018 loss_cls: 4.1487 (4.0024) grad_norm: 4.4421 (4.3031) time: 0.7709 data: 0.0003 max mem: 8421 +[2024-12-05 08:36:48 root] (utils.py 283): INFO Epoch: [8] [ 180/2502] eta: 0:29:43 lr: 0.000018 loss_cls: 3.9546 (3.9875) grad_norm: 4.2660 (4.3002) time: 0.7726 data: 0.0003 max mem: 8421 +[2024-12-05 08:36:56 root] (utils.py 283): INFO Epoch: [8] [ 190/2502] eta: 0:29:36 lr: 0.000018 loss_cls: 4.1385 (3.9973) grad_norm: 4.0251 (4.2954) time: 0.7733 data: 0.0003 max mem: 8421 +[2024-12-05 08:37:03 root] (utils.py 283): INFO Epoch: [8] [ 200/2502] eta: 0:29:30 lr: 0.000018 loss_cls: 4.1907 (4.0019) grad_norm: 4.0251 (4.3058) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 08:37:11 root] (utils.py 283): INFO Epoch: [8] [ 210/2502] eta: 0:29:25 lr: 0.000018 loss_cls: 3.9240 (3.9904) grad_norm: 4.1860 (4.3028) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-05 08:37:19 root] (utils.py 283): INFO Epoch: [8] [ 220/2502] eta: 0:29:19 lr: 0.000018 loss_cls: 3.6178 (3.9714) grad_norm: 4.1527 (4.2925) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 08:37:27 root] (utils.py 283): INFO Epoch: [8] [ 230/2502] eta: 0:29:11 lr: 0.000018 loss_cls: 3.6178 (3.9670) grad_norm: 4.1474 (4.2961) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 08:37:35 root] (utils.py 283): INFO Epoch: [8] [ 240/2502] eta: 0:29:03 lr: 0.000018 loss_cls: 4.1465 (3.9756) grad_norm: 4.2706 (4.2918) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-05 08:37:42 root] (utils.py 283): INFO Epoch: [8] [ 250/2502] eta: 0:28:54 lr: 0.000018 loss_cls: 4.2873 (3.9879) grad_norm: 4.2364 (4.3065) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-05 08:37:50 root] (utils.py 283): INFO Epoch: [8] [ 260/2502] eta: 0:28:46 lr: 0.000018 loss_cls: 4.2218 (3.9917) grad_norm: 4.1790 (4.3032) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 08:37:57 root] (utils.py 283): INFO Epoch: [8] [ 270/2502] eta: 0:28:38 lr: 0.000018 loss_cls: 4.0439 (3.9819) grad_norm: 4.1284 (4.2912) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 08:38:05 root] (utils.py 283): INFO Epoch: [8] [ 280/2502] eta: 0:28:30 lr: 0.000018 loss_cls: 3.9499 (3.9836) grad_norm: 3.9638 (4.3036) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 08:38:13 root] (utils.py 283): INFO Epoch: [8] [ 290/2502] eta: 0:28:22 lr: 0.000018 loss_cls: 4.0349 (3.9805) grad_norm: 4.1145 (4.3080) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 08:38:21 root] (utils.py 283): INFO Epoch: [8] [ 300/2502] eta: 0:28:14 lr: 0.000018 loss_cls: 4.0349 (3.9862) grad_norm: 4.2847 (4.3086) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 08:38:28 root] (utils.py 283): INFO Epoch: [8] [ 310/2502] eta: 0:28:07 lr: 0.000018 loss_cls: 3.9102 (3.9759) grad_norm: 4.2315 (4.3054) time: 0.7702 data: 0.0002 max mem: 8421 +[2024-12-05 08:38:36 root] (utils.py 283): INFO Epoch: [8] [ 320/2502] eta: 0:27:59 lr: 0.000018 loss_cls: 3.7636 (3.9743) grad_norm: 4.2966 (4.3098) time: 0.7702 data: 0.0002 max mem: 8421 +[2024-12-05 08:38:44 root] (utils.py 283): INFO Epoch: [8] [ 330/2502] eta: 0:27:52 lr: 0.000018 loss_cls: 3.9911 (3.9778) grad_norm: 4.1735 (4.3055) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 08:38:51 root] (utils.py 283): INFO Epoch: [8] [ 340/2502] eta: 0:27:44 lr: 0.000018 loss_cls: 4.0322 (3.9816) grad_norm: 4.1348 (4.2980) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 08:38:59 root] (utils.py 283): INFO Epoch: [8] [ 350/2502] eta: 0:27:36 lr: 0.000018 loss_cls: 4.1287 (3.9843) grad_norm: 4.1097 (4.2997) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 08:39:07 root] (utils.py 283): INFO Epoch: [8] [ 360/2502] eta: 0:27:28 lr: 0.000018 loss_cls: 4.0815 (3.9869) grad_norm: 4.1585 (4.3016) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 08:39:14 root] (utils.py 283): INFO Epoch: [8] [ 370/2502] eta: 0:27:20 lr: 0.000018 loss_cls: 3.9102 (3.9741) grad_norm: 4.2839 (4.3100) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 08:39:22 root] (utils.py 283): INFO Epoch: [8] [ 380/2502] eta: 0:27:12 lr: 0.000018 loss_cls: 3.6528 (3.9693) grad_norm: 4.3756 (4.3090) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 08:39:30 root] (utils.py 283): INFO Epoch: [8] [ 390/2502] eta: 0:27:04 lr: 0.000018 loss_cls: 3.9719 (3.9680) grad_norm: 4.2220 (4.3078) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 08:39:37 root] (utils.py 283): INFO Epoch: [8] [ 400/2502] eta: 0:26:57 lr: 0.000018 loss_cls: 3.9719 (3.9611) grad_norm: 4.2220 (4.3060) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 08:39:45 root] (utils.py 283): INFO Epoch: [8] [ 410/2502] eta: 0:26:49 lr: 0.000018 loss_cls: 3.8721 (3.9607) grad_norm: 4.1548 (4.3050) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 08:39:53 root] (utils.py 283): INFO Epoch: [8] [ 420/2502] eta: 0:26:41 lr: 0.000018 loss_cls: 3.7177 (3.9521) grad_norm: 4.1534 (4.3058) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 08:40:00 root] (utils.py 283): INFO Epoch: [8] [ 430/2502] eta: 0:26:33 lr: 0.000018 loss_cls: 3.7177 (3.9479) grad_norm: 4.3067 (4.3123) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-05 08:40:08 root] (utils.py 283): INFO Epoch: [8] [ 440/2502] eta: 0:26:25 lr: 0.000018 loss_cls: 3.9662 (3.9512) grad_norm: 4.2314 (4.3179) time: 0.7721 data: 0.0002 max mem: 8421 +[2024-12-05 08:40:16 root] (utils.py 283): INFO Epoch: [8] [ 450/2502] eta: 0:26:18 lr: 0.000018 loss_cls: 4.1114 (3.9506) grad_norm: 4.3233 (4.3191) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-05 08:40:23 root] (utils.py 283): INFO Epoch: [8] [ 460/2502] eta: 0:26:10 lr: 0.000018 loss_cls: 4.0339 (3.9478) grad_norm: 4.1938 (4.3141) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 08:40:31 root] (utils.py 283): INFO Epoch: [8] [ 470/2502] eta: 0:26:02 lr: 0.000018 loss_cls: 4.0622 (3.9504) grad_norm: 4.1157 (4.3250) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 08:40:39 root] (utils.py 283): INFO Epoch: [8] [ 480/2502] eta: 0:25:54 lr: 0.000018 loss_cls: 3.9377 (3.9462) grad_norm: 4.2929 (4.3271) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 08:40:46 root] (utils.py 283): INFO Epoch: [8] [ 490/2502] eta: 0:25:46 lr: 0.000018 loss_cls: 3.8236 (3.9464) grad_norm: 4.1658 (4.3214) time: 0.7639 data: 0.0003 max mem: 8421 +[2024-12-05 08:40:54 root] (utils.py 283): INFO Epoch: [8] [ 500/2502] eta: 0:25:39 lr: 0.000018 loss_cls: 4.0405 (3.9487) grad_norm: 4.0719 (4.3192) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 08:41:02 root] (utils.py 283): INFO Epoch: [8] [ 510/2502] eta: 0:25:31 lr: 0.000018 loss_cls: 3.9570 (3.9459) grad_norm: 4.0732 (4.3190) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 08:41:09 root] (utils.py 283): INFO Epoch: [8] [ 520/2502] eta: 0:25:23 lr: 0.000018 loss_cls: 3.8442 (3.9458) grad_norm: 4.1134 (4.3214) time: 0.7694 data: 0.0003 max mem: 8421 +[2024-12-05 08:41:17 root] (utils.py 283): INFO Epoch: [8] [ 530/2502] eta: 0:25:16 lr: 0.000018 loss_cls: 3.7242 (3.9383) grad_norm: 4.2418 (4.3205) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 08:41:25 root] (utils.py 283): INFO Epoch: [8] [ 540/2502] eta: 0:25:09 lr: 0.000018 loss_cls: 3.7242 (3.9416) grad_norm: 4.1406 (4.3175) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 08:41:33 root] (utils.py 283): INFO Epoch: [8] [ 550/2502] eta: 0:25:01 lr: 0.000018 loss_cls: 4.0877 (3.9394) grad_norm: 4.1406 (4.3165) time: 0.7701 data: 0.0002 max mem: 8421 +[2024-12-05 08:41:40 root] (utils.py 283): INFO Epoch: [8] [ 560/2502] eta: 0:24:53 lr: 0.000018 loss_cls: 3.5601 (3.9321) grad_norm: 4.1150 (4.3126) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 08:41:48 root] (utils.py 283): INFO Epoch: [8] [ 570/2502] eta: 0:24:45 lr: 0.000018 loss_cls: 3.5390 (3.9315) grad_norm: 4.0538 (4.3084) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 08:41:56 root] (utils.py 283): INFO Epoch: [8] [ 580/2502] eta: 0:24:38 lr: 0.000018 loss_cls: 4.1289 (3.9310) grad_norm: 4.0727 (4.3152) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 08:42:03 root] (utils.py 283): INFO Epoch: [8] [ 590/2502] eta: 0:24:30 lr: 0.000018 loss_cls: 3.8610 (3.9262) grad_norm: 4.1441 (4.3138) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 08:42:11 root] (utils.py 283): INFO Epoch: [8] [ 600/2502] eta: 0:24:22 lr: 0.000018 loss_cls: 3.8332 (3.9255) grad_norm: 3.9936 (4.3076) time: 0.7628 data: 0.0003 max mem: 8421 +[2024-12-05 08:42:19 root] (utils.py 283): INFO Epoch: [8] [ 610/2502] eta: 0:24:14 lr: 0.000018 loss_cls: 3.8711 (3.9237) grad_norm: 3.9936 (4.3038) time: 0.7659 data: 0.0003 max mem: 8421 +[2024-12-05 08:42:26 root] (utils.py 283): INFO Epoch: [8] [ 620/2502] eta: 0:24:06 lr: 0.000018 loss_cls: 3.9154 (3.9236) grad_norm: 4.0011 (4.2976) time: 0.7659 data: 0.0003 max mem: 8421 +[2024-12-05 08:42:34 root] (utils.py 283): INFO Epoch: [8] [ 630/2502] eta: 0:23:58 lr: 0.000018 loss_cls: 4.2688 (3.9292) grad_norm: 4.1192 (4.3036) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-05 08:42:42 root] (utils.py 283): INFO Epoch: [8] [ 640/2502] eta: 0:23:51 lr: 0.000018 loss_cls: 4.1355 (3.9273) grad_norm: 4.3235 (4.3038) time: 0.7730 data: 0.0002 max mem: 8421 +[2024-12-05 08:42:49 root] (utils.py 283): INFO Epoch: [8] [ 650/2502] eta: 0:23:43 lr: 0.000018 loss_cls: 3.7739 (3.9237) grad_norm: 4.0545 (4.3008) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-05 08:42:57 root] (utils.py 283): INFO Epoch: [8] [ 660/2502] eta: 0:23:36 lr: 0.000018 loss_cls: 3.8159 (3.9248) grad_norm: 4.0978 (4.2986) time: 0.7722 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:05 root] (utils.py 283): INFO Epoch: [8] [ 670/2502] eta: 0:23:28 lr: 0.000018 loss_cls: 4.0053 (3.9250) grad_norm: 4.0978 (4.2950) time: 0.7709 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:13 root] (utils.py 283): INFO Epoch: [8] [ 680/2502] eta: 0:23:21 lr: 0.000018 loss_cls: 4.0250 (3.9281) grad_norm: 4.1564 (4.2959) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:20 root] (utils.py 283): INFO Epoch: [8] [ 690/2502] eta: 0:23:13 lr: 0.000018 loss_cls: 4.0728 (3.9294) grad_norm: 4.3108 (4.2952) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:28 root] (utils.py 283): INFO Epoch: [8] [ 700/2502] eta: 0:23:05 lr: 0.000018 loss_cls: 4.1283 (3.9312) grad_norm: 4.1347 (4.2930) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:36 root] (utils.py 283): INFO Epoch: [8] [ 710/2502] eta: 0:22:57 lr: 0.000018 loss_cls: 4.1381 (3.9304) grad_norm: 4.0809 (4.2929) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:43 root] (utils.py 283): INFO Epoch: [8] [ 720/2502] eta: 0:22:49 lr: 0.000018 loss_cls: 4.0627 (3.9297) grad_norm: 4.1050 (4.3005) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:51 root] (utils.py 283): INFO Epoch: [8] [ 730/2502] eta: 0:22:42 lr: 0.000018 loss_cls: 4.0710 (3.9325) grad_norm: 4.1940 (4.2992) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-05 08:43:58 root] (utils.py 283): INFO Epoch: [8] [ 740/2502] eta: 0:22:34 lr: 0.000018 loss_cls: 4.1702 (3.9321) grad_norm: 4.2090 (4.3010) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 08:44:06 root] (utils.py 283): INFO Epoch: [8] [ 750/2502] eta: 0:22:26 lr: 0.000018 loss_cls: 4.0692 (3.9333) grad_norm: 4.2380 (4.3041) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 08:44:14 root] (utils.py 283): INFO Epoch: [8] [ 760/2502] eta: 0:22:18 lr: 0.000018 loss_cls: 3.9315 (3.9328) grad_norm: 4.2145 (4.3027) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 08:44:21 root] (utils.py 283): INFO Epoch: [8] [ 770/2502] eta: 0:22:11 lr: 0.000018 loss_cls: 3.9315 (3.9304) grad_norm: 4.2625 (4.3022) time: 0.7640 data: 0.0003 max mem: 8421 +[2024-12-05 08:44:29 root] (utils.py 283): INFO Epoch: [8] [ 780/2502] eta: 0:22:03 lr: 0.000018 loss_cls: 4.0172 (3.9326) grad_norm: 4.2133 (4.3001) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 08:44:37 root] (utils.py 283): INFO Epoch: [8] [ 790/2502] eta: 0:21:55 lr: 0.000018 loss_cls: 3.9938 (3.9276) grad_norm: 4.0912 (4.2999) time: 0.7659 data: 0.0003 max mem: 8421 +[2024-12-05 08:44:44 root] (utils.py 283): INFO Epoch: [8] [ 800/2502] eta: 0:21:47 lr: 0.000018 loss_cls: 3.9938 (3.9274) grad_norm: 4.1051 (4.2983) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 08:44:52 root] (utils.py 283): INFO Epoch: [8] [ 810/2502] eta: 0:21:40 lr: 0.000018 loss_cls: 4.0884 (3.9272) grad_norm: 4.1051 (4.2981) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 08:45:00 root] (utils.py 283): INFO Epoch: [8] [ 820/2502] eta: 0:21:32 lr: 0.000018 loss_cls: 3.8406 (3.9239) grad_norm: 4.2888 (4.3026) time: 0.7708 data: 0.0003 max mem: 8421 +[2024-12-05 08:45:08 root] (utils.py 283): INFO Epoch: [8] [ 830/2502] eta: 0:21:24 lr: 0.000018 loss_cls: 4.1198 (3.9258) grad_norm: 4.2624 (4.3023) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-05 08:45:15 root] (utils.py 283): INFO Epoch: [8] [ 840/2502] eta: 0:21:17 lr: 0.000018 loss_cls: 4.1662 (3.9261) grad_norm: 4.2041 (4.3041) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 08:45:23 root] (utils.py 283): INFO Epoch: [8] [ 850/2502] eta: 0:21:09 lr: 0.000018 loss_cls: 4.0568 (3.9272) grad_norm: 4.2041 (4.3032) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 08:45:30 root] (utils.py 283): INFO Epoch: [8] [ 860/2502] eta: 0:21:01 lr: 0.000018 loss_cls: 4.1387 (3.9277) grad_norm: 3.9955 (4.3012) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 08:45:38 root] (utils.py 283): INFO Epoch: [8] [ 870/2502] eta: 0:20:53 lr: 0.000018 loss_cls: 4.0671 (3.9278) grad_norm: 4.0451 (4.3019) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 08:45:46 root] (utils.py 283): INFO Epoch: [8] [ 880/2502] eta: 0:20:46 lr: 0.000018 loss_cls: 4.0164 (3.9268) grad_norm: 4.0619 (4.2997) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 08:45:53 root] (utils.py 283): INFO Epoch: [8] [ 890/2502] eta: 0:20:38 lr: 0.000018 loss_cls: 3.8049 (3.9243) grad_norm: 4.1444 (4.2975) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 08:46:01 root] (utils.py 283): INFO Epoch: [8] [ 900/2502] eta: 0:20:30 lr: 0.000018 loss_cls: 3.9492 (3.9242) grad_norm: 4.0852 (4.2951) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 08:46:09 root] (utils.py 283): INFO Epoch: [8] [ 910/2502] eta: 0:20:22 lr: 0.000018 loss_cls: 3.9531 (3.9219) grad_norm: 4.0752 (4.2930) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 08:46:16 root] (utils.py 283): INFO Epoch: [8] [ 920/2502] eta: 0:20:15 lr: 0.000018 loss_cls: 3.5669 (3.9196) grad_norm: 4.2287 (4.2963) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 08:46:24 root] (utils.py 283): INFO Epoch: [8] [ 930/2502] eta: 0:20:07 lr: 0.000018 loss_cls: 3.7480 (3.9185) grad_norm: 4.2432 (4.2954) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 08:46:32 root] (utils.py 283): INFO Epoch: [8] [ 940/2502] eta: 0:19:59 lr: 0.000018 loss_cls: 3.8842 (3.9206) grad_norm: 4.1347 (4.2938) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 08:46:39 root] (utils.py 283): INFO Epoch: [8] [ 950/2502] eta: 0:19:52 lr: 0.000018 loss_cls: 3.8943 (3.9187) grad_norm: 4.1347 (4.2942) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 08:46:47 root] (utils.py 283): INFO Epoch: [8] [ 960/2502] eta: 0:19:44 lr: 0.000018 loss_cls: 4.1051 (3.9220) grad_norm: 4.1805 (4.2984) time: 0.7713 data: 0.0003 max mem: 8421 +[2024-12-05 08:46:55 root] (utils.py 283): INFO Epoch: [8] [ 970/2502] eta: 0:19:36 lr: 0.000018 loss_cls: 4.1154 (3.9222) grad_norm: 4.1846 (4.2982) time: 0.7679 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:02 root] (utils.py 283): INFO Epoch: [8] [ 980/2502] eta: 0:19:29 lr: 0.000018 loss_cls: 3.9725 (3.9217) grad_norm: 4.1970 (4.2989) time: 0.7659 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:10 root] (utils.py 283): INFO Epoch: [8] [ 990/2502] eta: 0:19:21 lr: 0.000018 loss_cls: 4.0823 (3.9213) grad_norm: 4.1970 (4.3002) time: 0.7719 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:18 root] (utils.py 283): INFO Epoch: [8] [1000/2502] eta: 0:19:13 lr: 0.000018 loss_cls: 4.1538 (3.9243) grad_norm: 4.1219 (4.2986) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:26 root] (utils.py 283): INFO Epoch: [8] [1010/2502] eta: 0:19:06 lr: 0.000018 loss_cls: 4.1538 (3.9261) grad_norm: 4.1384 (4.2969) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:33 root] (utils.py 283): INFO Epoch: [8] [1020/2502] eta: 0:18:58 lr: 0.000018 loss_cls: 4.0302 (3.9267) grad_norm: 4.1143 (4.2941) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:41 root] (utils.py 283): INFO Epoch: [8] [1030/2502] eta: 0:18:50 lr: 0.000018 loss_cls: 3.8706 (3.9261) grad_norm: 3.9782 (4.2915) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:49 root] (utils.py 283): INFO Epoch: [8] [1040/2502] eta: 0:18:43 lr: 0.000018 loss_cls: 3.8400 (3.9273) grad_norm: 3.9430 (4.2892) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 08:47:56 root] (utils.py 283): INFO Epoch: [8] [1050/2502] eta: 0:18:35 lr: 0.000018 loss_cls: 3.8217 (3.9243) grad_norm: 4.0569 (4.2882) time: 0.7628 data: 0.0003 max mem: 8421 +[2024-12-05 08:48:04 root] (utils.py 283): INFO Epoch: [8] [1060/2502] eta: 0:18:27 lr: 0.000018 loss_cls: 4.0379 (3.9259) grad_norm: 4.1766 (4.2891) time: 0.7603 data: 0.0002 max mem: 8421 +[2024-12-05 08:48:11 root] (utils.py 283): INFO Epoch: [8] [1070/2502] eta: 0:18:19 lr: 0.000018 loss_cls: 4.0851 (3.9253) grad_norm: 4.1766 (4.2890) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 08:48:19 root] (utils.py 283): INFO Epoch: [8] [1080/2502] eta: 0:18:12 lr: 0.000018 loss_cls: 4.0131 (3.9244) grad_norm: 4.0735 (4.2918) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 08:48:27 root] (utils.py 283): INFO Epoch: [8] [1090/2502] eta: 0:18:04 lr: 0.000018 loss_cls: 4.1812 (3.9261) grad_norm: 4.1197 (4.2913) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 08:48:34 root] (utils.py 283): INFO Epoch: [8] [1100/2502] eta: 0:17:56 lr: 0.000018 loss_cls: 3.9621 (3.9232) grad_norm: 4.0496 (4.2901) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 08:48:42 root] (utils.py 283): INFO Epoch: [8] [1110/2502] eta: 0:17:49 lr: 0.000018 loss_cls: 3.7913 (3.9239) grad_norm: 4.0456 (4.2888) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-05 08:48:50 root] (utils.py 283): INFO Epoch: [8] [1120/2502] eta: 0:17:41 lr: 0.000018 loss_cls: 4.1217 (3.9254) grad_norm: 4.1009 (4.2891) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 08:48:58 root] (utils.py 283): INFO Epoch: [8] [1130/2502] eta: 0:17:33 lr: 0.000018 loss_cls: 4.0375 (3.9246) grad_norm: 4.1378 (4.2876) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-05 08:49:05 root] (utils.py 283): INFO Epoch: [8] [1140/2502] eta: 0:17:26 lr: 0.000018 loss_cls: 3.9323 (3.9257) grad_norm: 3.9975 (4.2865) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 08:49:13 root] (utils.py 283): INFO Epoch: [8] [1150/2502] eta: 0:17:18 lr: 0.000018 loss_cls: 3.9323 (3.9262) grad_norm: 4.2128 (4.2866) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-05 08:49:21 root] (utils.py 283): INFO Epoch: [8] [1160/2502] eta: 0:17:10 lr: 0.000018 loss_cls: 4.0356 (3.9260) grad_norm: 4.2595 (4.2860) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 08:49:28 root] (utils.py 283): INFO Epoch: [8] [1170/2502] eta: 0:17:03 lr: 0.000018 loss_cls: 3.9852 (3.9251) grad_norm: 4.1176 (4.2840) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-05 08:49:36 root] (utils.py 283): INFO Epoch: [8] [1180/2502] eta: 0:16:55 lr: 0.000018 loss_cls: 3.9852 (3.9239) grad_norm: 4.1176 (4.2830) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-05 08:49:44 root] (utils.py 283): INFO Epoch: [8] [1190/2502] eta: 0:16:48 lr: 0.000018 loss_cls: 3.9795 (3.9233) grad_norm: 4.1523 (4.3024) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 08:49:52 root] (utils.py 283): INFO Epoch: [8] [1200/2502] eta: 0:16:40 lr: 0.000018 loss_cls: 4.0906 (3.9232) grad_norm: 4.5902 (4.3142) time: 0.7726 data: 0.0003 max mem: 8421 +[2024-12-05 08:49:59 root] (utils.py 283): INFO Epoch: [8] [1210/2502] eta: 0:16:32 lr: 0.000018 loss_cls: 4.0906 (3.9246) grad_norm: 4.4586 (4.3132) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 08:50:07 root] (utils.py 283): INFO Epoch: [8] [1220/2502] eta: 0:16:24 lr: 0.000018 loss_cls: 3.8761 (3.9232) grad_norm: 4.1412 (4.3141) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 08:50:15 root] (utils.py 283): INFO Epoch: [8] [1230/2502] eta: 0:16:17 lr: 0.000018 loss_cls: 3.6978 (3.9234) grad_norm: 4.1541 (4.3148) time: 0.7644 data: 0.0003 max mem: 8421 +[2024-12-05 08:50:22 root] (utils.py 283): INFO Epoch: [8] [1240/2502] eta: 0:16:09 lr: 0.000018 loss_cls: 4.1754 (3.9221) grad_norm: 4.1130 (4.3154) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 08:50:30 root] (utils.py 283): INFO Epoch: [8] [1250/2502] eta: 0:16:01 lr: 0.000018 loss_cls: 4.1903 (3.9234) grad_norm: 4.0496 (4.3141) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 08:50:38 root] (utils.py 283): INFO Epoch: [8] [1260/2502] eta: 0:15:54 lr: 0.000018 loss_cls: 4.0664 (3.9223) grad_norm: 4.0684 (4.3142) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-05 08:50:45 root] (utils.py 283): INFO Epoch: [8] [1270/2502] eta: 0:15:46 lr: 0.000018 loss_cls: 3.9761 (3.9211) grad_norm: 4.0384 (4.3118) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 08:50:53 root] (utils.py 283): INFO Epoch: [8] [1280/2502] eta: 0:15:39 lr: 0.000018 loss_cls: 4.0008 (3.9218) grad_norm: 4.0384 (4.3098) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 08:51:01 root] (utils.py 283): INFO Epoch: [8] [1290/2502] eta: 0:15:31 lr: 0.000018 loss_cls: 4.1364 (3.9230) grad_norm: 4.1180 (4.3095) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-05 08:51:09 root] (utils.py 283): INFO Epoch: [8] [1300/2502] eta: 0:15:23 lr: 0.000018 loss_cls: 4.0955 (3.9237) grad_norm: 4.1342 (4.3094) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-05 08:51:17 root] (utils.py 283): INFO Epoch: [8] [1310/2502] eta: 0:15:16 lr: 0.000018 loss_cls: 4.0706 (3.9234) grad_norm: 4.1846 (4.3090) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-05 08:51:25 root] (utils.py 283): INFO Epoch: [8] [1320/2502] eta: 0:15:08 lr: 0.000018 loss_cls: 4.0668 (3.9237) grad_norm: 4.0872 (4.3068) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-05 08:51:33 root] (utils.py 283): INFO Epoch: [8] [1330/2502] eta: 0:15:01 lr: 0.000018 loss_cls: 3.9986 (3.9236) grad_norm: 4.0798 (4.3054) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-05 08:51:40 root] (utils.py 283): INFO Epoch: [8] [1340/2502] eta: 0:14:53 lr: 0.000018 loss_cls: 4.1217 (3.9255) grad_norm: 4.1493 (4.3045) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 08:51:48 root] (utils.py 283): INFO Epoch: [8] [1350/2502] eta: 0:14:46 lr: 0.000018 loss_cls: 4.2008 (3.9261) grad_norm: 3.9769 (4.3026) time: 0.7816 data: 0.0002 max mem: 8421 +[2024-12-05 08:51:56 root] (utils.py 283): INFO Epoch: [8] [1360/2502] eta: 0:14:38 lr: 0.000018 loss_cls: 4.0595 (3.9269) grad_norm: 4.1393 (4.3025) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:04 root] (utils.py 283): INFO Epoch: [8] [1370/2502] eta: 0:14:30 lr: 0.000018 loss_cls: 4.2280 (3.9275) grad_norm: 4.1393 (4.3025) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:11 root] (utils.py 283): INFO Epoch: [8] [1380/2502] eta: 0:14:22 lr: 0.000018 loss_cls: 3.9035 (3.9259) grad_norm: 4.0518 (4.3020) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:19 root] (utils.py 283): INFO Epoch: [8] [1390/2502] eta: 0:14:15 lr: 0.000018 loss_cls: 3.8120 (3.9247) grad_norm: 3.9952 (4.2999) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:26 root] (utils.py 283): INFO Epoch: [8] [1400/2502] eta: 0:14:07 lr: 0.000018 loss_cls: 3.6908 (3.9233) grad_norm: 3.9387 (4.2986) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:34 root] (utils.py 283): INFO Epoch: [8] [1410/2502] eta: 0:13:59 lr: 0.000018 loss_cls: 4.1740 (3.9259) grad_norm: 4.1021 (4.2984) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:42 root] (utils.py 283): INFO Epoch: [8] [1420/2502] eta: 0:13:52 lr: 0.000018 loss_cls: 4.1740 (3.9250) grad_norm: 4.1474 (4.2980) time: 0.7838 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:50 root] (utils.py 283): INFO Epoch: [8] [1430/2502] eta: 0:13:44 lr: 0.000018 loss_cls: 3.8084 (3.9256) grad_norm: 4.1326 (4.2981) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-05 08:52:57 root] (utils.py 283): INFO Epoch: [8] [1440/2502] eta: 0:13:36 lr: 0.000018 loss_cls: 4.1606 (3.9258) grad_norm: 4.1276 (4.2970) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 08:53:05 root] (utils.py 283): INFO Epoch: [8] [1450/2502] eta: 0:13:29 lr: 0.000018 loss_cls: 4.1460 (3.9266) grad_norm: 4.2056 (4.2972) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 08:53:13 root] (utils.py 283): INFO Epoch: [8] [1460/2502] eta: 0:13:21 lr: 0.000018 loss_cls: 4.1515 (3.9282) grad_norm: 4.3388 (4.2988) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 08:53:20 root] (utils.py 283): INFO Epoch: [8] [1470/2502] eta: 0:13:13 lr: 0.000018 loss_cls: 4.0817 (3.9270) grad_norm: 4.2814 (4.2987) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 08:53:28 root] (utils.py 283): INFO Epoch: [8] [1480/2502] eta: 0:13:06 lr: 0.000018 loss_cls: 4.1808 (3.9292) grad_norm: 4.3146 (4.3018) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-05 08:53:36 root] (utils.py 283): INFO Epoch: [8] [1490/2502] eta: 0:12:58 lr: 0.000018 loss_cls: 4.2516 (3.9308) grad_norm: 4.3703 (4.3025) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 08:53:43 root] (utils.py 283): INFO Epoch: [8] [1500/2502] eta: 0:12:50 lr: 0.000018 loss_cls: 4.1924 (3.9330) grad_norm: 4.1863 (4.3027) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 08:53:51 root] (utils.py 283): INFO Epoch: [8] [1510/2502] eta: 0:12:42 lr: 0.000018 loss_cls: 4.1675 (3.9331) grad_norm: 4.1117 (4.3015) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 08:53:59 root] (utils.py 283): INFO Epoch: [8] [1520/2502] eta: 0:12:35 lr: 0.000018 loss_cls: 3.8121 (3.9315) grad_norm: 4.1117 (4.3014) time: 0.7721 data: 0.0002 max mem: 8421 +[2024-12-05 08:54:06 root] (utils.py 283): INFO Epoch: [8] [1530/2502] eta: 0:12:27 lr: 0.000018 loss_cls: 4.0216 (3.9313) grad_norm: 4.2599 (4.3009) time: 0.7707 data: 0.0002 max mem: 8421 +[2024-12-05 08:54:14 root] (utils.py 283): INFO Epoch: [8] [1540/2502] eta: 0:12:19 lr: 0.000018 loss_cls: 4.0294 (3.9327) grad_norm: 4.2599 (4.3006) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 08:54:22 root] (utils.py 283): INFO Epoch: [8] [1550/2502] eta: 0:12:12 lr: 0.000018 loss_cls: 3.8487 (3.9318) grad_norm: 4.1525 (4.3003) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 08:54:29 root] (utils.py 283): INFO Epoch: [8] [1560/2502] eta: 0:12:04 lr: 0.000018 loss_cls: 3.8661 (3.9315) grad_norm: 4.1420 (4.2999) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 08:54:37 root] (utils.py 283): INFO Epoch: [8] [1570/2502] eta: 0:11:56 lr: 0.000018 loss_cls: 3.8661 (3.9306) grad_norm: 4.1238 (4.2993) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 08:54:45 root] (utils.py 283): INFO Epoch: [8] [1580/2502] eta: 0:11:48 lr: 0.000018 loss_cls: 3.8905 (3.9307) grad_norm: 4.1612 (4.2989) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 08:54:52 root] (utils.py 283): INFO Epoch: [8] [1590/2502] eta: 0:11:41 lr: 0.000018 loss_cls: 4.0918 (3.9300) grad_norm: 4.1809 (4.2978) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-05 08:55:00 root] (utils.py 283): INFO Epoch: [8] [1600/2502] eta: 0:11:33 lr: 0.000018 loss_cls: 3.9424 (3.9297) grad_norm: 4.1583 (4.2972) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-05 08:55:08 root] (utils.py 283): INFO Epoch: [8] [1610/2502] eta: 0:11:25 lr: 0.000018 loss_cls: 3.7431 (3.9296) grad_norm: 4.1583 (4.2974) time: 0.7659 data: 0.0003 max mem: 8421 +[2024-12-05 08:55:15 root] (utils.py 283): INFO Epoch: [8] [1620/2502] eta: 0:11:18 lr: 0.000018 loss_cls: 3.7175 (3.9279) grad_norm: 4.1036 (4.2968) time: 0.7712 data: 0.0002 max mem: 8421 +[2024-12-05 08:55:23 root] (utils.py 283): INFO Epoch: [8] [1630/2502] eta: 0:11:10 lr: 0.000018 loss_cls: 3.8077 (3.9280) grad_norm: 4.0630 (4.2955) time: 0.7717 data: 0.0003 max mem: 8421 +[2024-12-05 08:55:31 root] (utils.py 283): INFO Epoch: [8] [1640/2502] eta: 0:11:02 lr: 0.000018 loss_cls: 3.9529 (3.9276) grad_norm: 4.0140 (4.2954) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 08:55:38 root] (utils.py 283): INFO Epoch: [8] [1650/2502] eta: 0:10:54 lr: 0.000018 loss_cls: 4.0776 (3.9281) grad_norm: 4.0454 (4.2941) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 08:55:46 root] (utils.py 283): INFO Epoch: [8] [1660/2502] eta: 0:10:47 lr: 0.000018 loss_cls: 4.0776 (3.9278) grad_norm: 4.1192 (4.2942) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 08:55:54 root] (utils.py 283): INFO Epoch: [8] [1670/2502] eta: 0:10:39 lr: 0.000018 loss_cls: 4.0807 (3.9290) grad_norm: 4.1639 (4.2941) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-05 08:56:01 root] (utils.py 283): INFO Epoch: [8] [1680/2502] eta: 0:10:31 lr: 0.000018 loss_cls: 4.2135 (3.9285) grad_norm: 4.0721 (4.2924) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 08:56:09 root] (utils.py 283): INFO Epoch: [8] [1690/2502] eta: 0:10:24 lr: 0.000018 loss_cls: 3.9257 (3.9289) grad_norm: 4.0198 (4.2917) time: 0.7606 data: 0.0002 max mem: 8421 +[2024-12-05 08:56:16 root] (utils.py 283): INFO Epoch: [8] [1700/2502] eta: 0:10:16 lr: 0.000018 loss_cls: 3.9676 (3.9288) grad_norm: 4.0045 (4.2899) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 08:56:24 root] (utils.py 283): INFO Epoch: [8] [1710/2502] eta: 0:10:08 lr: 0.000018 loss_cls: 3.9626 (3.9273) grad_norm: 3.9244 (4.2882) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 08:56:32 root] (utils.py 283): INFO Epoch: [8] [1720/2502] eta: 0:10:01 lr: 0.000018 loss_cls: 3.6996 (3.9263) grad_norm: 4.0497 (4.2889) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 08:56:39 root] (utils.py 283): INFO Epoch: [8] [1730/2502] eta: 0:09:53 lr: 0.000018 loss_cls: 3.8309 (3.9254) grad_norm: 4.1296 (4.2887) time: 0.7713 data: 0.0003 max mem: 8421 +[2024-12-05 08:56:47 root] (utils.py 283): INFO Epoch: [8] [1740/2502] eta: 0:09:45 lr: 0.000018 loss_cls: 3.9081 (3.9248) grad_norm: 4.0291 (4.2888) time: 0.7718 data: 0.0003 max mem: 8421 +[2024-12-05 08:56:55 root] (utils.py 283): INFO Epoch: [8] [1750/2502] eta: 0:09:37 lr: 0.000018 loss_cls: 3.7058 (3.9239) grad_norm: 4.1814 (4.2895) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 08:57:02 root] (utils.py 283): INFO Epoch: [8] [1760/2502] eta: 0:09:30 lr: 0.000018 loss_cls: 4.0147 (3.9243) grad_norm: 4.1606 (4.2899) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 08:57:10 root] (utils.py 283): INFO Epoch: [8] [1770/2502] eta: 0:09:22 lr: 0.000018 loss_cls: 4.1174 (3.9243) grad_norm: 4.0930 (4.2893) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 08:57:18 root] (utils.py 283): INFO Epoch: [8] [1780/2502] eta: 0:09:14 lr: 0.000018 loss_cls: 4.0284 (3.9242) grad_norm: 4.1138 (4.2883) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 08:57:25 root] (utils.py 283): INFO Epoch: [8] [1790/2502] eta: 0:09:07 lr: 0.000018 loss_cls: 3.6979 (3.9231) grad_norm: 3.9717 (4.2870) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 08:57:33 root] (utils.py 283): INFO Epoch: [8] [1800/2502] eta: 0:08:59 lr: 0.000018 loss_cls: 3.7732 (3.9239) grad_norm: 4.1065 (4.2873) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 08:57:41 root] (utils.py 283): INFO Epoch: [8] [1810/2502] eta: 0:08:51 lr: 0.000018 loss_cls: 4.3393 (3.9252) grad_norm: 4.1400 (4.2870) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 08:57:48 root] (utils.py 283): INFO Epoch: [8] [1820/2502] eta: 0:08:44 lr: 0.000018 loss_cls: 4.1364 (3.9248) grad_norm: 4.1765 (4.2867) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 08:57:56 root] (utils.py 283): INFO Epoch: [8] [1830/2502] eta: 0:08:36 lr: 0.000018 loss_cls: 3.7583 (3.9240) grad_norm: 4.0674 (4.2853) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 08:58:04 root] (utils.py 283): INFO Epoch: [8] [1840/2502] eta: 0:08:28 lr: 0.000018 loss_cls: 3.9630 (3.9243) grad_norm: 4.0175 (4.2846) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 08:58:11 root] (utils.py 283): INFO Epoch: [8] [1850/2502] eta: 0:08:21 lr: 0.000018 loss_cls: 4.0250 (3.9241) grad_norm: 4.2107 (4.2853) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 08:58:19 root] (utils.py 283): INFO Epoch: [8] [1860/2502] eta: 0:08:13 lr: 0.000018 loss_cls: 3.9261 (3.9235) grad_norm: 4.2308 (4.2852) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 08:58:27 root] (utils.py 283): INFO Epoch: [8] [1870/2502] eta: 0:08:05 lr: 0.000018 loss_cls: 3.8440 (3.9235) grad_norm: 4.1966 (4.2851) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-05 08:58:34 root] (utils.py 283): INFO Epoch: [8] [1880/2502] eta: 0:07:57 lr: 0.000018 loss_cls: 3.7009 (3.9221) grad_norm: 4.1712 (4.2845) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 08:58:42 root] (utils.py 283): INFO Epoch: [8] [1890/2502] eta: 0:07:50 lr: 0.000018 loss_cls: 4.2525 (3.9242) grad_norm: 4.1706 (4.2851) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 08:58:50 root] (utils.py 283): INFO Epoch: [8] [1900/2502] eta: 0:07:42 lr: 0.000018 loss_cls: 4.1732 (3.9251) grad_norm: 4.2972 (4.2893) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 08:58:57 root] (utils.py 283): INFO Epoch: [8] [1910/2502] eta: 0:07:34 lr: 0.000018 loss_cls: 4.0378 (3.9253) grad_norm: 4.2849 (4.2886) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 08:59:05 root] (utils.py 283): INFO Epoch: [8] [1920/2502] eta: 0:07:27 lr: 0.000018 loss_cls: 4.0099 (3.9258) grad_norm: 4.1911 (4.2884) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 08:59:13 root] (utils.py 283): INFO Epoch: [8] [1930/2502] eta: 0:07:19 lr: 0.000018 loss_cls: 3.8328 (3.9246) grad_norm: 4.1424 (4.2897) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 08:59:20 root] (utils.py 283): INFO Epoch: [8] [1940/2502] eta: 0:07:11 lr: 0.000018 loss_cls: 3.8749 (3.9250) grad_norm: 4.1164 (4.2894) time: 0.7669 data: 0.0003 max mem: 8421 +[2024-12-05 08:59:28 root] (utils.py 283): INFO Epoch: [8] [1950/2502] eta: 0:07:04 lr: 0.000018 loss_cls: 4.0702 (3.9243) grad_norm: 4.0732 (4.2881) time: 0.7711 data: 0.0002 max mem: 8421 +[2024-12-05 08:59:36 root] (utils.py 283): INFO Epoch: [8] [1960/2502] eta: 0:06:56 lr: 0.000018 loss_cls: 4.1596 (3.9244) grad_norm: 4.0830 (4.2888) time: 0.7678 data: 0.0003 max mem: 8421 +[2024-12-05 08:59:43 root] (utils.py 283): INFO Epoch: [8] [1970/2502] eta: 0:06:48 lr: 0.000018 loss_cls: 4.1596 (3.9242) grad_norm: 4.2042 (4.2884) time: 0.7609 data: 0.0002 max mem: 8421 +[2024-12-05 08:59:51 root] (utils.py 283): INFO Epoch: [8] [1980/2502] eta: 0:06:41 lr: 0.000018 loss_cls: 3.7904 (3.9236) grad_norm: 4.1313 (4.2884) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 08:59:59 root] (utils.py 283): INFO Epoch: [8] [1990/2502] eta: 0:06:33 lr: 0.000018 loss_cls: 3.7904 (3.9227) grad_norm: 4.1097 (4.2875) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 09:00:06 root] (utils.py 283): INFO Epoch: [8] [2000/2502] eta: 0:06:25 lr: 0.000018 loss_cls: 4.1872 (3.9239) grad_norm: 4.1192 (4.2870) time: 0.7723 data: 0.0002 max mem: 8421 +[2024-12-05 09:00:14 root] (utils.py 283): INFO Epoch: [8] [2010/2502] eta: 0:06:18 lr: 0.000018 loss_cls: 4.1872 (3.9239) grad_norm: 4.2123 (4.2885) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-05 09:00:22 root] (utils.py 283): INFO Epoch: [8] [2020/2502] eta: 0:06:10 lr: 0.000018 loss_cls: 3.7854 (3.9234) grad_norm: 4.1686 (4.2879) time: 0.7910 data: 0.0002 max mem: 8421 +[2024-12-05 09:00:30 root] (utils.py 283): INFO Epoch: [8] [2030/2502] eta: 0:06:02 lr: 0.000018 loss_cls: 3.9920 (3.9244) grad_norm: 4.0386 (4.2868) time: 0.7908 data: 0.0002 max mem: 8421 +[2024-12-05 09:00:38 root] (utils.py 283): INFO Epoch: [8] [2040/2502] eta: 0:05:55 lr: 0.000018 loss_cls: 3.9920 (3.9237) grad_norm: 4.0723 (4.2863) time: 0.7878 data: 0.0002 max mem: 8421 +[2024-12-05 09:00:46 root] (utils.py 283): INFO Epoch: [8] [2050/2502] eta: 0:05:47 lr: 0.000018 loss_cls: 4.1078 (3.9244) grad_norm: 4.1303 (4.2858) time: 0.7891 data: 0.0002 max mem: 8421 +[2024-12-05 09:00:54 root] (utils.py 283): INFO Epoch: [8] [2060/2502] eta: 0:05:39 lr: 0.000018 loss_cls: 4.1078 (3.9247) grad_norm: 4.2111 (4.2868) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:02 root] (utils.py 283): INFO Epoch: [8] [2070/2502] eta: 0:05:32 lr: 0.000018 loss_cls: 4.2250 (3.9257) grad_norm: 4.3403 (4.2871) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:09 root] (utils.py 283): INFO Epoch: [8] [2080/2502] eta: 0:05:24 lr: 0.000018 loss_cls: 4.0499 (3.9259) grad_norm: 4.2259 (4.2876) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:17 root] (utils.py 283): INFO Epoch: [8] [2090/2502] eta: 0:05:16 lr: 0.000018 loss_cls: 3.8027 (3.9240) grad_norm: 4.1539 (4.2870) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:25 root] (utils.py 283): INFO Epoch: [8] [2100/2502] eta: 0:05:09 lr: 0.000018 loss_cls: 3.7483 (3.9230) grad_norm: 4.1056 (4.2885) time: 0.7699 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:33 root] (utils.py 283): INFO Epoch: [8] [2110/2502] eta: 0:05:01 lr: 0.000018 loss_cls: 3.7973 (3.9227) grad_norm: 4.0511 (4.2878) time: 0.7759 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:40 root] (utils.py 283): INFO Epoch: [8] [2120/2502] eta: 0:04:53 lr: 0.000018 loss_cls: 3.8131 (3.9217) grad_norm: 4.0167 (4.2871) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:48 root] (utils.py 283): INFO Epoch: [8] [2130/2502] eta: 0:04:46 lr: 0.000018 loss_cls: 3.6386 (3.9207) grad_norm: 4.0035 (4.2868) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-05 09:01:56 root] (utils.py 283): INFO Epoch: [8] [2140/2502] eta: 0:04:38 lr: 0.000018 loss_cls: 3.6714 (3.9204) grad_norm: 4.0243 (4.2865) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:03 root] (utils.py 283): INFO Epoch: [8] [2150/2502] eta: 0:04:30 lr: 0.000018 loss_cls: 3.7351 (3.9196) grad_norm: 4.0354 (4.2855) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:11 root] (utils.py 283): INFO Epoch: [8] [2160/2502] eta: 0:04:23 lr: 0.000018 loss_cls: 3.9971 (3.9205) grad_norm: 4.1233 (4.2858) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:19 root] (utils.py 283): INFO Epoch: [8] [2170/2502] eta: 0:04:15 lr: 0.000018 loss_cls: 3.9417 (3.9196) grad_norm: 4.1596 (4.2851) time: 0.7692 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:26 root] (utils.py 283): INFO Epoch: [8] [2180/2502] eta: 0:04:07 lr: 0.000018 loss_cls: 3.6036 (3.9185) grad_norm: 3.9680 (4.2836) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:34 root] (utils.py 283): INFO Epoch: [8] [2190/2502] eta: 0:03:59 lr: 0.000018 loss_cls: 3.6817 (3.9177) grad_norm: 4.0866 (4.2841) time: 0.7600 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:41 root] (utils.py 283): INFO Epoch: [8] [2200/2502] eta: 0:03:52 lr: 0.000018 loss_cls: 3.7120 (3.9167) grad_norm: 4.1872 (4.2840) time: 0.7598 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:49 root] (utils.py 283): INFO Epoch: [8] [2210/2502] eta: 0:03:44 lr: 0.000018 loss_cls: 3.9078 (3.9165) grad_norm: 4.1001 (4.2833) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 09:02:57 root] (utils.py 283): INFO Epoch: [8] [2220/2502] eta: 0:03:36 lr: 0.000018 loss_cls: 3.8761 (3.9164) grad_norm: 4.1305 (4.2828) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:05 root] (utils.py 283): INFO Epoch: [8] [2230/2502] eta: 0:03:29 lr: 0.000018 loss_cls: 4.1226 (3.9165) grad_norm: 4.0462 (4.2825) time: 0.7728 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:12 root] (utils.py 283): INFO Epoch: [8] [2240/2502] eta: 0:03:21 lr: 0.000018 loss_cls: 4.0735 (3.9162) grad_norm: 4.2746 (4.2838) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:20 root] (utils.py 283): INFO Epoch: [8] [2250/2502] eta: 0:03:13 lr: 0.000018 loss_cls: 3.9083 (3.9167) grad_norm: 4.2672 (4.2843) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:28 root] (utils.py 283): INFO Epoch: [8] [2260/2502] eta: 0:03:06 lr: 0.000018 loss_cls: 3.9551 (3.9173) grad_norm: 4.1901 (4.2838) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:35 root] (utils.py 283): INFO Epoch: [8] [2270/2502] eta: 0:02:58 lr: 0.000018 loss_cls: 3.7240 (3.9160) grad_norm: 4.0660 (4.2829) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:43 root] (utils.py 283): INFO Epoch: [8] [2280/2502] eta: 0:02:50 lr: 0.000018 loss_cls: 3.7240 (3.9169) grad_norm: 4.1399 (4.2827) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:51 root] (utils.py 283): INFO Epoch: [8] [2290/2502] eta: 0:02:43 lr: 0.000018 loss_cls: 4.1718 (3.9175) grad_norm: 4.1634 (4.2822) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 09:03:58 root] (utils.py 283): INFO Epoch: [8] [2300/2502] eta: 0:02:35 lr: 0.000018 loss_cls: 4.0857 (3.9177) grad_norm: 4.1634 (4.2820) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 09:04:06 root] (utils.py 283): INFO Epoch: [8] [2310/2502] eta: 0:02:27 lr: 0.000018 loss_cls: 4.0857 (3.9176) grad_norm: 4.1154 (4.2817) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-05 09:04:14 root] (utils.py 283): INFO Epoch: [8] [2320/2502] eta: 0:02:19 lr: 0.000018 loss_cls: 4.2149 (3.9181) grad_norm: 4.0923 (4.2815) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-05 09:04:21 root] (utils.py 283): INFO Epoch: [8] [2330/2502] eta: 0:02:12 lr: 0.000018 loss_cls: 4.1217 (3.9176) grad_norm: 4.1111 (4.2813) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-05 09:04:29 root] (utils.py 283): INFO Epoch: [8] [2340/2502] eta: 0:02:04 lr: 0.000018 loss_cls: 4.1217 (3.9181) grad_norm: 4.0355 (4.2805) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 09:04:37 root] (utils.py 283): INFO Epoch: [8] [2350/2502] eta: 0:01:56 lr: 0.000018 loss_cls: 4.0721 (3.9179) grad_norm: 4.0355 (4.2793) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 09:04:44 root] (utils.py 283): INFO Epoch: [8] [2360/2502] eta: 0:01:49 lr: 0.000018 loss_cls: 3.7660 (3.9161) grad_norm: 4.0792 (4.2791) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-05 09:04:52 root] (utils.py 283): INFO Epoch: [8] [2370/2502] eta: 0:01:41 lr: 0.000018 loss_cls: 3.6887 (3.9153) grad_norm: 4.1560 (4.2789) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 09:05:00 root] (utils.py 283): INFO Epoch: [8] [2380/2502] eta: 0:01:33 lr: 0.000018 loss_cls: 4.1200 (3.9155) grad_norm: 4.1447 (4.2784) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 09:05:07 root] (utils.py 283): INFO Epoch: [8] [2390/2502] eta: 0:01:26 lr: 0.000018 loss_cls: 3.8655 (3.9145) grad_norm: 4.0616 (4.2793) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-05 09:05:15 root] (utils.py 283): INFO Epoch: [8] [2400/2502] eta: 0:01:18 lr: 0.000018 loss_cls: 3.8655 (3.9147) grad_norm: 4.1495 (4.2793) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-05 09:05:23 root] (utils.py 283): INFO Epoch: [8] [2410/2502] eta: 0:01:10 lr: 0.000018 loss_cls: 3.9843 (3.9138) grad_norm: 4.1895 (4.2790) time: 0.7718 data: 0.0003 max mem: 8421 +[2024-12-05 09:05:31 root] (utils.py 283): INFO Epoch: [8] [2420/2502] eta: 0:01:03 lr: 0.000018 loss_cls: 3.8840 (3.9136) grad_norm: 4.0905 (4.2785) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 09:05:38 root] (utils.py 283): INFO Epoch: [8] [2430/2502] eta: 0:00:55 lr: 0.000018 loss_cls: 3.7316 (3.9121) grad_norm: 4.0997 (4.2781) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 09:05:46 root] (utils.py 283): INFO Epoch: [8] [2440/2502] eta: 0:00:47 lr: 0.000018 loss_cls: 3.7422 (3.9122) grad_norm: 4.1828 (4.2780) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 09:05:53 root] (utils.py 283): INFO Epoch: [8] [2450/2502] eta: 0:00:39 lr: 0.000018 loss_cls: 3.8528 (3.9116) grad_norm: 4.2567 (4.2778) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 09:06:01 root] (utils.py 283): INFO Epoch: [8] [2460/2502] eta: 0:00:32 lr: 0.000018 loss_cls: 3.8921 (3.9123) grad_norm: 4.0469 (4.2769) time: 0.7728 data: 0.0002 max mem: 8421 +[2024-12-05 09:06:09 root] (utils.py 283): INFO Epoch: [8] [2470/2502] eta: 0:00:24 lr: 0.000018 loss_cls: 3.8921 (3.9119) grad_norm: 4.0469 (4.2818) time: 0.7812 data: 0.0002 max mem: 8421 +[2024-12-05 09:06:17 root] (utils.py 283): INFO Epoch: [8] [2480/2502] eta: 0:00:16 lr: 0.000018 loss_cls: 3.8732 (3.9117) grad_norm: 4.2180 (4.2897) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-05 09:06:25 root] (utils.py 283): INFO Epoch: [8] [2490/2502] eta: 0:00:09 lr: 0.000018 loss_cls: 3.9560 (3.9122) grad_norm: 4.4824 (4.2925) time: 0.7847 data: 0.0217 max mem: 8421 +[2024-12-05 09:06:32 root] (utils.py 283): INFO Epoch: [8] [2500/2502] eta: 0:00:01 lr: 0.000018 loss_cls: 4.0699 (3.9120) grad_norm: 4.2484 (4.2923) time: 0.7841 data: 0.0217 max mem: 8421 +[2024-12-05 09:06:33 root] (utils.py 283): INFO Epoch: [8] [2501/2502] eta: 0:00:00 lr: 0.000018 loss_cls: 4.0699 (3.9121) grad_norm: 4.1717 (4.2922) time: 0.7842 data: 0.0217 max mem: 8421 +[2024-12-05 09:06:33 root] (utils.py 297): INFO Epoch: [8] Total time: 0:32:04 (0.7692 s / it) +[2024-12-05 09:06:33 root] (engine.py 178): INFO Averaged stats:lr: 0.000018 loss_cls: 4.0699 (3.9274) grad_norm: 4.1717 (4.2922) +[2024-12-05 09:06:34 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7861 (0.7861) acc1: 83.5938 (83.5938) acc3: 93.7500 (93.7500) acc5: 96.8750 (96.8750) time: 0.1309 data: 0.0002 max mem: 8421 +[2024-12-05 09:06:35 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8232 (0.8661) acc1: 82.0312 (81.6761) acc3: 93.7500 (92.8977) acc5: 96.0938 (95.2415) time: 0.1312 data: 0.0003 max mem: 8421 +[2024-12-05 09:06:36 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9108 (0.9227) acc1: 79.6875 (80.1711) acc3: 92.1875 (92.2991) acc5: 94.5312 (94.7545) time: 0.1315 data: 0.0004 max mem: 8421 +[2024-12-05 09:06:38 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9813 (0.9274) acc1: 79.6875 (79.7379) acc3: 91.4062 (92.5907) acc5: 94.5312 (94.9849) time: 0.1317 data: 0.0004 max mem: 8421 +[2024-12-05 09:06:39 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8830 (0.9206) acc1: 80.4688 (80.0495) acc3: 92.9688 (92.7210) acc5: 95.3125 (95.0648) time: 0.1329 data: 0.0005 max mem: 8421 +[2024-12-05 09:06:40 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0754 (1.0169) acc1: 73.4375 (78.0025) acc3: 88.2812 (91.0692) acc5: 92.1875 (93.8419) time: 0.1332 data: 0.0005 max mem: 8421 +[2024-12-05 09:06:42 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3612 (1.0640) acc1: 71.8750 (77.1516) acc3: 85.1562 (90.2664) acc5: 89.8438 (93.1609) time: 0.1485 data: 0.0171 max mem: 8421 +[2024-12-05 09:06:44 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2923 (1.1091) acc1: 71.8750 (75.9683) acc3: 85.9375 (89.6017) acc5: 89.8438 (92.7487) time: 0.1639 data: 0.0327 max mem: 8421 +[2024-12-05 09:06:45 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3449 (1.1471) acc1: 67.9688 (75.1833) acc3: 83.5938 (88.8985) acc5: 89.0625 (92.2068) time: 0.1475 data: 0.0163 max mem: 8421 +[2024-12-05 09:06:46 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3651 (1.1798) acc1: 67.1875 (74.3046) acc3: 83.5938 (88.3070) acc5: 88.2812 (91.6981) time: 0.1317 data: 0.0006 max mem: 8421 +[2024-12-05 09:06:47 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2316 (1.1673) acc1: 72.6562 (74.4960) acc3: 85.9375 (88.5120) acc5: 90.6250 (91.9120) time: 0.1295 data: 0.0006 max mem: 8421 +[2024-12-05 09:06:47 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1383 s / it) +[2024-12-05 09:06:47 root] (engine.py 263): INFO * Acc@1 74.178 Acc@3 88.516 Acc@5 91.894 loss 1.169 flops 1.285 layer_flops 1.251 +[2024-12-05 09:06:47 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.2% +[2024-12-05 09:06:47 root] (main.py 550): INFO Max accuracy: 74.18% +[2024-12-05 09:06:48 root] (utils.py 283): INFO Epoch: [9] [ 0/2502] eta: 0:31:41 lr: 0.000017 loss_cls: 4.5914 (4.5914) grad_norm: 5.5981 (5.5981) time: 0.7598 data: 0.0004 max mem: 8421 +[2024-12-05 09:06:56 root] (utils.py 283): INFO Epoch: [9] [ 10/2502] eta: 0:31:33 lr: 0.000017 loss_cls: 4.2002 (4.2121) grad_norm: 4.2790 (4.3491) time: 0.7596 data: 0.0003 max mem: 8421 +[2024-12-05 09:07:03 root] (utils.py 283): INFO Epoch: [9] [ 20/2502] eta: 0:31:26 lr: 0.000017 loss_cls: 4.0691 (4.1112) grad_norm: 4.1668 (4.3645) time: 0.7601 data: 0.0002 max mem: 8421 +[2024-12-05 09:07:11 root] (utils.py 283): INFO Epoch: [9] [ 30/2502] eta: 0:31:17 lr: 0.000017 loss_cls: 4.0174 (4.0258) grad_norm: 4.0608 (4.3512) time: 0.7596 data: 0.0003 max mem: 8421 +[2024-12-05 09:07:19 root] (utils.py 283): INFO Epoch: [9] [ 40/2502] eta: 0:31:12 lr: 0.000017 loss_cls: 3.8709 (4.0209) grad_norm: 4.0889 (4.3208) time: 0.7614 data: 0.0002 max mem: 8421 +[2024-12-05 09:07:26 root] (utils.py 283): INFO Epoch: [9] [ 50/2502] eta: 0:31:05 lr: 0.000017 loss_cls: 4.1504 (3.9912) grad_norm: 4.1239 (4.2800) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 09:07:34 root] (utils.py 283): INFO Epoch: [9] [ 60/2502] eta: 0:30:59 lr: 0.000017 loss_cls: 3.9915 (3.9627) grad_norm: 4.1239 (4.2814) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-05 09:07:42 root] (utils.py 283): INFO Epoch: [9] [ 70/2502] eta: 0:30:52 lr: 0.000017 loss_cls: 3.9915 (3.9787) grad_norm: 4.1185 (4.2756) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-05 09:07:49 root] (utils.py 283): INFO Epoch: [9] [ 80/2502] eta: 0:30:48 lr: 0.000017 loss_cls: 4.1761 (3.9930) grad_norm: 4.1367 (4.2733) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 09:07:57 root] (utils.py 283): INFO Epoch: [9] [ 90/2502] eta: 0:30:39 lr: 0.000017 loss_cls: 3.8375 (3.9757) grad_norm: 4.1804 (4.2715) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:04 root] (utils.py 283): INFO Epoch: [9] [ 100/2502] eta: 0:30:31 lr: 0.000017 loss_cls: 3.7250 (3.9601) grad_norm: 4.0781 (4.2441) time: 0.7596 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:12 root] (utils.py 283): INFO Epoch: [9] [ 110/2502] eta: 0:30:22 lr: 0.000017 loss_cls: 4.1233 (3.9685) grad_norm: 4.1062 (4.2468) time: 0.7594 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:20 root] (utils.py 283): INFO Epoch: [9] [ 120/2502] eta: 0:30:14 lr: 0.000017 loss_cls: 4.1262 (3.9716) grad_norm: 4.1083 (4.2274) time: 0.7593 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:27 root] (utils.py 283): INFO Epoch: [9] [ 130/2502] eta: 0:30:08 lr: 0.000017 loss_cls: 4.0316 (3.9618) grad_norm: 4.0649 (4.2189) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:35 root] (utils.py 283): INFO Epoch: [9] [ 140/2502] eta: 0:30:01 lr: 0.000017 loss_cls: 4.0356 (3.9574) grad_norm: 4.0858 (4.2123) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:43 root] (utils.py 283): INFO Epoch: [9] [ 150/2502] eta: 0:29:54 lr: 0.000017 loss_cls: 4.0996 (3.9646) grad_norm: 4.1526 (4.2247) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:50 root] (utils.py 283): INFO Epoch: [9] [ 160/2502] eta: 0:29:46 lr: 0.000017 loss_cls: 4.1388 (3.9710) grad_norm: 4.2131 (4.2171) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 09:08:58 root] (utils.py 283): INFO Epoch: [9] [ 170/2502] eta: 0:29:38 lr: 0.000017 loss_cls: 4.1423 (3.9790) grad_norm: 3.9207 (4.2035) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:06 root] (utils.py 283): INFO Epoch: [9] [ 180/2502] eta: 0:29:31 lr: 0.000017 loss_cls: 4.1886 (3.9786) grad_norm: 3.9420 (4.2079) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:13 root] (utils.py 283): INFO Epoch: [9] [ 190/2502] eta: 0:29:23 lr: 0.000017 loss_cls: 4.1886 (3.9812) grad_norm: 4.0908 (4.2037) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:21 root] (utils.py 283): INFO Epoch: [9] [ 200/2502] eta: 0:29:14 lr: 0.000017 loss_cls: 4.0718 (3.9749) grad_norm: 4.0908 (4.2033) time: 0.7587 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:28 root] (utils.py 283): INFO Epoch: [9] [ 210/2502] eta: 0:29:06 lr: 0.000017 loss_cls: 4.0812 (3.9825) grad_norm: 4.2921 (4.2202) time: 0.7588 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:36 root] (utils.py 283): INFO Epoch: [9] [ 220/2502] eta: 0:28:58 lr: 0.000017 loss_cls: 4.1363 (3.9833) grad_norm: 4.4153 (4.2252) time: 0.7593 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:43 root] (utils.py 283): INFO Epoch: [9] [ 230/2502] eta: 0:28:51 lr: 0.000017 loss_cls: 4.0740 (3.9802) grad_norm: 4.0629 (4.2213) time: 0.7603 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:51 root] (utils.py 283): INFO Epoch: [9] [ 240/2502] eta: 0:28:43 lr: 0.000017 loss_cls: 3.4790 (3.9668) grad_norm: 4.1343 (4.2176) time: 0.7595 data: 0.0002 max mem: 8421 +[2024-12-05 09:09:59 root] (utils.py 283): INFO Epoch: [9] [ 250/2502] eta: 0:28:35 lr: 0.000017 loss_cls: 3.5692 (3.9670) grad_norm: 4.1343 (4.2139) time: 0.7582 data: 0.0002 max mem: 8421 +[2024-12-05 09:10:06 root] (utils.py 283): INFO Epoch: [9] [ 260/2502] eta: 0:28:28 lr: 0.000017 loss_cls: 4.1822 (3.9783) grad_norm: 4.0819 (4.2140) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 09:10:14 root] (utils.py 283): INFO Epoch: [9] [ 270/2502] eta: 0:28:22 lr: 0.000017 loss_cls: 3.9468 (3.9647) grad_norm: 4.0497 (4.2083) time: 0.7769 data: 0.0002 max mem: 8421 +[2024-12-05 09:10:22 root] (utils.py 283): INFO Epoch: [9] [ 280/2502] eta: 0:28:15 lr: 0.000017 loss_cls: 3.4945 (3.9543) grad_norm: 4.0816 (4.2056) time: 0.7777 data: 0.0002 max mem: 8421 +[2024-12-05 09:10:29 root] (utils.py 283): INFO Epoch: [9] [ 290/2502] eta: 0:28:07 lr: 0.000017 loss_cls: 3.7014 (3.9495) grad_norm: 4.1894 (4.2321) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 09:10:37 root] (utils.py 283): INFO Epoch: [9] [ 300/2502] eta: 0:27:59 lr: 0.000017 loss_cls: 4.0227 (3.9483) grad_norm: 4.2730 (4.2308) time: 0.7591 data: 0.0002 max mem: 8421 +[2024-12-05 09:10:45 root] (utils.py 283): INFO Epoch: [9] [ 310/2502] eta: 0:27:52 lr: 0.000017 loss_cls: 4.1560 (3.9508) grad_norm: 4.2008 (4.2277) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 09:10:52 root] (utils.py 283): INFO Epoch: [9] [ 320/2502] eta: 0:27:44 lr: 0.000017 loss_cls: 4.1033 (3.9492) grad_norm: 3.9616 (4.2300) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:00 root] (utils.py 283): INFO Epoch: [9] [ 330/2502] eta: 0:27:36 lr: 0.000017 loss_cls: 3.8854 (3.9442) grad_norm: 4.0224 (4.2249) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:08 root] (utils.py 283): INFO Epoch: [9] [ 340/2502] eta: 0:27:29 lr: 0.000017 loss_cls: 4.0215 (3.9531) grad_norm: 4.0080 (4.2192) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:15 root] (utils.py 283): INFO Epoch: [9] [ 350/2502] eta: 0:27:21 lr: 0.000017 loss_cls: 4.0215 (3.9466) grad_norm: 4.0641 (4.2197) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:23 root] (utils.py 283): INFO Epoch: [9] [ 360/2502] eta: 0:27:14 lr: 0.000017 loss_cls: 3.9583 (3.9525) grad_norm: 4.3063 (4.2360) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:31 root] (utils.py 283): INFO Epoch: [9] [ 370/2502] eta: 0:27:06 lr: 0.000017 loss_cls: 3.9678 (3.9496) grad_norm: 4.1116 (4.2384) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:38 root] (utils.py 283): INFO Epoch: [9] [ 380/2502] eta: 0:26:58 lr: 0.000017 loss_cls: 3.8866 (3.9464) grad_norm: 4.1092 (4.2366) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:46 root] (utils.py 283): INFO Epoch: [9] [ 390/2502] eta: 0:26:52 lr: 0.000017 loss_cls: 4.1123 (3.9492) grad_norm: 4.0991 (4.2331) time: 0.7710 data: 0.0002 max mem: 8421 +[2024-12-05 09:11:54 root] (utils.py 283): INFO Epoch: [9] [ 400/2502] eta: 0:26:44 lr: 0.000017 loss_cls: 4.2911 (3.9552) grad_norm: 4.1065 (4.2347) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:01 root] (utils.py 283): INFO Epoch: [9] [ 410/2502] eta: 0:26:36 lr: 0.000017 loss_cls: 4.1358 (3.9591) grad_norm: 4.2303 (4.2341) time: 0.7602 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:09 root] (utils.py 283): INFO Epoch: [9] [ 420/2502] eta: 0:26:29 lr: 0.000017 loss_cls: 3.9908 (3.9547) grad_norm: 4.2103 (4.2330) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:16 root] (utils.py 283): INFO Epoch: [9] [ 430/2502] eta: 0:26:21 lr: 0.000017 loss_cls: 3.9145 (3.9534) grad_norm: 4.2285 (4.2339) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:24 root] (utils.py 283): INFO Epoch: [9] [ 440/2502] eta: 0:26:14 lr: 0.000017 loss_cls: 4.0287 (3.9517) grad_norm: 4.2225 (4.2328) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:32 root] (utils.py 283): INFO Epoch: [9] [ 450/2502] eta: 0:26:06 lr: 0.000017 loss_cls: 4.0235 (3.9530) grad_norm: 4.0794 (4.2306) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:39 root] (utils.py 283): INFO Epoch: [9] [ 460/2502] eta: 0:25:59 lr: 0.000017 loss_cls: 4.0235 (3.9544) grad_norm: 3.9478 (4.2248) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:47 root] (utils.py 283): INFO Epoch: [9] [ 470/2502] eta: 0:25:51 lr: 0.000017 loss_cls: 3.9902 (3.9497) grad_norm: 3.9752 (4.2246) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 09:12:55 root] (utils.py 283): INFO Epoch: [9] [ 480/2502] eta: 0:25:43 lr: 0.000017 loss_cls: 3.8162 (3.9434) grad_norm: 4.1039 (4.2253) time: 0.7633 data: 0.0003 max mem: 8421 +[2024-12-05 09:13:02 root] (utils.py 283): INFO Epoch: [9] [ 490/2502] eta: 0:25:36 lr: 0.000017 loss_cls: 3.7986 (3.9394) grad_norm: 4.1039 (4.2258) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-05 09:13:10 root] (utils.py 283): INFO Epoch: [9] [ 500/2502] eta: 0:25:28 lr: 0.000017 loss_cls: 3.9988 (3.9438) grad_norm: 4.0020 (4.2261) time: 0.7671 data: 0.0003 max mem: 8421 +[2024-12-05 09:13:18 root] (utils.py 283): INFO Epoch: [9] [ 510/2502] eta: 0:25:21 lr: 0.000017 loss_cls: 4.3227 (3.9493) grad_norm: 4.1202 (4.2295) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 09:13:26 root] (utils.py 283): INFO Epoch: [9] [ 520/2502] eta: 0:25:14 lr: 0.000017 loss_cls: 4.1781 (3.9496) grad_norm: 4.1385 (4.2313) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-05 09:13:33 root] (utils.py 283): INFO Epoch: [9] [ 530/2502] eta: 0:25:07 lr: 0.000017 loss_cls: 4.1224 (3.9523) grad_norm: 4.1870 (4.2307) time: 0.7713 data: 0.0003 max mem: 8421 +[2024-12-05 09:13:41 root] (utils.py 283): INFO Epoch: [9] [ 540/2502] eta: 0:24:59 lr: 0.000017 loss_cls: 4.1372 (3.9538) grad_norm: 4.1721 (4.2299) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-05 09:13:49 root] (utils.py 283): INFO Epoch: [9] [ 550/2502] eta: 0:24:51 lr: 0.000017 loss_cls: 3.9467 (3.9512) grad_norm: 4.2233 (4.2318) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 09:13:56 root] (utils.py 283): INFO Epoch: [9] [ 560/2502] eta: 0:24:44 lr: 0.000017 loss_cls: 3.8482 (3.9488) grad_norm: 4.0698 (4.2307) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 09:14:04 root] (utils.py 283): INFO Epoch: [9] [ 570/2502] eta: 0:24:36 lr: 0.000017 loss_cls: 3.8482 (3.9430) grad_norm: 4.1419 (4.2353) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 09:14:11 root] (utils.py 283): INFO Epoch: [9] [ 580/2502] eta: 0:24:28 lr: 0.000017 loss_cls: 4.1821 (3.9453) grad_norm: 4.1620 (4.2407) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 09:14:19 root] (utils.py 283): INFO Epoch: [9] [ 590/2502] eta: 0:24:21 lr: 0.000017 loss_cls: 4.2246 (3.9488) grad_norm: 4.1818 (4.2410) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 09:14:27 root] (utils.py 283): INFO Epoch: [9] [ 600/2502] eta: 0:24:14 lr: 0.000017 loss_cls: 4.1661 (3.9498) grad_norm: 4.1098 (4.2407) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 09:14:35 root] (utils.py 283): INFO Epoch: [9] [ 610/2502] eta: 0:24:06 lr: 0.000017 loss_cls: 3.9209 (3.9469) grad_norm: 4.1277 (4.2428) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 09:14:42 root] (utils.py 283): INFO Epoch: [9] [ 620/2502] eta: 0:23:58 lr: 0.000017 loss_cls: 3.9552 (3.9476) grad_norm: 4.1870 (4.2431) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 09:14:50 root] (utils.py 283): INFO Epoch: [9] [ 630/2502] eta: 0:23:51 lr: 0.000017 loss_cls: 4.1212 (3.9462) grad_norm: 4.1788 (4.2421) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 09:14:57 root] (utils.py 283): INFO Epoch: [9] [ 640/2502] eta: 0:23:43 lr: 0.000017 loss_cls: 4.1219 (3.9512) grad_norm: 4.2505 (4.2450) time: 0.7595 data: 0.0002 max mem: 8421 +[2024-12-05 09:15:05 root] (utils.py 283): INFO Epoch: [9] [ 650/2502] eta: 0:23:35 lr: 0.000017 loss_cls: 4.0869 (3.9486) grad_norm: 4.1607 (4.2441) time: 0.7608 data: 0.0003 max mem: 8421 +[2024-12-05 09:15:13 root] (utils.py 283): INFO Epoch: [9] [ 660/2502] eta: 0:23:27 lr: 0.000017 loss_cls: 3.7110 (3.9431) grad_norm: 4.1074 (4.2473) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 09:15:20 root] (utils.py 283): INFO Epoch: [9] [ 670/2502] eta: 0:23:20 lr: 0.000017 loss_cls: 3.9144 (3.9461) grad_norm: 4.1074 (4.2461) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 09:15:28 root] (utils.py 283): INFO Epoch: [9] [ 680/2502] eta: 0:23:12 lr: 0.000017 loss_cls: 3.9156 (3.9433) grad_norm: 4.1271 (4.2458) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 09:15:36 root] (utils.py 283): INFO Epoch: [9] [ 690/2502] eta: 0:23:05 lr: 0.000017 loss_cls: 3.9156 (3.9432) grad_norm: 4.1851 (4.2451) time: 0.7711 data: 0.0002 max mem: 8421 +[2024-12-05 09:15:44 root] (utils.py 283): INFO Epoch: [9] [ 700/2502] eta: 0:22:58 lr: 0.000017 loss_cls: 4.1337 (3.9459) grad_norm: 4.2412 (4.2453) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 09:15:51 root] (utils.py 283): INFO Epoch: [9] [ 710/2502] eta: 0:22:50 lr: 0.000017 loss_cls: 4.1550 (3.9474) grad_norm: 4.0606 (4.2415) time: 0.7726 data: 0.0003 max mem: 8421 +[2024-12-05 09:15:59 root] (utils.py 283): INFO Epoch: [9] [ 720/2502] eta: 0:22:42 lr: 0.000017 loss_cls: 4.1326 (3.9480) grad_norm: 4.0333 (4.2442) time: 0.7599 data: 0.0002 max mem: 8421 +[2024-12-05 09:16:07 root] (utils.py 283): INFO Epoch: [9] [ 730/2502] eta: 0:22:35 lr: 0.000017 loss_cls: 4.0064 (3.9448) grad_norm: 4.2231 (4.2441) time: 0.7609 data: 0.0002 max mem: 8421 +[2024-12-05 09:16:14 root] (utils.py 283): INFO Epoch: [9] [ 740/2502] eta: 0:22:27 lr: 0.000017 loss_cls: 3.7751 (3.9414) grad_norm: 4.0485 (4.2416) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 09:16:22 root] (utils.py 283): INFO Epoch: [9] [ 750/2502] eta: 0:22:19 lr: 0.000017 loss_cls: 4.0138 (3.9407) grad_norm: 4.1649 (4.2423) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 09:16:29 root] (utils.py 283): INFO Epoch: [9] [ 760/2502] eta: 0:22:12 lr: 0.000017 loss_cls: 3.9451 (3.9392) grad_norm: 4.2619 (4.2437) time: 0.7622 data: 0.0002 max mem: 8421 +[2024-12-05 09:16:37 root] (utils.py 283): INFO Epoch: [9] [ 770/2502] eta: 0:22:04 lr: 0.000017 loss_cls: 3.8423 (3.9379) grad_norm: 4.1122 (4.2427) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 09:16:45 root] (utils.py 283): INFO Epoch: [9] [ 780/2502] eta: 0:21:56 lr: 0.000017 loss_cls: 3.8542 (3.9400) grad_norm: 4.2632 (4.2458) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 09:16:52 root] (utils.py 283): INFO Epoch: [9] [ 790/2502] eta: 0:21:48 lr: 0.000017 loss_cls: 3.9190 (3.9381) grad_norm: 4.2672 (4.2461) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 09:17:00 root] (utils.py 283): INFO Epoch: [9] [ 800/2502] eta: 0:21:41 lr: 0.000017 loss_cls: 3.5793 (3.9308) grad_norm: 4.0480 (4.2419) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 09:17:08 root] (utils.py 283): INFO Epoch: [9] [ 810/2502] eta: 0:21:33 lr: 0.000017 loss_cls: 3.2488 (3.9289) grad_norm: 3.8820 (4.2378) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-05 09:17:15 root] (utils.py 283): INFO Epoch: [9] [ 820/2502] eta: 0:21:25 lr: 0.000017 loss_cls: 3.9769 (3.9293) grad_norm: 4.0413 (4.2398) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 09:17:23 root] (utils.py 283): INFO Epoch: [9] [ 830/2502] eta: 0:21:18 lr: 0.000017 loss_cls: 4.0728 (3.9289) grad_norm: 4.2958 (4.2432) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 09:17:30 root] (utils.py 283): INFO Epoch: [9] [ 840/2502] eta: 0:21:10 lr: 0.000017 loss_cls: 4.0728 (3.9299) grad_norm: 4.1231 (4.2449) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 09:17:38 root] (utils.py 283): INFO Epoch: [9] [ 850/2502] eta: 0:21:02 lr: 0.000017 loss_cls: 4.0915 (3.9303) grad_norm: 4.1608 (4.2480) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 09:17:46 root] (utils.py 283): INFO Epoch: [9] [ 860/2502] eta: 0:20:55 lr: 0.000017 loss_cls: 4.2406 (3.9332) grad_norm: 4.2147 (4.2493) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 09:17:53 root] (utils.py 283): INFO Epoch: [9] [ 870/2502] eta: 0:20:47 lr: 0.000017 loss_cls: 4.3334 (3.9375) grad_norm: 4.2105 (4.2493) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 09:18:01 root] (utils.py 283): INFO Epoch: [9] [ 880/2502] eta: 0:20:39 lr: 0.000017 loss_cls: 4.0926 (3.9372) grad_norm: 4.1633 (4.2486) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 09:18:09 root] (utils.py 283): INFO Epoch: [9] [ 890/2502] eta: 0:20:32 lr: 0.000017 loss_cls: 3.5266 (3.9310) grad_norm: 3.9496 (4.2455) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 09:18:16 root] (utils.py 283): INFO Epoch: [9] [ 900/2502] eta: 0:20:24 lr: 0.000017 loss_cls: 3.2089 (3.9263) grad_norm: 3.9496 (4.2473) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 09:18:24 root] (utils.py 283): INFO Epoch: [9] [ 910/2502] eta: 0:20:16 lr: 0.000017 loss_cls: 3.8897 (3.9266) grad_norm: 4.1198 (4.2522) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 09:18:32 root] (utils.py 283): INFO Epoch: [9] [ 920/2502] eta: 0:20:09 lr: 0.000017 loss_cls: 4.0705 (3.9291) grad_norm: 4.1189 (4.2532) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 09:18:39 root] (utils.py 283): INFO Epoch: [9] [ 930/2502] eta: 0:20:01 lr: 0.000017 loss_cls: 4.3060 (3.9328) grad_norm: 4.1189 (4.2592) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 09:18:47 root] (utils.py 283): INFO Epoch: [9] [ 940/2502] eta: 0:19:53 lr: 0.000017 loss_cls: 4.1860 (3.9336) grad_norm: 4.2114 (4.2598) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 09:18:54 root] (utils.py 283): INFO Epoch: [9] [ 950/2502] eta: 0:19:46 lr: 0.000017 loss_cls: 3.8834 (3.9321) grad_norm: 4.2911 (4.2603) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:02 root] (utils.py 283): INFO Epoch: [9] [ 960/2502] eta: 0:19:38 lr: 0.000017 loss_cls: 3.9296 (3.9336) grad_norm: 4.2402 (4.2601) time: 0.7711 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:10 root] (utils.py 283): INFO Epoch: [9] [ 970/2502] eta: 0:19:31 lr: 0.000017 loss_cls: 4.1161 (3.9326) grad_norm: 4.2402 (4.2599) time: 0.7709 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:17 root] (utils.py 283): INFO Epoch: [9] [ 980/2502] eta: 0:19:23 lr: 0.000017 loss_cls: 3.8625 (3.9325) grad_norm: 4.1199 (4.2597) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:25 root] (utils.py 283): INFO Epoch: [9] [ 990/2502] eta: 0:19:15 lr: 0.000017 loss_cls: 3.9665 (3.9319) grad_norm: 3.9953 (4.2572) time: 0.7622 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:33 root] (utils.py 283): INFO Epoch: [9] [1000/2502] eta: 0:19:08 lr: 0.000017 loss_cls: 3.9982 (3.9332) grad_norm: 4.0781 (4.2572) time: 0.7612 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:40 root] (utils.py 283): INFO Epoch: [9] [1010/2502] eta: 0:19:00 lr: 0.000017 loss_cls: 3.8572 (3.9288) grad_norm: 4.0899 (4.2536) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:48 root] (utils.py 283): INFO Epoch: [9] [1020/2502] eta: 0:18:52 lr: 0.000017 loss_cls: 3.7804 (3.9265) grad_norm: 3.9755 (4.2527) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 09:19:56 root] (utils.py 283): INFO Epoch: [9] [1030/2502] eta: 0:18:45 lr: 0.000017 loss_cls: 3.8398 (3.9263) grad_norm: 4.1367 (4.2520) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 09:20:03 root] (utils.py 283): INFO Epoch: [9] [1040/2502] eta: 0:18:37 lr: 0.000017 loss_cls: 3.8576 (3.9249) grad_norm: 4.1225 (4.2511) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 09:20:11 root] (utils.py 283): INFO Epoch: [9] [1050/2502] eta: 0:18:30 lr: 0.000017 loss_cls: 3.9442 (3.9247) grad_norm: 4.0153 (4.2488) time: 0.7701 data: 0.0002 max mem: 8421 +[2024-12-05 09:20:19 root] (utils.py 283): INFO Epoch: [9] [1060/2502] eta: 0:18:22 lr: 0.000017 loss_cls: 3.7789 (3.9219) grad_norm: 3.9637 (4.2465) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-05 09:20:26 root] (utils.py 283): INFO Epoch: [9] [1070/2502] eta: 0:18:14 lr: 0.000017 loss_cls: 3.6828 (3.9215) grad_norm: 4.0438 (4.2487) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 09:20:34 root] (utils.py 283): INFO Epoch: [9] [1080/2502] eta: 0:18:07 lr: 0.000017 loss_cls: 3.8468 (3.9210) grad_norm: 4.0403 (4.2469) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 09:20:42 root] (utils.py 283): INFO Epoch: [9] [1090/2502] eta: 0:17:59 lr: 0.000017 loss_cls: 3.6537 (3.9181) grad_norm: 3.9423 (4.2447) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 09:20:49 root] (utils.py 283): INFO Epoch: [9] [1100/2502] eta: 0:17:51 lr: 0.000017 loss_cls: 3.7221 (3.9176) grad_norm: 4.1145 (4.2444) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 09:20:57 root] (utils.py 283): INFO Epoch: [9] [1110/2502] eta: 0:17:44 lr: 0.000017 loss_cls: 4.0360 (3.9181) grad_norm: 4.1925 (4.2458) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 09:21:05 root] (utils.py 283): INFO Epoch: [9] [1120/2502] eta: 0:17:36 lr: 0.000017 loss_cls: 4.1526 (3.9199) grad_norm: 4.0613 (4.2439) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 09:21:12 root] (utils.py 283): INFO Epoch: [9] [1130/2502] eta: 0:17:28 lr: 0.000017 loss_cls: 4.0517 (3.9164) grad_norm: 4.0273 (4.2473) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 09:21:20 root] (utils.py 283): INFO Epoch: [9] [1140/2502] eta: 0:17:21 lr: 0.000017 loss_cls: 3.4015 (3.9129) grad_norm: 3.9937 (4.2450) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 09:21:27 root] (utils.py 283): INFO Epoch: [9] [1150/2502] eta: 0:17:13 lr: 0.000017 loss_cls: 3.4015 (3.9111) grad_norm: 3.9937 (4.2444) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 09:21:35 root] (utils.py 283): INFO Epoch: [9] [1160/2502] eta: 0:17:05 lr: 0.000017 loss_cls: 4.1417 (3.9122) grad_norm: 4.1864 (4.2440) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 09:21:43 root] (utils.py 283): INFO Epoch: [9] [1170/2502] eta: 0:16:58 lr: 0.000017 loss_cls: 4.2060 (3.9137) grad_norm: 4.1488 (4.2429) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-05 09:21:50 root] (utils.py 283): INFO Epoch: [9] [1180/2502] eta: 0:16:50 lr: 0.000017 loss_cls: 4.0080 (3.9121) grad_norm: 4.1262 (4.2420) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 09:21:58 root] (utils.py 283): INFO Epoch: [9] [1190/2502] eta: 0:16:43 lr: 0.000017 loss_cls: 3.8530 (3.9112) grad_norm: 4.1408 (4.2411) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 09:22:06 root] (utils.py 283): INFO Epoch: [9] [1200/2502] eta: 0:16:35 lr: 0.000017 loss_cls: 3.9637 (3.9112) grad_norm: 4.1546 (4.2406) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-05 09:22:14 root] (utils.py 283): INFO Epoch: [9] [1210/2502] eta: 0:16:27 lr: 0.000017 loss_cls: 3.9994 (3.9113) grad_norm: 4.1287 (4.2398) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 09:22:21 root] (utils.py 283): INFO Epoch: [9] [1220/2502] eta: 0:16:20 lr: 0.000017 loss_cls: 3.9279 (3.9100) grad_norm: 4.0610 (4.2381) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 09:22:29 root] (utils.py 283): INFO Epoch: [9] [1230/2502] eta: 0:16:12 lr: 0.000017 loss_cls: 3.8321 (3.9097) grad_norm: 4.0515 (4.2380) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 09:22:37 root] (utils.py 283): INFO Epoch: [9] [1240/2502] eta: 0:16:05 lr: 0.000017 loss_cls: 3.9699 (3.9114) grad_norm: 4.0721 (4.2366) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 09:22:44 root] (utils.py 283): INFO Epoch: [9] [1250/2502] eta: 0:15:57 lr: 0.000017 loss_cls: 4.1647 (3.9114) grad_norm: 3.9912 (4.2363) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 09:22:52 root] (utils.py 283): INFO Epoch: [9] [1260/2502] eta: 0:15:49 lr: 0.000017 loss_cls: 3.9261 (3.9124) grad_norm: 4.0931 (4.2357) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:00 root] (utils.py 283): INFO Epoch: [9] [1270/2502] eta: 0:15:42 lr: 0.000017 loss_cls: 4.3355 (3.9157) grad_norm: 4.2086 (4.2379) time: 0.7759 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:08 root] (utils.py 283): INFO Epoch: [9] [1280/2502] eta: 0:15:34 lr: 0.000017 loss_cls: 4.1724 (3.9154) grad_norm: 4.1014 (4.2371) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:15 root] (utils.py 283): INFO Epoch: [9] [1290/2502] eta: 0:15:27 lr: 0.000017 loss_cls: 3.9524 (3.9159) grad_norm: 4.0612 (4.2360) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:23 root] (utils.py 283): INFO Epoch: [9] [1300/2502] eta: 0:15:19 lr: 0.000017 loss_cls: 3.8401 (3.9142) grad_norm: 4.1320 (4.2379) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:31 root] (utils.py 283): INFO Epoch: [9] [1310/2502] eta: 0:15:12 lr: 0.000017 loss_cls: 3.7432 (3.9131) grad_norm: 4.1730 (4.2377) time: 0.7715 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:38 root] (utils.py 283): INFO Epoch: [9] [1320/2502] eta: 0:15:04 lr: 0.000017 loss_cls: 3.8329 (3.9130) grad_norm: 4.1910 (4.2375) time: 0.7694 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:46 root] (utils.py 283): INFO Epoch: [9] [1330/2502] eta: 0:14:56 lr: 0.000017 loss_cls: 4.0031 (3.9139) grad_norm: 4.1297 (4.2367) time: 0.7612 data: 0.0003 max mem: 8421 +[2024-12-05 09:23:54 root] (utils.py 283): INFO Epoch: [9] [1340/2502] eta: 0:14:49 lr: 0.000017 loss_cls: 4.1030 (3.9136) grad_norm: 4.2798 (4.2385) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:01 root] (utils.py 283): INFO Epoch: [9] [1350/2502] eta: 0:14:41 lr: 0.000017 loss_cls: 4.1030 (3.9150) grad_norm: 4.0784 (4.2372) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:09 root] (utils.py 283): INFO Epoch: [9] [1360/2502] eta: 0:14:33 lr: 0.000017 loss_cls: 4.1183 (3.9158) grad_norm: 4.0784 (4.2368) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:17 root] (utils.py 283): INFO Epoch: [9] [1370/2502] eta: 0:14:26 lr: 0.000017 loss_cls: 3.8814 (3.9140) grad_norm: 4.1421 (4.2369) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:24 root] (utils.py 283): INFO Epoch: [9] [1380/2502] eta: 0:14:18 lr: 0.000017 loss_cls: 3.8974 (3.9145) grad_norm: 4.2335 (4.2379) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:32 root] (utils.py 283): INFO Epoch: [9] [1390/2502] eta: 0:14:10 lr: 0.000017 loss_cls: 4.1435 (3.9150) grad_norm: 4.1536 (4.2368) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:40 root] (utils.py 283): INFO Epoch: [9] [1400/2502] eta: 0:14:03 lr: 0.000017 loss_cls: 3.9767 (3.9149) grad_norm: 4.0836 (4.2392) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:47 root] (utils.py 283): INFO Epoch: [9] [1410/2502] eta: 0:13:55 lr: 0.000017 loss_cls: 3.8744 (3.9134) grad_norm: 3.9782 (4.2374) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 09:24:55 root] (utils.py 283): INFO Epoch: [9] [1420/2502] eta: 0:13:47 lr: 0.000017 loss_cls: 3.8574 (3.9127) grad_norm: 3.9422 (4.2401) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:02 root] (utils.py 283): INFO Epoch: [9] [1430/2502] eta: 0:13:40 lr: 0.000017 loss_cls: 4.0217 (3.9122) grad_norm: 4.0640 (4.2390) time: 0.7603 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:10 root] (utils.py 283): INFO Epoch: [9] [1440/2502] eta: 0:13:32 lr: 0.000017 loss_cls: 3.9722 (3.9109) grad_norm: 4.2270 (4.2390) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:18 root] (utils.py 283): INFO Epoch: [9] [1450/2502] eta: 0:13:25 lr: 0.000017 loss_cls: 3.9086 (3.9104) grad_norm: 4.2546 (4.2397) time: 0.7840 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:26 root] (utils.py 283): INFO Epoch: [9] [1460/2502] eta: 0:13:17 lr: 0.000017 loss_cls: 3.9385 (3.9103) grad_norm: 4.1416 (4.2396) time: 0.7848 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:34 root] (utils.py 283): INFO Epoch: [9] [1470/2502] eta: 0:13:10 lr: 0.000017 loss_cls: 4.1858 (3.9092) grad_norm: 4.0567 (4.2410) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:42 root] (utils.py 283): INFO Epoch: [9] [1480/2502] eta: 0:13:02 lr: 0.000017 loss_cls: 4.2273 (3.9108) grad_norm: 3.9605 (4.2397) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:50 root] (utils.py 283): INFO Epoch: [9] [1490/2502] eta: 0:12:55 lr: 0.000017 loss_cls: 4.2146 (3.9120) grad_norm: 3.9632 (4.2392) time: 0.7876 data: 0.0002 max mem: 8421 +[2024-12-05 09:25:58 root] (utils.py 283): INFO Epoch: [9] [1500/2502] eta: 0:12:47 lr: 0.000017 loss_cls: 4.1440 (3.9138) grad_norm: 4.1332 (4.2402) time: 0.7958 data: 0.0002 max mem: 8421 +[2024-12-05 09:26:05 root] (utils.py 283): INFO Epoch: [9] [1510/2502] eta: 0:12:40 lr: 0.000017 loss_cls: 4.0891 (3.9138) grad_norm: 4.1862 (4.2399) time: 0.7950 data: 0.0002 max mem: 8421 +[2024-12-05 09:26:13 root] (utils.py 283): INFO Epoch: [9] [1520/2502] eta: 0:12:32 lr: 0.000017 loss_cls: 3.9783 (3.9143) grad_norm: 4.0923 (4.2392) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 09:26:21 root] (utils.py 283): INFO Epoch: [9] [1530/2502] eta: 0:12:24 lr: 0.000017 loss_cls: 3.9531 (3.9133) grad_norm: 4.0923 (4.2395) time: 0.7690 data: 0.0002 max mem: 8421 +[2024-12-05 09:26:29 root] (utils.py 283): INFO Epoch: [9] [1540/2502] eta: 0:12:17 lr: 0.000017 loss_cls: 3.9561 (3.9140) grad_norm: 4.2814 (4.2398) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 09:26:36 root] (utils.py 283): INFO Epoch: [9] [1550/2502] eta: 0:12:09 lr: 0.000017 loss_cls: 3.7728 (3.9125) grad_norm: 4.1051 (4.2393) time: 0.7727 data: 0.0003 max mem: 8421 +[2024-12-05 09:26:44 root] (utils.py 283): INFO Epoch: [9] [1560/2502] eta: 0:12:01 lr: 0.000017 loss_cls: 4.0344 (3.9142) grad_norm: 4.1476 (4.2388) time: 0.7742 data: 0.0002 max mem: 8421 +[2024-12-05 09:26:52 root] (utils.py 283): INFO Epoch: [9] [1570/2502] eta: 0:11:54 lr: 0.000017 loss_cls: 4.1544 (3.9158) grad_norm: 4.1755 (4.2388) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-05 09:26:59 root] (utils.py 283): INFO Epoch: [9] [1580/2502] eta: 0:11:46 lr: 0.000017 loss_cls: 4.2115 (3.9176) grad_norm: 4.1675 (4.2388) time: 0.7726 data: 0.0003 max mem: 8421 +[2024-12-05 09:27:07 root] (utils.py 283): INFO Epoch: [9] [1590/2502] eta: 0:11:39 lr: 0.000017 loss_cls: 3.9988 (3.9177) grad_norm: 4.1562 (4.2396) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 09:27:15 root] (utils.py 283): INFO Epoch: [9] [1600/2502] eta: 0:11:31 lr: 0.000017 loss_cls: 3.7711 (3.9167) grad_norm: 4.0175 (4.2387) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 09:27:22 root] (utils.py 283): INFO Epoch: [9] [1610/2502] eta: 0:11:23 lr: 0.000017 loss_cls: 3.6596 (3.9160) grad_norm: 4.0384 (4.2394) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 09:27:30 root] (utils.py 283): INFO Epoch: [9] [1620/2502] eta: 0:11:16 lr: 0.000017 loss_cls: 3.9700 (3.9162) grad_norm: 4.0384 (4.2397) time: 0.7698 data: 0.0002 max mem: 8421 +[2024-12-05 09:27:38 root] (utils.py 283): INFO Epoch: [9] [1630/2502] eta: 0:11:08 lr: 0.000017 loss_cls: 4.0451 (3.9162) grad_norm: 4.0321 (4.2394) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 09:27:46 root] (utils.py 283): INFO Epoch: [9] [1640/2502] eta: 0:11:00 lr: 0.000017 loss_cls: 4.1595 (3.9175) grad_norm: 4.1162 (4.2394) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-05 09:27:54 root] (utils.py 283): INFO Epoch: [9] [1650/2502] eta: 0:10:53 lr: 0.000017 loss_cls: 4.0786 (3.9174) grad_norm: 4.0215 (4.2381) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 09:28:01 root] (utils.py 283): INFO Epoch: [9] [1660/2502] eta: 0:10:45 lr: 0.000017 loss_cls: 4.0621 (3.9167) grad_norm: 4.0262 (4.2384) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-05 09:28:10 root] (utils.py 283): INFO Epoch: [9] [1670/2502] eta: 0:10:38 lr: 0.000017 loss_cls: 3.9510 (3.9161) grad_norm: 4.2000 (4.2389) time: 0.7975 data: 0.0003 max mem: 8421 +[2024-12-05 09:28:17 root] (utils.py 283): INFO Epoch: [9] [1680/2502] eta: 0:10:30 lr: 0.000017 loss_cls: 4.1008 (3.9173) grad_norm: 4.3226 (4.2406) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-05 09:28:25 root] (utils.py 283): INFO Epoch: [9] [1690/2502] eta: 0:10:22 lr: 0.000017 loss_cls: 4.1413 (3.9188) grad_norm: 4.3319 (4.2411) time: 0.7756 data: 0.0002 max mem: 8421 +[2024-12-05 09:28:33 root] (utils.py 283): INFO Epoch: [9] [1700/2502] eta: 0:10:15 lr: 0.000017 loss_cls: 3.7876 (3.9155) grad_norm: 4.0861 (4.2405) time: 0.7714 data: 0.0002 max mem: 8421 +[2024-12-05 09:28:41 root] (utils.py 283): INFO Epoch: [9] [1710/2502] eta: 0:10:07 lr: 0.000017 loss_cls: 3.3200 (3.9143) grad_norm: 3.9484 (4.2401) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-05 09:28:48 root] (utils.py 283): INFO Epoch: [9] [1720/2502] eta: 0:10:00 lr: 0.000017 loss_cls: 3.9000 (3.9135) grad_norm: 4.1390 (4.2408) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 09:28:56 root] (utils.py 283): INFO Epoch: [9] [1730/2502] eta: 0:09:52 lr: 0.000017 loss_cls: 4.0049 (3.9146) grad_norm: 4.2803 (4.2430) time: 0.7730 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:04 root] (utils.py 283): INFO Epoch: [9] [1740/2502] eta: 0:09:44 lr: 0.000017 loss_cls: 4.1969 (3.9154) grad_norm: 4.2053 (4.2425) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:11 root] (utils.py 283): INFO Epoch: [9] [1750/2502] eta: 0:09:37 lr: 0.000017 loss_cls: 3.9031 (3.9151) grad_norm: 4.2048 (4.2446) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:19 root] (utils.py 283): INFO Epoch: [9] [1760/2502] eta: 0:09:29 lr: 0.000017 loss_cls: 3.8104 (3.9147) grad_norm: 4.2333 (4.2453) time: 0.7598 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:27 root] (utils.py 283): INFO Epoch: [9] [1770/2502] eta: 0:09:21 lr: 0.000017 loss_cls: 3.6127 (3.9139) grad_norm: 4.1115 (4.2445) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:34 root] (utils.py 283): INFO Epoch: [9] [1780/2502] eta: 0:09:13 lr: 0.000017 loss_cls: 3.6920 (3.9149) grad_norm: 4.2537 (4.2473) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:42 root] (utils.py 283): INFO Epoch: [9] [1790/2502] eta: 0:09:06 lr: 0.000017 loss_cls: 3.9912 (3.9152) grad_norm: 4.2903 (4.2470) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:49 root] (utils.py 283): INFO Epoch: [9] [1800/2502] eta: 0:08:58 lr: 0.000017 loss_cls: 3.7742 (3.9138) grad_norm: 3.9599 (4.2463) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 09:29:57 root] (utils.py 283): INFO Epoch: [9] [1810/2502] eta: 0:08:50 lr: 0.000017 loss_cls: 3.9768 (3.9147) grad_norm: 4.0345 (4.2463) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 09:30:05 root] (utils.py 283): INFO Epoch: [9] [1820/2502] eta: 0:08:43 lr: 0.000017 loss_cls: 4.0247 (3.9148) grad_norm: 4.1321 (4.2468) time: 0.7728 data: 0.0002 max mem: 8421 +[2024-12-05 09:30:13 root] (utils.py 283): INFO Epoch: [9] [1830/2502] eta: 0:08:35 lr: 0.000017 loss_cls: 3.8746 (3.9145) grad_norm: 4.0819 (4.2463) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 09:30:20 root] (utils.py 283): INFO Epoch: [9] [1840/2502] eta: 0:08:27 lr: 0.000017 loss_cls: 3.8746 (3.9151) grad_norm: 4.1659 (4.2461) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 09:30:28 root] (utils.py 283): INFO Epoch: [9] [1850/2502] eta: 0:08:20 lr: 0.000017 loss_cls: 3.9363 (3.9139) grad_norm: 4.2319 (4.2463) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 09:30:36 root] (utils.py 283): INFO Epoch: [9] [1860/2502] eta: 0:08:12 lr: 0.000017 loss_cls: 4.0670 (3.9148) grad_norm: 4.2193 (4.2480) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-05 09:30:43 root] (utils.py 283): INFO Epoch: [9] [1870/2502] eta: 0:08:04 lr: 0.000017 loss_cls: 4.0139 (3.9152) grad_norm: 4.1850 (4.2471) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 09:30:51 root] (utils.py 283): INFO Epoch: [9] [1880/2502] eta: 0:07:57 lr: 0.000017 loss_cls: 3.9997 (3.9155) grad_norm: 4.0249 (4.2482) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 09:30:59 root] (utils.py 283): INFO Epoch: [9] [1890/2502] eta: 0:07:49 lr: 0.000017 loss_cls: 4.0339 (3.9169) grad_norm: 4.1099 (4.2481) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 09:31:06 root] (utils.py 283): INFO Epoch: [9] [1900/2502] eta: 0:07:41 lr: 0.000017 loss_cls: 4.0629 (3.9169) grad_norm: 3.9624 (4.2465) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 09:31:14 root] (utils.py 283): INFO Epoch: [9] [1910/2502] eta: 0:07:34 lr: 0.000017 loss_cls: 4.0109 (3.9160) grad_norm: 3.9624 (4.2463) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 09:31:22 root] (utils.py 283): INFO Epoch: [9] [1920/2502] eta: 0:07:26 lr: 0.000017 loss_cls: 4.1361 (3.9180) grad_norm: 4.2076 (4.2474) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 09:31:29 root] (utils.py 283): INFO Epoch: [9] [1930/2502] eta: 0:07:18 lr: 0.000017 loss_cls: 4.1572 (3.9173) grad_norm: 4.2581 (4.2469) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 09:31:37 root] (utils.py 283): INFO Epoch: [9] [1940/2502] eta: 0:07:11 lr: 0.000017 loss_cls: 3.9605 (3.9180) grad_norm: 4.1648 (4.2467) time: 0.7606 data: 0.0002 max mem: 8421 +[2024-12-05 09:31:45 root] (utils.py 283): INFO Epoch: [9] [1950/2502] eta: 0:07:03 lr: 0.000017 loss_cls: 4.2418 (3.9185) grad_norm: 4.2160 (4.2480) time: 0.7707 data: 0.0002 max mem: 8421 +[2024-12-05 09:31:52 root] (utils.py 283): INFO Epoch: [9] [1960/2502] eta: 0:06:55 lr: 0.000017 loss_cls: 4.2641 (3.9204) grad_norm: 4.2661 (4.2481) time: 0.7707 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:00 root] (utils.py 283): INFO Epoch: [9] [1970/2502] eta: 0:06:48 lr: 0.000017 loss_cls: 4.2309 (3.9209) grad_norm: 4.2228 (4.2488) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:08 root] (utils.py 283): INFO Epoch: [9] [1980/2502] eta: 0:06:40 lr: 0.000017 loss_cls: 4.1502 (3.9227) grad_norm: 4.1836 (4.2488) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:15 root] (utils.py 283): INFO Epoch: [9] [1990/2502] eta: 0:06:32 lr: 0.000017 loss_cls: 4.0699 (3.9226) grad_norm: 4.3101 (4.2497) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:23 root] (utils.py 283): INFO Epoch: [9] [2000/2502] eta: 0:06:25 lr: 0.000017 loss_cls: 3.8353 (3.9223) grad_norm: 4.2577 (4.2495) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:30 root] (utils.py 283): INFO Epoch: [9] [2010/2502] eta: 0:06:17 lr: 0.000017 loss_cls: 4.1149 (3.9238) grad_norm: 4.1566 (4.2486) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:38 root] (utils.py 283): INFO Epoch: [9] [2020/2502] eta: 0:06:09 lr: 0.000017 loss_cls: 4.2042 (3.9236) grad_norm: 4.1566 (4.2485) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:46 root] (utils.py 283): INFO Epoch: [9] [2030/2502] eta: 0:06:02 lr: 0.000017 loss_cls: 4.1048 (3.9240) grad_norm: 4.1160 (4.2475) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 09:32:53 root] (utils.py 283): INFO Epoch: [9] [2040/2502] eta: 0:05:54 lr: 0.000017 loss_cls: 3.8781 (3.9232) grad_norm: 4.0708 (4.2470) time: 0.7594 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:01 root] (utils.py 283): INFO Epoch: [9] [2050/2502] eta: 0:05:46 lr: 0.000017 loss_cls: 3.9700 (3.9242) grad_norm: 4.0708 (4.2463) time: 0.7602 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:08 root] (utils.py 283): INFO Epoch: [9] [2060/2502] eta: 0:05:39 lr: 0.000017 loss_cls: 4.0438 (3.9236) grad_norm: 4.1100 (4.2467) time: 0.7589 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:16 root] (utils.py 283): INFO Epoch: [9] [2070/2502] eta: 0:05:31 lr: 0.000017 loss_cls: 4.0063 (3.9232) grad_norm: 4.3414 (4.2473) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:24 root] (utils.py 283): INFO Epoch: [9] [2080/2502] eta: 0:05:23 lr: 0.000017 loss_cls: 4.0430 (3.9234) grad_norm: 4.3911 (4.2474) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:31 root] (utils.py 283): INFO Epoch: [9] [2090/2502] eta: 0:05:15 lr: 0.000017 loss_cls: 4.1678 (3.9240) grad_norm: 4.0973 (4.2469) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:39 root] (utils.py 283): INFO Epoch: [9] [2100/2502] eta: 0:05:08 lr: 0.000017 loss_cls: 4.2963 (3.9258) grad_norm: 4.1433 (4.2470) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:47 root] (utils.py 283): INFO Epoch: [9] [2110/2502] eta: 0:05:00 lr: 0.000017 loss_cls: 4.2945 (3.9267) grad_norm: 4.2595 (4.2477) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 09:33:54 root] (utils.py 283): INFO Epoch: [9] [2120/2502] eta: 0:04:52 lr: 0.000017 loss_cls: 4.1413 (3.9265) grad_norm: 4.1691 (4.2471) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 09:34:02 root] (utils.py 283): INFO Epoch: [9] [2130/2502] eta: 0:04:45 lr: 0.000017 loss_cls: 3.8787 (3.9245) grad_norm: 4.1691 (4.2480) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-05 09:34:10 root] (utils.py 283): INFO Epoch: [9] [2140/2502] eta: 0:04:37 lr: 0.000017 loss_cls: 3.8054 (3.9241) grad_norm: 4.3577 (4.2492) time: 0.7718 data: 0.0002 max mem: 8421 +[2024-12-05 09:34:17 root] (utils.py 283): INFO Epoch: [9] [2150/2502] eta: 0:04:29 lr: 0.000017 loss_cls: 4.0675 (3.9246) grad_norm: 4.0854 (4.2485) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 09:34:25 root] (utils.py 283): INFO Epoch: [9] [2160/2502] eta: 0:04:22 lr: 0.000017 loss_cls: 4.1015 (3.9234) grad_norm: 3.9482 (4.2478) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 09:34:33 root] (utils.py 283): INFO Epoch: [9] [2170/2502] eta: 0:04:14 lr: 0.000017 loss_cls: 3.7966 (3.9228) grad_norm: 4.0213 (4.2471) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 09:34:40 root] (utils.py 283): INFO Epoch: [9] [2180/2502] eta: 0:04:06 lr: 0.000017 loss_cls: 3.9484 (3.9223) grad_norm: 4.0001 (4.2468) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 09:34:48 root] (utils.py 283): INFO Epoch: [9] [2190/2502] eta: 0:03:59 lr: 0.000017 loss_cls: 3.5292 (3.9189) grad_norm: 3.9611 (4.2458) time: 0.7671 data: 0.0003 max mem: 8421 +[2024-12-05 09:34:56 root] (utils.py 283): INFO Epoch: [9] [2200/2502] eta: 0:03:51 lr: 0.000017 loss_cls: 3.4902 (3.9184) grad_norm: 4.0581 (4.2461) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 09:35:03 root] (utils.py 283): INFO Epoch: [9] [2210/2502] eta: 0:03:43 lr: 0.000017 loss_cls: 3.9229 (3.9181) grad_norm: 4.2887 (4.2459) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 09:35:11 root] (utils.py 283): INFO Epoch: [9] [2220/2502] eta: 0:03:36 lr: 0.000017 loss_cls: 4.0533 (3.9181) grad_norm: 4.0948 (4.2454) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 09:35:19 root] (utils.py 283): INFO Epoch: [9] [2230/2502] eta: 0:03:28 lr: 0.000017 loss_cls: 4.1004 (3.9183) grad_norm: 4.0451 (4.2444) time: 0.7702 data: 0.0002 max mem: 8421 +[2024-12-05 09:35:26 root] (utils.py 283): INFO Epoch: [9] [2240/2502] eta: 0:03:20 lr: 0.000017 loss_cls: 3.9056 (3.9174) grad_norm: 3.9994 (4.2462) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-05 09:35:34 root] (utils.py 283): INFO Epoch: [9] [2250/2502] eta: 0:03:13 lr: 0.000017 loss_cls: 3.6680 (3.9162) grad_norm: 4.0976 (4.2456) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-05 09:35:42 root] (utils.py 283): INFO Epoch: [9] [2260/2502] eta: 0:03:05 lr: 0.000017 loss_cls: 3.6483 (3.9158) grad_norm: 4.0976 (4.2451) time: 0.7694 data: 0.0003 max mem: 8421 +[2024-12-05 09:35:49 root] (utils.py 283): INFO Epoch: [9] [2270/2502] eta: 0:02:57 lr: 0.000017 loss_cls: 3.7129 (3.9143) grad_norm: 4.1114 (4.2446) time: 0.7694 data: 0.0003 max mem: 8421 +[2024-12-05 09:35:57 root] (utils.py 283): INFO Epoch: [9] [2280/2502] eta: 0:02:50 lr: 0.000017 loss_cls: 3.8792 (3.9153) grad_norm: 4.1650 (4.2443) time: 0.7685 data: 0.0003 max mem: 8421 +[2024-12-05 09:36:05 root] (utils.py 283): INFO Epoch: [9] [2290/2502] eta: 0:02:42 lr: 0.000017 loss_cls: 4.1403 (3.9159) grad_norm: 4.0387 (4.2434) time: 0.7689 data: 0.0003 max mem: 8421 +[2024-12-05 09:36:12 root] (utils.py 283): INFO Epoch: [9] [2300/2502] eta: 0:02:34 lr: 0.000017 loss_cls: 4.0154 (3.9160) grad_norm: 4.0772 (4.2453) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 09:36:20 root] (utils.py 283): INFO Epoch: [9] [2310/2502] eta: 0:02:27 lr: 0.000017 loss_cls: 3.8927 (3.9154) grad_norm: 4.4304 (4.2464) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-05 09:36:28 root] (utils.py 283): INFO Epoch: [9] [2320/2502] eta: 0:02:19 lr: 0.000017 loss_cls: 3.7366 (3.9146) grad_norm: 4.2137 (4.2458) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-05 09:36:36 root] (utils.py 283): INFO Epoch: [9] [2330/2502] eta: 0:02:11 lr: 0.000017 loss_cls: 3.9296 (3.9151) grad_norm: 4.1245 (4.2461) time: 0.7729 data: 0.0003 max mem: 8421 +[2024-12-05 09:36:43 root] (utils.py 283): INFO Epoch: [9] [2340/2502] eta: 0:02:04 lr: 0.000017 loss_cls: 4.0840 (3.9157) grad_norm: 4.2239 (4.2459) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-05 09:36:51 root] (utils.py 283): INFO Epoch: [9] [2350/2502] eta: 0:01:56 lr: 0.000017 loss_cls: 3.9033 (3.9153) grad_norm: 4.2114 (4.2467) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 09:36:58 root] (utils.py 283): INFO Epoch: [9] [2360/2502] eta: 0:01:48 lr: 0.000017 loss_cls: 3.8798 (3.9157) grad_norm: 4.3194 (4.2469) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 09:37:06 root] (utils.py 283): INFO Epoch: [9] [2370/2502] eta: 0:01:41 lr: 0.000017 loss_cls: 4.0986 (3.9158) grad_norm: 4.1136 (4.2468) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 09:37:14 root] (utils.py 283): INFO Epoch: [9] [2380/2502] eta: 0:01:33 lr: 0.000017 loss_cls: 4.0986 (3.9160) grad_norm: 4.0470 (4.2464) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 09:37:21 root] (utils.py 283): INFO Epoch: [9] [2390/2502] eta: 0:01:25 lr: 0.000017 loss_cls: 4.1867 (3.9160) grad_norm: 4.0511 (4.2466) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 09:37:29 root] (utils.py 283): INFO Epoch: [9] [2400/2502] eta: 0:01:18 lr: 0.000017 loss_cls: 4.1867 (3.9165) grad_norm: 4.1158 (4.2464) time: 0.7718 data: 0.0003 max mem: 8421 +[2024-12-05 09:37:37 root] (utils.py 283): INFO Epoch: [9] [2410/2502] eta: 0:01:10 lr: 0.000017 loss_cls: 4.1433 (3.9167) grad_norm: 4.0571 (4.2456) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-05 09:37:44 root] (utils.py 283): INFO Epoch: [9] [2420/2502] eta: 0:01:02 lr: 0.000017 loss_cls: 4.0960 (3.9158) grad_norm: 4.1013 (4.2452) time: 0.7591 data: 0.0002 max mem: 8421 +[2024-12-05 09:37:52 root] (utils.py 283): INFO Epoch: [9] [2430/2502] eta: 0:00:55 lr: 0.000017 loss_cls: 4.1013 (3.9163) grad_norm: 4.0933 (4.2448) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 09:38:00 root] (utils.py 283): INFO Epoch: [9] [2440/2502] eta: 0:00:47 lr: 0.000017 loss_cls: 4.0846 (3.9160) grad_norm: 4.0074 (4.2438) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 09:38:07 root] (utils.py 283): INFO Epoch: [9] [2450/2502] eta: 0:00:39 lr: 0.000017 loss_cls: 3.9700 (3.9160) grad_norm: 4.0459 (4.2430) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 09:38:15 root] (utils.py 283): INFO Epoch: [9] [2460/2502] eta: 0:00:32 lr: 0.000017 loss_cls: 3.8439 (3.9155) grad_norm: 4.1183 (4.2440) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 09:38:23 root] (utils.py 283): INFO Epoch: [9] [2470/2502] eta: 0:00:24 lr: 0.000017 loss_cls: 3.6042 (3.9154) grad_norm: 4.2948 (4.2443) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-05 09:38:31 root] (utils.py 283): INFO Epoch: [9] [2480/2502] eta: 0:00:16 lr: 0.000017 loss_cls: 4.1824 (3.9158) grad_norm: 4.2948 (4.2440) time: 0.8013 data: 0.0003 max mem: 8421 +[2024-12-05 09:38:40 root] (utils.py 283): INFO Epoch: [9] [2490/2502] eta: 0:00:09 lr: 0.000017 loss_cls: 4.1903 (3.9164) grad_norm: 4.3017 (4.2446) time: 0.8260 data: 0.0268 max mem: 8421 +[2024-12-05 09:38:47 root] (utils.py 283): INFO Epoch: [9] [2500/2502] eta: 0:00:01 lr: 0.000017 loss_cls: 4.1263 (3.9165) grad_norm: 4.3492 (4.2477) time: 0.8198 data: 0.0268 max mem: 8421 +[2024-12-05 09:38:48 root] (utils.py 283): INFO Epoch: [9] [2501/2502] eta: 0:00:00 lr: 0.000017 loss_cls: 4.1263 (3.9167) grad_norm: 4.3492 (4.2477) time: 0.8176 data: 0.0268 max mem: 8421 +[2024-12-05 09:38:48 root] (utils.py 297): INFO Epoch: [9] Total time: 0:32:00 (0.7677 s / it) +[2024-12-05 09:38:48 root] (engine.py 178): INFO Averaged stats:lr: 0.000017 loss_cls: 4.1263 (3.9080) grad_norm: 4.3492 (4.2477) +[2024-12-05 09:38:49 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7450 (0.7450) acc1: 85.9375 (85.9375) acc3: 93.7500 (93.7500) acc5: 96.8750 (96.8750) time: 0.1308 data: 0.0003 max mem: 8421 +[2024-12-05 09:38:50 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8431 (0.8795) acc1: 83.5938 (82.1733) acc3: 93.7500 (92.4006) acc5: 95.3125 (95.5256) time: 0.1313 data: 0.0004 max mem: 8421 +[2024-12-05 09:38:51 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9147 (0.9267) acc1: 78.9062 (80.7292) acc3: 92.9688 (92.2619) acc5: 95.3125 (95.1637) time: 0.1315 data: 0.0004 max mem: 8421 +[2024-12-05 09:38:53 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9520 (0.9296) acc1: 79.6875 (80.1159) acc3: 92.1875 (92.7167) acc5: 96.0938 (95.3125) time: 0.1319 data: 0.0005 max mem: 8421 +[2024-12-05 09:38:54 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8498 (0.9231) acc1: 81.2500 (80.4878) acc3: 93.7500 (92.7591) acc5: 96.0938 (95.2553) time: 0.1323 data: 0.0004 max mem: 8421 +[2024-12-05 09:38:56 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0602 (1.0204) acc1: 72.6562 (78.1556) acc3: 87.5000 (91.1458) acc5: 90.6250 (93.9491) time: 0.1451 data: 0.0116 max mem: 8421 +[2024-12-05 09:38:57 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3790 (1.0663) acc1: 69.5312 (77.2413) acc3: 85.1562 (90.3304) acc5: 89.0625 (93.1609) time: 0.1692 data: 0.0355 max mem: 8421 +[2024-12-05 09:38:59 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2774 (1.1068) acc1: 71.0938 (76.2984) acc3: 86.7188 (89.7337) acc5: 89.8438 (92.7047) time: 0.1560 data: 0.0244 max mem: 8421 +[2024-12-05 09:39:00 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3054 (1.1440) acc1: 71.0938 (75.5112) acc3: 84.3750 (89.0432) acc5: 89.0625 (92.2261) time: 0.1505 data: 0.0190 max mem: 8421 +[2024-12-05 09:39:02 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3696 (1.1776) acc1: 70.3125 (74.5622) acc3: 84.3750 (88.5045) acc5: 88.2812 (91.7840) time: 0.1583 data: 0.0257 max mem: 8421 +[2024-12-05 09:39:03 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2223 (1.1649) acc1: 71.0938 (74.8080) acc3: 86.7188 (88.7200) acc5: 91.4062 (91.9760) time: 0.1377 data: 0.0074 max mem: 8421 +[2024-12-05 09:39:03 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1445 s / it) +[2024-12-05 09:39:03 root] (engine.py 263): INFO * Acc@1 74.406 Acc@3 88.556 Acc@5 92.024 loss 1.166 flops 1.285 layer_flops 1.251 +[2024-12-05 09:39:03 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.4% +[2024-12-05 09:39:03 root] (main.py 550): INFO Max accuracy: 74.41% +[2024-12-05 09:39:04 root] (utils.py 283): INFO Epoch: [10] [ 0/2502] eta: 0:31:59 lr: 0.000016 loss_cls: 4.7135 (4.7135) grad_norm: 4.5017 (4.5017) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-05 09:39:11 root] (utils.py 283): INFO Epoch: [10] [ 10/2502] eta: 0:31:51 lr: 0.000016 loss_cls: 3.9657 (3.8465) grad_norm: 4.2924 (4.2825) time: 0.7672 data: 0.0003 max mem: 8421 +[2024-12-05 09:39:19 root] (utils.py 283): INFO Epoch: [10] [ 20/2502] eta: 0:31:43 lr: 0.000016 loss_cls: 3.9657 (3.8413) grad_norm: 4.0384 (4.1684) time: 0.7669 data: 0.0003 max mem: 8421 +[2024-12-05 09:39:27 root] (utils.py 283): INFO Epoch: [10] [ 30/2502] eta: 0:31:33 lr: 0.000016 loss_cls: 4.1935 (3.8696) grad_norm: 4.1303 (4.2263) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 09:39:34 root] (utils.py 283): INFO Epoch: [10] [ 40/2502] eta: 0:31:29 lr: 0.000016 loss_cls: 3.8932 (3.8468) grad_norm: 4.1699 (4.2766) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-05 09:39:42 root] (utils.py 283): INFO Epoch: [10] [ 50/2502] eta: 0:31:33 lr: 0.000016 loss_cls: 3.6066 (3.8191) grad_norm: 4.1684 (4.2664) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-05 09:39:50 root] (utils.py 283): INFO Epoch: [10] [ 60/2502] eta: 0:31:22 lr: 0.000016 loss_cls: 3.6066 (3.7756) grad_norm: 4.2710 (4.2838) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 09:39:58 root] (utils.py 283): INFO Epoch: [10] [ 70/2502] eta: 0:31:12 lr: 0.000016 loss_cls: 3.9189 (3.8156) grad_norm: 4.3361 (4.2979) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:05 root] (utils.py 283): INFO Epoch: [10] [ 80/2502] eta: 0:31:02 lr: 0.000016 loss_cls: 4.0731 (3.8339) grad_norm: 4.2342 (4.2970) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:13 root] (utils.py 283): INFO Epoch: [10] [ 90/2502] eta: 0:30:55 lr: 0.000016 loss_cls: 4.0156 (3.8300) grad_norm: 4.1111 (4.3105) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:21 root] (utils.py 283): INFO Epoch: [10] [ 100/2502] eta: 0:30:47 lr: 0.000016 loss_cls: 4.0897 (3.8440) grad_norm: 4.1111 (4.2921) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:28 root] (utils.py 283): INFO Epoch: [10] [ 110/2502] eta: 0:30:37 lr: 0.000016 loss_cls: 4.0897 (3.8443) grad_norm: 3.9860 (4.2819) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:36 root] (utils.py 283): INFO Epoch: [10] [ 120/2502] eta: 0:30:28 lr: 0.000016 loss_cls: 3.8101 (3.8377) grad_norm: 3.9970 (4.2677) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:43 root] (utils.py 283): INFO Epoch: [10] [ 130/2502] eta: 0:30:19 lr: 0.000016 loss_cls: 3.8858 (3.8520) grad_norm: 4.1015 (4.2770) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:51 root] (utils.py 283): INFO Epoch: [10] [ 140/2502] eta: 0:30:11 lr: 0.000016 loss_cls: 4.2887 (3.8864) grad_norm: 4.3013 (4.2926) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 09:40:59 root] (utils.py 283): INFO Epoch: [10] [ 150/2502] eta: 0:30:04 lr: 0.000016 loss_cls: 4.1628 (3.8859) grad_norm: 4.1776 (4.2772) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 09:41:06 root] (utils.py 283): INFO Epoch: [10] [ 160/2502] eta: 0:29:55 lr: 0.000016 loss_cls: 4.0596 (3.8887) grad_norm: 4.0274 (4.2655) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 09:41:14 root] (utils.py 283): INFO Epoch: [10] [ 170/2502] eta: 0:29:47 lr: 0.000016 loss_cls: 4.1810 (3.9068) grad_norm: 4.0090 (4.2584) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 09:41:22 root] (utils.py 283): INFO Epoch: [10] [ 180/2502] eta: 0:29:39 lr: 0.000016 loss_cls: 4.1450 (3.8959) grad_norm: 4.3340 (4.2828) time: 0.7632 data: 0.0003 max mem: 8421 +[2024-12-05 09:41:29 root] (utils.py 283): INFO Epoch: [10] [ 190/2502] eta: 0:29:31 lr: 0.000016 loss_cls: 3.7888 (3.8969) grad_norm: 4.2946 (4.2747) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 09:41:37 root] (utils.py 283): INFO Epoch: [10] [ 200/2502] eta: 0:29:23 lr: 0.000016 loss_cls: 4.0406 (3.9056) grad_norm: 4.0790 (4.2652) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 09:41:44 root] (utils.py 283): INFO Epoch: [10] [ 210/2502] eta: 0:29:15 lr: 0.000016 loss_cls: 4.1680 (3.9074) grad_norm: 4.0276 (4.2568) time: 0.7626 data: 0.0003 max mem: 8421 +[2024-12-05 09:41:52 root] (utils.py 283): INFO Epoch: [10] [ 220/2502] eta: 0:29:07 lr: 0.000016 loss_cls: 4.0216 (3.9079) grad_norm: 4.0666 (4.2593) time: 0.7606 data: 0.0003 max mem: 8421 +[2024-12-05 09:42:00 root] (utils.py 283): INFO Epoch: [10] [ 230/2502] eta: 0:28:58 lr: 0.000016 loss_cls: 3.8144 (3.9008) grad_norm: 4.1388 (4.2617) time: 0.7573 data: 0.0002 max mem: 8421 +[2024-12-05 09:42:07 root] (utils.py 283): INFO Epoch: [10] [ 240/2502] eta: 0:28:50 lr: 0.000016 loss_cls: 3.9282 (3.9083) grad_norm: 4.0478 (4.2728) time: 0.7578 data: 0.0002 max mem: 8421 +[2024-12-05 09:42:15 root] (utils.py 283): INFO Epoch: [10] [ 250/2502] eta: 0:28:43 lr: 0.000016 loss_cls: 4.2078 (3.9083) grad_norm: 4.2227 (4.2693) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 09:42:23 root] (utils.py 283): INFO Epoch: [10] [ 260/2502] eta: 0:28:35 lr: 0.000016 loss_cls: 4.0260 (3.9076) grad_norm: 4.2166 (4.2698) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 09:42:30 root] (utils.py 283): INFO Epoch: [10] [ 270/2502] eta: 0:28:27 lr: 0.000016 loss_cls: 4.1405 (3.9174) grad_norm: 4.1614 (4.2680) time: 0.7591 data: 0.0002 max mem: 8421 +[2024-12-05 09:42:38 root] (utils.py 283): INFO Epoch: [10] [ 280/2502] eta: 0:28:19 lr: 0.000016 loss_cls: 4.1803 (3.9203) grad_norm: 4.2365 (4.2819) time: 0.7600 data: 0.0002 max mem: 8421 +[2024-12-05 09:42:45 root] (utils.py 283): INFO Epoch: [10] [ 290/2502] eta: 0:28:11 lr: 0.000016 loss_cls: 3.8793 (3.9167) grad_norm: 4.1288 (4.2742) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-05 09:42:53 root] (utils.py 283): INFO Epoch: [10] [ 300/2502] eta: 0:28:04 lr: 0.000016 loss_cls: 3.9623 (3.9164) grad_norm: 4.0820 (4.2690) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 09:43:01 root] (utils.py 283): INFO Epoch: [10] [ 310/2502] eta: 0:27:56 lr: 0.000016 loss_cls: 3.9984 (3.9084) grad_norm: 4.0497 (4.2662) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 09:43:08 root] (utils.py 283): INFO Epoch: [10] [ 320/2502] eta: 0:27:48 lr: 0.000016 loss_cls: 3.8190 (3.9014) grad_norm: 4.0333 (4.2630) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 09:43:16 root] (utils.py 283): INFO Epoch: [10] [ 330/2502] eta: 0:27:40 lr: 0.000016 loss_cls: 3.8069 (3.9008) grad_norm: 3.9929 (4.2525) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 09:43:24 root] (utils.py 283): INFO Epoch: [10] [ 340/2502] eta: 0:27:33 lr: 0.000016 loss_cls: 3.8879 (3.9043) grad_norm: 4.0218 (4.2575) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 09:43:31 root] (utils.py 283): INFO Epoch: [10] [ 350/2502] eta: 0:27:25 lr: 0.000016 loss_cls: 4.1112 (3.9075) grad_norm: 4.1359 (4.2542) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 09:43:39 root] (utils.py 283): INFO Epoch: [10] [ 360/2502] eta: 0:27:18 lr: 0.000016 loss_cls: 4.0603 (3.9125) grad_norm: 4.0665 (4.2522) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 09:43:47 root] (utils.py 283): INFO Epoch: [10] [ 370/2502] eta: 0:27:10 lr: 0.000016 loss_cls: 4.1545 (3.9176) grad_norm: 4.1901 (4.2542) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-05 09:43:54 root] (utils.py 283): INFO Epoch: [10] [ 380/2502] eta: 0:27:02 lr: 0.000016 loss_cls: 4.0212 (3.9069) grad_norm: 4.3063 (4.2522) time: 0.7639 data: 0.0003 max mem: 8421 +[2024-12-05 09:44:02 root] (utils.py 283): INFO Epoch: [10] [ 390/2502] eta: 0:26:56 lr: 0.000016 loss_cls: 3.5932 (3.9059) grad_norm: 4.3256 (4.2517) time: 0.7770 data: 0.0003 max mem: 8421 +[2024-12-05 09:44:10 root] (utils.py 283): INFO Epoch: [10] [ 400/2502] eta: 0:26:48 lr: 0.000016 loss_cls: 4.0635 (3.9074) grad_norm: 4.0825 (4.2465) time: 0.7743 data: 0.0003 max mem: 8421 +[2024-12-05 09:44:17 root] (utils.py 283): INFO Epoch: [10] [ 410/2502] eta: 0:26:40 lr: 0.000016 loss_cls: 4.0719 (3.9055) grad_norm: 4.0204 (4.2411) time: 0.7630 data: 0.0003 max mem: 8421 +[2024-12-05 09:44:25 root] (utils.py 283): INFO Epoch: [10] [ 420/2502] eta: 0:26:32 lr: 0.000016 loss_cls: 3.6002 (3.9003) grad_norm: 3.8449 (4.2336) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-05 09:44:33 root] (utils.py 283): INFO Epoch: [10] [ 430/2502] eta: 0:26:25 lr: 0.000016 loss_cls: 3.9883 (3.9020) grad_norm: 3.9541 (4.2332) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 09:44:40 root] (utils.py 283): INFO Epoch: [10] [ 440/2502] eta: 0:26:17 lr: 0.000016 loss_cls: 3.9883 (3.9011) grad_norm: 4.0811 (4.2330) time: 0.7632 data: 0.0003 max mem: 8421 +[2024-12-05 09:44:48 root] (utils.py 283): INFO Epoch: [10] [ 450/2502] eta: 0:26:10 lr: 0.000016 loss_cls: 3.9505 (3.8998) grad_norm: 4.2403 (4.2422) time: 0.7725 data: 0.0003 max mem: 8421 +[2024-12-05 09:44:56 root] (utils.py 283): INFO Epoch: [10] [ 460/2502] eta: 0:26:03 lr: 0.000016 loss_cls: 3.5009 (3.8921) grad_norm: 4.2510 (4.2443) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-05 09:45:04 root] (utils.py 283): INFO Epoch: [10] [ 470/2502] eta: 0:25:57 lr: 0.000016 loss_cls: 3.7983 (3.8920) grad_norm: 4.1258 (4.2477) time: 0.7868 data: 0.0003 max mem: 8421 +[2024-12-05 09:45:12 root] (utils.py 283): INFO Epoch: [10] [ 480/2502] eta: 0:25:50 lr: 0.000016 loss_cls: 4.0564 (3.8996) grad_norm: 4.1470 (4.2533) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-05 09:45:20 root] (utils.py 283): INFO Epoch: [10] [ 490/2502] eta: 0:25:43 lr: 0.000016 loss_cls: 4.0881 (3.8981) grad_norm: 4.1824 (4.2507) time: 0.7926 data: 0.0003 max mem: 8421 +[2024-12-05 09:45:28 root] (utils.py 283): INFO Epoch: [10] [ 500/2502] eta: 0:25:37 lr: 0.000016 loss_cls: 4.0299 (3.8958) grad_norm: 4.0253 (4.2481) time: 0.7910 data: 0.0002 max mem: 8421 +[2024-12-05 09:45:35 root] (utils.py 283): INFO Epoch: [10] [ 510/2502] eta: 0:25:30 lr: 0.000016 loss_cls: 4.0507 (3.8998) grad_norm: 4.0192 (4.2445) time: 0.7868 data: 0.0002 max mem: 8421 +[2024-12-05 09:45:43 root] (utils.py 283): INFO Epoch: [10] [ 520/2502] eta: 0:25:23 lr: 0.000016 loss_cls: 4.1214 (3.9001) grad_norm: 4.0216 (4.2510) time: 0.7882 data: 0.0003 max mem: 8421 +[2024-12-05 09:45:51 root] (utils.py 283): INFO Epoch: [10] [ 530/2502] eta: 0:25:16 lr: 0.000016 loss_cls: 4.1214 (3.9038) grad_norm: 4.3899 (4.2547) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-05 09:45:59 root] (utils.py 283): INFO Epoch: [10] [ 540/2502] eta: 0:25:08 lr: 0.000016 loss_cls: 4.0790 (3.9043) grad_norm: 4.3754 (4.2557) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 09:46:07 root] (utils.py 283): INFO Epoch: [10] [ 550/2502] eta: 0:25:01 lr: 0.000016 loss_cls: 4.0433 (3.9053) grad_norm: 4.3881 (4.2563) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 09:46:14 root] (utils.py 283): INFO Epoch: [10] [ 560/2502] eta: 0:24:53 lr: 0.000016 loss_cls: 4.1525 (3.9101) grad_norm: 4.0856 (4.2541) time: 0.7704 data: 0.0003 max mem: 8421 +[2024-12-05 09:46:22 root] (utils.py 283): INFO Epoch: [10] [ 570/2502] eta: 0:24:45 lr: 0.000016 loss_cls: 4.2101 (3.9074) grad_norm: 4.2543 (4.2553) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 09:46:30 root] (utils.py 283): INFO Epoch: [10] [ 580/2502] eta: 0:24:37 lr: 0.000016 loss_cls: 3.4673 (3.9047) grad_norm: 4.2955 (4.2533) time: 0.7604 data: 0.0003 max mem: 8421 +[2024-12-05 09:46:37 root] (utils.py 283): INFO Epoch: [10] [ 590/2502] eta: 0:24:29 lr: 0.000016 loss_cls: 3.8823 (3.9033) grad_norm: 4.1104 (4.2519) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-05 09:46:45 root] (utils.py 283): INFO Epoch: [10] [ 600/2502] eta: 0:24:22 lr: 0.000016 loss_cls: 4.0427 (3.9051) grad_norm: 4.1839 (4.2713) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-05 09:46:53 root] (utils.py 283): INFO Epoch: [10] [ 610/2502] eta: 0:24:14 lr: 0.000016 loss_cls: 4.1035 (3.9070) grad_norm: 4.3517 (4.2741) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 09:47:00 root] (utils.py 283): INFO Epoch: [10] [ 620/2502] eta: 0:24:06 lr: 0.000016 loss_cls: 3.8434 (3.9079) grad_norm: 4.2606 (4.2733) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 09:47:08 root] (utils.py 283): INFO Epoch: [10] [ 630/2502] eta: 0:23:58 lr: 0.000016 loss_cls: 3.7508 (3.9043) grad_norm: 4.1562 (4.2730) time: 0.7620 data: 0.0003 max mem: 8421 +[2024-12-05 09:47:15 root] (utils.py 283): INFO Epoch: [10] [ 640/2502] eta: 0:23:50 lr: 0.000016 loss_cls: 3.6850 (3.9018) grad_norm: 4.1782 (4.2723) time: 0.7584 data: 0.0003 max mem: 8421 +[2024-12-05 09:47:23 root] (utils.py 283): INFO Epoch: [10] [ 650/2502] eta: 0:23:42 lr: 0.000016 loss_cls: 3.8878 (3.9042) grad_norm: 4.2663 (4.2728) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 09:47:31 root] (utils.py 283): INFO Epoch: [10] [ 660/2502] eta: 0:23:34 lr: 0.000016 loss_cls: 4.1208 (3.9056) grad_norm: 4.2663 (4.2714) time: 0.7607 data: 0.0003 max mem: 8421 +[2024-12-05 09:47:38 root] (utils.py 283): INFO Epoch: [10] [ 670/2502] eta: 0:23:26 lr: 0.000016 loss_cls: 4.1146 (3.9042) grad_norm: 4.0884 (4.2703) time: 0.7576 data: 0.0003 max mem: 8421 +[2024-12-05 09:47:46 root] (utils.py 283): INFO Epoch: [10] [ 680/2502] eta: 0:23:18 lr: 0.000016 loss_cls: 3.7727 (3.9033) grad_norm: 4.0178 (4.2665) time: 0.7594 data: 0.0003 max mem: 8421 +[2024-12-05 09:47:53 root] (utils.py 283): INFO Epoch: [10] [ 690/2502] eta: 0:23:11 lr: 0.000016 loss_cls: 3.9512 (3.9033) grad_norm: 3.9869 (4.2649) time: 0.7601 data: 0.0003 max mem: 8421 +[2024-12-05 09:48:01 root] (utils.py 283): INFO Epoch: [10] [ 700/2502] eta: 0:23:03 lr: 0.000016 loss_cls: 3.9512 (3.9019) grad_norm: 4.0649 (4.2675) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 09:48:09 root] (utils.py 283): INFO Epoch: [10] [ 710/2502] eta: 0:22:55 lr: 0.000016 loss_cls: 3.8087 (3.9018) grad_norm: 4.0011 (4.2677) time: 0.7591 data: 0.0002 max mem: 8421 +[2024-12-05 09:48:16 root] (utils.py 283): INFO Epoch: [10] [ 720/2502] eta: 0:22:47 lr: 0.000016 loss_cls: 4.0602 (3.9020) grad_norm: 4.0247 (4.2662) time: 0.7588 data: 0.0002 max mem: 8421 +[2024-12-05 09:48:24 root] (utils.py 283): INFO Epoch: [10] [ 730/2502] eta: 0:22:39 lr: 0.000016 loss_cls: 4.0602 (3.9018) grad_norm: 4.0528 (4.2649) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-05 09:48:31 root] (utils.py 283): INFO Epoch: [10] [ 740/2502] eta: 0:22:31 lr: 0.000016 loss_cls: 4.0362 (3.9021) grad_norm: 4.0590 (4.2640) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-05 09:48:39 root] (utils.py 283): INFO Epoch: [10] [ 750/2502] eta: 0:22:24 lr: 0.000016 loss_cls: 4.1222 (3.9026) grad_norm: 3.9851 (4.2623) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 09:48:47 root] (utils.py 283): INFO Epoch: [10] [ 760/2502] eta: 0:22:16 lr: 0.000016 loss_cls: 3.9820 (3.9013) grad_norm: 3.9800 (4.2617) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 09:48:54 root] (utils.py 283): INFO Epoch: [10] [ 770/2502] eta: 0:22:08 lr: 0.000016 loss_cls: 3.8518 (3.8985) grad_norm: 4.2132 (4.2632) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 09:49:02 root] (utils.py 283): INFO Epoch: [10] [ 780/2502] eta: 0:22:01 lr: 0.000016 loss_cls: 3.9702 (3.9007) grad_norm: 4.3155 (4.2650) time: 0.7679 data: 0.0003 max mem: 8421 +[2024-12-05 09:49:10 root] (utils.py 283): INFO Epoch: [10] [ 790/2502] eta: 0:21:53 lr: 0.000016 loss_cls: 4.1293 (3.9009) grad_norm: 4.1676 (4.2629) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 09:49:17 root] (utils.py 283): INFO Epoch: [10] [ 800/2502] eta: 0:21:45 lr: 0.000016 loss_cls: 3.8484 (3.8983) grad_norm: 4.0219 (4.2618) time: 0.7599 data: 0.0002 max mem: 8421 +[2024-12-05 09:49:25 root] (utils.py 283): INFO Epoch: [10] [ 810/2502] eta: 0:21:38 lr: 0.000016 loss_cls: 3.8536 (3.9023) grad_norm: 4.2148 (4.2622) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 09:49:33 root] (utils.py 283): INFO Epoch: [10] [ 820/2502] eta: 0:21:30 lr: 0.000016 loss_cls: 4.2911 (3.9054) grad_norm: 4.2390 (4.2644) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-05 09:49:41 root] (utils.py 283): INFO Epoch: [10] [ 830/2502] eta: 0:21:23 lr: 0.000016 loss_cls: 4.0509 (3.9075) grad_norm: 4.1164 (4.2642) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-05 09:49:49 root] (utils.py 283): INFO Epoch: [10] [ 840/2502] eta: 0:21:16 lr: 0.000016 loss_cls: 4.0386 (3.9049) grad_norm: 4.1164 (4.2637) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-05 09:49:57 root] (utils.py 283): INFO Epoch: [10] [ 850/2502] eta: 0:21:08 lr: 0.000016 loss_cls: 4.1630 (3.9085) grad_norm: 4.1418 (4.2629) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 09:50:04 root] (utils.py 283): INFO Epoch: [10] [ 860/2502] eta: 0:21:01 lr: 0.000016 loss_cls: 4.1963 (3.9095) grad_norm: 4.0879 (4.2610) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 09:50:12 root] (utils.py 283): INFO Epoch: [10] [ 870/2502] eta: 0:20:53 lr: 0.000016 loss_cls: 4.2573 (3.9142) grad_norm: 4.0937 (4.2606) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 09:50:20 root] (utils.py 283): INFO Epoch: [10] [ 880/2502] eta: 0:20:45 lr: 0.000016 loss_cls: 4.2806 (3.9161) grad_norm: 4.1476 (4.2587) time: 0.7595 data: 0.0002 max mem: 8421 +[2024-12-05 09:50:27 root] (utils.py 283): INFO Epoch: [10] [ 890/2502] eta: 0:20:37 lr: 0.000016 loss_cls: 3.9626 (3.9144) grad_norm: 4.1652 (4.2599) time: 0.7589 data: 0.0003 max mem: 8421 +[2024-12-05 09:50:35 root] (utils.py 283): INFO Epoch: [10] [ 900/2502] eta: 0:20:30 lr: 0.000016 loss_cls: 3.8820 (3.9137) grad_norm: 4.3083 (4.2651) time: 0.7597 data: 0.0003 max mem: 8421 +[2024-12-05 09:50:43 root] (utils.py 283): INFO Epoch: [10] [ 910/2502] eta: 0:20:22 lr: 0.000016 loss_cls: 3.6976 (3.9116) grad_norm: 4.2814 (4.2641) time: 0.7698 data: 0.0003 max mem: 8421 +[2024-12-05 09:50:50 root] (utils.py 283): INFO Epoch: [10] [ 920/2502] eta: 0:20:14 lr: 0.000016 loss_cls: 3.3635 (3.9058) grad_norm: 4.2383 (4.2636) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-05 09:50:58 root] (utils.py 283): INFO Epoch: [10] [ 930/2502] eta: 0:20:07 lr: 0.000016 loss_cls: 3.8022 (3.9069) grad_norm: 4.0542 (4.2627) time: 0.7699 data: 0.0003 max mem: 8421 +[2024-12-05 09:51:06 root] (utils.py 283): INFO Epoch: [10] [ 940/2502] eta: 0:19:59 lr: 0.000016 loss_cls: 3.9628 (3.9044) grad_norm: 3.9630 (4.2589) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 09:51:13 root] (utils.py 283): INFO Epoch: [10] [ 950/2502] eta: 0:19:51 lr: 0.000016 loss_cls: 3.7269 (3.9051) grad_norm: 3.9606 (4.2576) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 09:51:21 root] (utils.py 283): INFO Epoch: [10] [ 960/2502] eta: 0:19:44 lr: 0.000016 loss_cls: 4.1281 (3.9043) grad_norm: 4.0384 (4.2561) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 09:51:29 root] (utils.py 283): INFO Epoch: [10] [ 970/2502] eta: 0:19:36 lr: 0.000016 loss_cls: 4.0990 (3.9035) grad_norm: 4.1256 (4.2557) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 09:51:36 root] (utils.py 283): INFO Epoch: [10] [ 980/2502] eta: 0:19:28 lr: 0.000016 loss_cls: 3.6908 (3.9016) grad_norm: 4.1825 (4.2562) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 09:51:44 root] (utils.py 283): INFO Epoch: [10] [ 990/2502] eta: 0:19:20 lr: 0.000016 loss_cls: 4.0092 (3.9041) grad_norm: 4.1200 (4.2551) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 09:51:52 root] (utils.py 283): INFO Epoch: [10] [1000/2502] eta: 0:19:13 lr: 0.000016 loss_cls: 4.0092 (3.9021) grad_norm: 4.2010 (4.2555) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-05 09:51:59 root] (utils.py 283): INFO Epoch: [10] [1010/2502] eta: 0:19:05 lr: 0.000016 loss_cls: 3.9669 (3.9029) grad_norm: 4.2227 (4.2549) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-05 09:52:07 root] (utils.py 283): INFO Epoch: [10] [1020/2502] eta: 0:18:57 lr: 0.000016 loss_cls: 4.0146 (3.9019) grad_norm: 4.0939 (4.2551) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 09:52:15 root] (utils.py 283): INFO Epoch: [10] [1030/2502] eta: 0:18:50 lr: 0.000016 loss_cls: 3.9879 (3.9015) grad_norm: 3.9918 (4.2569) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 09:52:22 root] (utils.py 283): INFO Epoch: [10] [1040/2502] eta: 0:18:42 lr: 0.000016 loss_cls: 3.8841 (3.9010) grad_norm: 4.1437 (4.2594) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 09:52:30 root] (utils.py 283): INFO Epoch: [10] [1050/2502] eta: 0:18:34 lr: 0.000016 loss_cls: 3.7916 (3.8999) grad_norm: 4.3279 (4.2597) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 09:52:37 root] (utils.py 283): INFO Epoch: [10] [1060/2502] eta: 0:18:26 lr: 0.000016 loss_cls: 3.9557 (3.9000) grad_norm: 4.1523 (4.2576) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 09:52:45 root] (utils.py 283): INFO Epoch: [10] [1070/2502] eta: 0:18:19 lr: 0.000016 loss_cls: 4.0798 (3.9023) grad_norm: 4.0735 (4.2584) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 09:52:53 root] (utils.py 283): INFO Epoch: [10] [1080/2502] eta: 0:18:11 lr: 0.000016 loss_cls: 4.0634 (3.9011) grad_norm: 4.1876 (4.2556) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 09:53:00 root] (utils.py 283): INFO Epoch: [10] [1090/2502] eta: 0:18:03 lr: 0.000016 loss_cls: 3.9760 (3.8991) grad_norm: 4.0769 (4.2547) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 09:53:08 root] (utils.py 283): INFO Epoch: [10] [1100/2502] eta: 0:17:55 lr: 0.000016 loss_cls: 3.9198 (3.8988) grad_norm: 3.9966 (4.2527) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 09:53:16 root] (utils.py 283): INFO Epoch: [10] [1110/2502] eta: 0:17:48 lr: 0.000016 loss_cls: 3.9198 (3.8991) grad_norm: 4.0719 (4.2548) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 09:53:23 root] (utils.py 283): INFO Epoch: [10] [1120/2502] eta: 0:17:40 lr: 0.000016 loss_cls: 3.8407 (3.8976) grad_norm: 4.3368 (4.2538) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-05 09:53:31 root] (utils.py 283): INFO Epoch: [10] [1130/2502] eta: 0:17:32 lr: 0.000016 loss_cls: 4.0351 (3.8998) grad_norm: 4.0492 (4.2530) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 09:53:38 root] (utils.py 283): INFO Epoch: [10] [1140/2502] eta: 0:17:25 lr: 0.000016 loss_cls: 4.2590 (3.9018) grad_norm: 4.0242 (4.2515) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 09:53:46 root] (utils.py 283): INFO Epoch: [10] [1150/2502] eta: 0:17:17 lr: 0.000016 loss_cls: 4.0929 (3.9027) grad_norm: 4.2913 (4.2554) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 09:53:54 root] (utils.py 283): INFO Epoch: [10] [1160/2502] eta: 0:17:09 lr: 0.000016 loss_cls: 4.0929 (3.9023) grad_norm: 4.2913 (4.2691) time: 0.7734 data: 0.0002 max mem: 8421 +[2024-12-05 09:54:02 root] (utils.py 283): INFO Epoch: [10] [1170/2502] eta: 0:17:02 lr: 0.000016 loss_cls: 3.7012 (3.8984) grad_norm: 4.2210 (4.2710) time: 0.7817 data: 0.0002 max mem: 8421 +[2024-12-05 09:54:09 root] (utils.py 283): INFO Epoch: [10] [1180/2502] eta: 0:16:54 lr: 0.000016 loss_cls: 3.7130 (3.8988) grad_norm: 4.2970 (4.2710) time: 0.7789 data: 0.0002 max mem: 8421 +[2024-12-05 09:54:17 root] (utils.py 283): INFO Epoch: [10] [1190/2502] eta: 0:16:47 lr: 0.000016 loss_cls: 4.0935 (3.9009) grad_norm: 4.2499 (4.2719) time: 0.7704 data: 0.0003 max mem: 8421 +[2024-12-05 09:54:25 root] (utils.py 283): INFO Epoch: [10] [1200/2502] eta: 0:16:39 lr: 0.000016 loss_cls: 4.2139 (3.9010) grad_norm: 4.3630 (4.2771) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 09:54:32 root] (utils.py 283): INFO Epoch: [10] [1210/2502] eta: 0:16:31 lr: 0.000016 loss_cls: 4.0650 (3.9002) grad_norm: 4.3530 (4.2774) time: 0.7687 data: 0.0003 max mem: 8421 +[2024-12-05 09:54:40 root] (utils.py 283): INFO Epoch: [10] [1220/2502] eta: 0:16:23 lr: 0.000016 loss_cls: 4.0609 (3.9012) grad_norm: 4.0650 (4.2749) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 09:54:48 root] (utils.py 283): INFO Epoch: [10] [1230/2502] eta: 0:16:16 lr: 0.000016 loss_cls: 3.8208 (3.8975) grad_norm: 4.0498 (4.2740) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 09:54:55 root] (utils.py 283): INFO Epoch: [10] [1240/2502] eta: 0:16:08 lr: 0.000016 loss_cls: 3.4494 (3.8975) grad_norm: 4.1363 (4.2752) time: 0.7644 data: 0.0003 max mem: 8421 +[2024-12-05 09:55:03 root] (utils.py 283): INFO Epoch: [10] [1250/2502] eta: 0:16:00 lr: 0.000016 loss_cls: 3.9901 (3.8984) grad_norm: 4.2637 (4.2760) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 09:55:11 root] (utils.py 283): INFO Epoch: [10] [1260/2502] eta: 0:15:53 lr: 0.000016 loss_cls: 3.9901 (3.8973) grad_norm: 4.2637 (4.2751) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 09:55:18 root] (utils.py 283): INFO Epoch: [10] [1270/2502] eta: 0:15:45 lr: 0.000016 loss_cls: 3.9196 (3.8965) grad_norm: 4.0678 (4.2746) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 09:55:26 root] (utils.py 283): INFO Epoch: [10] [1280/2502] eta: 0:15:37 lr: 0.000016 loss_cls: 3.7247 (3.8957) grad_norm: 4.0678 (4.2731) time: 0.7612 data: 0.0002 max mem: 8421 +[2024-12-05 09:55:34 root] (utils.py 283): INFO Epoch: [10] [1290/2502] eta: 0:15:29 lr: 0.000016 loss_cls: 3.6296 (3.8937) grad_norm: 4.0411 (4.2712) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 09:55:41 root] (utils.py 283): INFO Epoch: [10] [1300/2502] eta: 0:15:22 lr: 0.000016 loss_cls: 3.8536 (3.8945) grad_norm: 4.0411 (4.2692) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 09:55:49 root] (utils.py 283): INFO Epoch: [10] [1310/2502] eta: 0:15:14 lr: 0.000016 loss_cls: 4.0092 (3.8958) grad_norm: 4.1229 (4.2694) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 09:55:57 root] (utils.py 283): INFO Epoch: [10] [1320/2502] eta: 0:15:06 lr: 0.000016 loss_cls: 3.8427 (3.8949) grad_norm: 4.3672 (4.2722) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 09:56:04 root] (utils.py 283): INFO Epoch: [10] [1330/2502] eta: 0:14:59 lr: 0.000016 loss_cls: 3.8252 (3.8951) grad_norm: 4.1385 (4.2713) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 09:56:12 root] (utils.py 283): INFO Epoch: [10] [1340/2502] eta: 0:14:51 lr: 0.000016 loss_cls: 3.7000 (3.8936) grad_norm: 4.2475 (4.2718) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 09:56:19 root] (utils.py 283): INFO Epoch: [10] [1350/2502] eta: 0:14:43 lr: 0.000016 loss_cls: 3.7000 (3.8928) grad_norm: 4.2475 (4.2739) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 09:56:27 root] (utils.py 283): INFO Epoch: [10] [1360/2502] eta: 0:14:36 lr: 0.000016 loss_cls: 3.8756 (3.8918) grad_norm: 3.9959 (4.2721) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 09:56:35 root] (utils.py 283): INFO Epoch: [10] [1370/2502] eta: 0:14:28 lr: 0.000016 loss_cls: 3.8756 (3.8913) grad_norm: 4.1299 (4.2740) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-05 09:56:43 root] (utils.py 283): INFO Epoch: [10] [1380/2502] eta: 0:14:21 lr: 0.000016 loss_cls: 4.0571 (3.8913) grad_norm: 4.4819 (4.2766) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 09:56:51 root] (utils.py 283): INFO Epoch: [10] [1390/2502] eta: 0:14:13 lr: 0.000016 loss_cls: 3.9642 (3.8909) grad_norm: 4.3189 (4.2768) time: 0.7844 data: 0.0002 max mem: 8421 +[2024-12-05 09:56:59 root] (utils.py 283): INFO Epoch: [10] [1400/2502] eta: 0:14:06 lr: 0.000016 loss_cls: 3.8920 (3.8914) grad_norm: 4.3412 (4.2785) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-05 09:57:06 root] (utils.py 283): INFO Epoch: [10] [1410/2502] eta: 0:13:58 lr: 0.000016 loss_cls: 3.8728 (3.8904) grad_norm: 4.3498 (4.2788) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 09:57:14 root] (utils.py 283): INFO Epoch: [10] [1420/2502] eta: 0:13:51 lr: 0.000016 loss_cls: 3.6589 (3.8899) grad_norm: 4.0440 (4.2767) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-05 09:57:22 root] (utils.py 283): INFO Epoch: [10] [1430/2502] eta: 0:13:43 lr: 0.000016 loss_cls: 4.1620 (3.8926) grad_norm: 4.0039 (4.2765) time: 0.7878 data: 0.0002 max mem: 8421 +[2024-12-05 09:57:30 root] (utils.py 283): INFO Epoch: [10] [1440/2502] eta: 0:13:36 lr: 0.000016 loss_cls: 4.0881 (3.8920) grad_norm: 4.0447 (4.2752) time: 0.7939 data: 0.0002 max mem: 8421 +[2024-12-05 09:57:38 root] (utils.py 283): INFO Epoch: [10] [1450/2502] eta: 0:13:28 lr: 0.000016 loss_cls: 3.7464 (3.8906) grad_norm: 4.1324 (4.2751) time: 0.7912 data: 0.0002 max mem: 8421 +[2024-12-05 09:57:46 root] (utils.py 283): INFO Epoch: [10] [1460/2502] eta: 0:13:20 lr: 0.000016 loss_cls: 3.9181 (3.8908) grad_norm: 4.3310 (4.2754) time: 0.7835 data: 0.0002 max mem: 8421 +[2024-12-05 09:57:54 root] (utils.py 283): INFO Epoch: [10] [1470/2502] eta: 0:13:13 lr: 0.000016 loss_cls: 3.7730 (3.8901) grad_norm: 4.0803 (4.2757) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 09:58:02 root] (utils.py 283): INFO Epoch: [10] [1480/2502] eta: 0:13:05 lr: 0.000016 loss_cls: 3.7493 (3.8900) grad_norm: 4.0769 (4.2760) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-05 09:58:09 root] (utils.py 283): INFO Epoch: [10] [1490/2502] eta: 0:12:58 lr: 0.000016 loss_cls: 4.2233 (3.8923) grad_norm: 4.2455 (4.2776) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 09:58:17 root] (utils.py 283): INFO Epoch: [10] [1500/2502] eta: 0:12:50 lr: 0.000016 loss_cls: 4.2929 (3.8940) grad_norm: 4.1907 (4.2767) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 09:58:25 root] (utils.py 283): INFO Epoch: [10] [1510/2502] eta: 0:12:42 lr: 0.000016 loss_cls: 4.1437 (3.8930) grad_norm: 4.1066 (4.2825) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 09:58:33 root] (utils.py 283): INFO Epoch: [10] [1520/2502] eta: 0:12:35 lr: 0.000016 loss_cls: 4.0064 (3.8935) grad_norm: 4.1551 (4.2842) time: 0.7741 data: 0.0002 max mem: 8421 +[2024-12-05 09:58:40 root] (utils.py 283): INFO Epoch: [10] [1530/2502] eta: 0:12:27 lr: 0.000016 loss_cls: 3.9664 (3.8925) grad_norm: 4.1993 (4.2838) time: 0.7854 data: 0.0002 max mem: 8421 +[2024-12-05 09:58:48 root] (utils.py 283): INFO Epoch: [10] [1540/2502] eta: 0:12:19 lr: 0.000016 loss_cls: 3.8189 (3.8912) grad_norm: 4.1518 (4.2821) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-05 09:58:56 root] (utils.py 283): INFO Epoch: [10] [1550/2502] eta: 0:12:12 lr: 0.000016 loss_cls: 3.6754 (3.8904) grad_norm: 4.1147 (4.2818) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-05 09:59:03 root] (utils.py 283): INFO Epoch: [10] [1560/2502] eta: 0:12:04 lr: 0.000016 loss_cls: 3.8312 (3.8908) grad_norm: 4.1147 (4.2811) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 09:59:11 root] (utils.py 283): INFO Epoch: [10] [1570/2502] eta: 0:11:56 lr: 0.000016 loss_cls: 4.0823 (3.8917) grad_norm: 3.9845 (4.2790) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 09:59:19 root] (utils.py 283): INFO Epoch: [10] [1580/2502] eta: 0:11:49 lr: 0.000016 loss_cls: 4.1504 (3.8929) grad_norm: 4.0901 (4.2820) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 09:59:27 root] (utils.py 283): INFO Epoch: [10] [1590/2502] eta: 0:11:41 lr: 0.000016 loss_cls: 4.0965 (3.8928) grad_norm: 4.2786 (4.2831) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-05 09:59:34 root] (utils.py 283): INFO Epoch: [10] [1600/2502] eta: 0:11:33 lr: 0.000016 loss_cls: 4.0602 (3.8926) grad_norm: 4.1255 (4.2814) time: 0.7700 data: 0.0002 max mem: 8421 +[2024-12-05 09:59:42 root] (utils.py 283): INFO Epoch: [10] [1610/2502] eta: 0:11:25 lr: 0.000016 loss_cls: 4.0602 (3.8918) grad_norm: 4.0999 (4.2810) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 09:59:50 root] (utils.py 283): INFO Epoch: [10] [1620/2502] eta: 0:11:18 lr: 0.000016 loss_cls: 4.0849 (3.8937) grad_norm: 4.1467 (4.2809) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 09:59:57 root] (utils.py 283): INFO Epoch: [10] [1630/2502] eta: 0:11:10 lr: 0.000016 loss_cls: 4.0945 (3.8937) grad_norm: 4.1029 (4.2800) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:05 root] (utils.py 283): INFO Epoch: [10] [1640/2502] eta: 0:11:02 lr: 0.000016 loss_cls: 4.1810 (3.8928) grad_norm: 4.1067 (4.2795) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:12 root] (utils.py 283): INFO Epoch: [10] [1650/2502] eta: 0:10:55 lr: 0.000016 loss_cls: 4.1120 (3.8932) grad_norm: 4.3166 (4.2856) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:20 root] (utils.py 283): INFO Epoch: [10] [1660/2502] eta: 0:10:47 lr: 0.000016 loss_cls: 4.0533 (3.8933) grad_norm: 4.3166 (4.2852) time: 0.7602 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:28 root] (utils.py 283): INFO Epoch: [10] [1670/2502] eta: 0:10:39 lr: 0.000016 loss_cls: 4.0533 (3.8933) grad_norm: 4.1173 (4.2869) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:35 root] (utils.py 283): INFO Epoch: [10] [1680/2502] eta: 0:10:32 lr: 0.000016 loss_cls: 3.8417 (3.8932) grad_norm: 4.1922 (4.2885) time: 0.7743 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:43 root] (utils.py 283): INFO Epoch: [10] [1690/2502] eta: 0:10:24 lr: 0.000016 loss_cls: 4.0842 (3.8949) grad_norm: 4.2574 (4.2887) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:51 root] (utils.py 283): INFO Epoch: [10] [1700/2502] eta: 0:10:16 lr: 0.000016 loss_cls: 4.0870 (3.8960) grad_norm: 4.2574 (4.2901) time: 0.7587 data: 0.0002 max mem: 8421 +[2024-12-05 10:00:58 root] (utils.py 283): INFO Epoch: [10] [1710/2502] eta: 0:10:08 lr: 0.000016 loss_cls: 3.9560 (3.8946) grad_norm: 4.0428 (4.2897) time: 0.7589 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:06 root] (utils.py 283): INFO Epoch: [10] [1720/2502] eta: 0:10:01 lr: 0.000016 loss_cls: 3.8285 (3.8944) grad_norm: 4.0404 (4.2899) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:13 root] (utils.py 283): INFO Epoch: [10] [1730/2502] eta: 0:09:53 lr: 0.000016 loss_cls: 4.0461 (3.8951) grad_norm: 4.1551 (4.2889) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:21 root] (utils.py 283): INFO Epoch: [10] [1740/2502] eta: 0:09:45 lr: 0.000016 loss_cls: 4.1960 (3.8968) grad_norm: 4.2401 (4.2935) time: 0.7586 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:29 root] (utils.py 283): INFO Epoch: [10] [1750/2502] eta: 0:09:37 lr: 0.000016 loss_cls: 4.1960 (3.8980) grad_norm: 4.3361 (4.2935) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:36 root] (utils.py 283): INFO Epoch: [10] [1760/2502] eta: 0:09:30 lr: 0.000016 loss_cls: 4.2264 (3.9001) grad_norm: 4.2467 (4.2941) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:44 root] (utils.py 283): INFO Epoch: [10] [1770/2502] eta: 0:09:22 lr: 0.000016 loss_cls: 4.2020 (3.9001) grad_norm: 4.1956 (4.2946) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:52 root] (utils.py 283): INFO Epoch: [10] [1780/2502] eta: 0:09:14 lr: 0.000016 loss_cls: 3.6875 (3.8972) grad_norm: 4.0869 (4.2930) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 10:01:59 root] (utils.py 283): INFO Epoch: [10] [1790/2502] eta: 0:09:07 lr: 0.000016 loss_cls: 3.6180 (3.8970) grad_norm: 4.0738 (4.2923) time: 0.7598 data: 0.0002 max mem: 8421 +[2024-12-05 10:02:07 root] (utils.py 283): INFO Epoch: [10] [1800/2502] eta: 0:08:59 lr: 0.000016 loss_cls: 3.9835 (3.8956) grad_norm: 4.1246 (4.2913) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 10:02:14 root] (utils.py 283): INFO Epoch: [10] [1810/2502] eta: 0:08:51 lr: 0.000016 loss_cls: 3.7183 (3.8956) grad_norm: 4.1304 (4.2915) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 10:02:22 root] (utils.py 283): INFO Epoch: [10] [1820/2502] eta: 0:08:44 lr: 0.000016 loss_cls: 4.0940 (3.8970) grad_norm: 4.2806 (4.2912) time: 0.7742 data: 0.0002 max mem: 8421 +[2024-12-05 10:02:30 root] (utils.py 283): INFO Epoch: [10] [1830/2502] eta: 0:08:36 lr: 0.000016 loss_cls: 4.0940 (3.8980) grad_norm: 4.0248 (4.2901) time: 0.7753 data: 0.0002 max mem: 8421 +[2024-12-05 10:02:38 root] (utils.py 283): INFO Epoch: [10] [1840/2502] eta: 0:08:28 lr: 0.000016 loss_cls: 4.0548 (3.8984) grad_norm: 4.0683 (4.2930) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 10:02:45 root] (utils.py 283): INFO Epoch: [10] [1850/2502] eta: 0:08:21 lr: 0.000016 loss_cls: 3.9666 (3.8975) grad_norm: 4.3239 (4.2946) time: 0.7722 data: 0.0003 max mem: 8421 +[2024-12-05 10:02:53 root] (utils.py 283): INFO Epoch: [10] [1860/2502] eta: 0:08:13 lr: 0.000016 loss_cls: 4.0511 (3.8985) grad_norm: 4.3239 (4.2972) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-05 10:03:01 root] (utils.py 283): INFO Epoch: [10] [1870/2502] eta: 0:08:05 lr: 0.000016 loss_cls: 4.0532 (3.8989) grad_norm: 4.1485 (4.2970) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-05 10:03:09 root] (utils.py 283): INFO Epoch: [10] [1880/2502] eta: 0:07:58 lr: 0.000016 loss_cls: 3.9581 (3.8986) grad_norm: 4.2535 (4.3024) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-05 10:03:17 root] (utils.py 283): INFO Epoch: [10] [1890/2502] eta: 0:07:50 lr: 0.000016 loss_cls: 3.5939 (3.8972) grad_norm: 4.3162 (4.3023) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 10:03:24 root] (utils.py 283): INFO Epoch: [10] [1900/2502] eta: 0:07:42 lr: 0.000016 loss_cls: 3.4668 (3.8958) grad_norm: 4.2123 (4.3023) time: 0.7918 data: 0.0003 max mem: 8421 +[2024-12-05 10:03:32 root] (utils.py 283): INFO Epoch: [10] [1910/2502] eta: 0:07:35 lr: 0.000016 loss_cls: 3.8619 (3.8968) grad_norm: 4.0737 (4.3018) time: 0.7947 data: 0.0002 max mem: 8421 +[2024-12-05 10:03:40 root] (utils.py 283): INFO Epoch: [10] [1920/2502] eta: 0:07:27 lr: 0.000016 loss_cls: 4.0713 (3.8963) grad_norm: 4.2651 (4.3021) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-05 10:03:48 root] (utils.py 283): INFO Epoch: [10] [1930/2502] eta: 0:07:19 lr: 0.000016 loss_cls: 3.6621 (3.8951) grad_norm: 4.3121 (4.3024) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 10:03:56 root] (utils.py 283): INFO Epoch: [10] [1940/2502] eta: 0:07:12 lr: 0.000016 loss_cls: 4.1766 (3.8974) grad_norm: 4.3121 (4.3038) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 10:04:03 root] (utils.py 283): INFO Epoch: [10] [1950/2502] eta: 0:07:04 lr: 0.000016 loss_cls: 4.2140 (3.8970) grad_norm: 4.2343 (4.3038) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 10:04:11 root] (utils.py 283): INFO Epoch: [10] [1960/2502] eta: 0:06:56 lr: 0.000016 loss_cls: 3.9016 (3.8965) grad_norm: 4.0084 (4.3020) time: 0.7682 data: 0.0002 max mem: 8421 +[2024-12-05 10:04:19 root] (utils.py 283): INFO Epoch: [10] [1970/2502] eta: 0:06:49 lr: 0.000016 loss_cls: 3.9222 (3.8965) grad_norm: 4.0149 (4.3016) time: 0.7819 data: 0.0002 max mem: 8421 +[2024-12-05 10:04:27 root] (utils.py 283): INFO Epoch: [10] [1980/2502] eta: 0:06:41 lr: 0.000016 loss_cls: 4.0413 (3.8964) grad_norm: 4.1653 (4.3015) time: 0.7876 data: 0.0002 max mem: 8421 +[2024-12-05 10:04:34 root] (utils.py 283): INFO Epoch: [10] [1990/2502] eta: 0:06:33 lr: 0.000016 loss_cls: 4.0145 (3.8970) grad_norm: 4.2724 (4.3025) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 10:04:42 root] (utils.py 283): INFO Epoch: [10] [2000/2502] eta: 0:06:26 lr: 0.000016 loss_cls: 4.0145 (3.8967) grad_norm: 4.3421 (4.3039) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-05 10:04:50 root] (utils.py 283): INFO Epoch: [10] [2010/2502] eta: 0:06:18 lr: 0.000016 loss_cls: 3.4786 (3.8944) grad_norm: 4.0830 (4.3025) time: 0.7839 data: 0.0002 max mem: 8421 +[2024-12-05 10:04:58 root] (utils.py 283): INFO Epoch: [10] [2020/2502] eta: 0:06:10 lr: 0.000016 loss_cls: 3.4694 (3.8943) grad_norm: 4.0770 (4.3019) time: 0.7870 data: 0.0002 max mem: 8421 +[2024-12-05 10:05:06 root] (utils.py 283): INFO Epoch: [10] [2030/2502] eta: 0:06:03 lr: 0.000016 loss_cls: 4.0112 (3.8939) grad_norm: 4.1819 (4.3024) time: 0.7880 data: 0.0003 max mem: 8421 +[2024-12-05 10:05:14 root] (utils.py 283): INFO Epoch: [10] [2040/2502] eta: 0:05:55 lr: 0.000016 loss_cls: 3.7691 (3.8933) grad_norm: 4.1254 (4.3010) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-05 10:05:22 root] (utils.py 283): INFO Epoch: [10] [2050/2502] eta: 0:05:47 lr: 0.000016 loss_cls: 4.0186 (3.8938) grad_norm: 4.1587 (4.3019) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 10:05:30 root] (utils.py 283): INFO Epoch: [10] [2060/2502] eta: 0:05:40 lr: 0.000016 loss_cls: 4.0881 (3.8931) grad_norm: 4.1587 (4.3018) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 10:05:37 root] (utils.py 283): INFO Epoch: [10] [2070/2502] eta: 0:05:32 lr: 0.000016 loss_cls: 4.0881 (3.8939) grad_norm: 3.9422 (4.3009) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 10:05:45 root] (utils.py 283): INFO Epoch: [10] [2080/2502] eta: 0:05:24 lr: 0.000016 loss_cls: 4.1527 (3.8941) grad_norm: 4.0292 (4.3006) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-05 10:05:53 root] (utils.py 283): INFO Epoch: [10] [2090/2502] eta: 0:05:17 lr: 0.000016 loss_cls: 4.1129 (3.8950) grad_norm: 4.0210 (4.2989) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-05 10:06:01 root] (utils.py 283): INFO Epoch: [10] [2100/2502] eta: 0:05:09 lr: 0.000016 loss_cls: 4.0840 (3.8948) grad_norm: 3.9551 (4.2996) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-05 10:06:09 root] (utils.py 283): INFO Epoch: [10] [2110/2502] eta: 0:05:01 lr: 0.000016 loss_cls: 4.0840 (3.8962) grad_norm: 4.2467 (4.3007) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 10:06:16 root] (utils.py 283): INFO Epoch: [10] [2120/2502] eta: 0:04:54 lr: 0.000016 loss_cls: 4.1873 (3.8961) grad_norm: 4.2365 (4.3018) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-05 10:06:24 root] (utils.py 283): INFO Epoch: [10] [2130/2502] eta: 0:04:46 lr: 0.000016 loss_cls: 3.9561 (3.8962) grad_norm: 4.0621 (4.3010) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 10:06:31 root] (utils.py 283): INFO Epoch: [10] [2140/2502] eta: 0:04:38 lr: 0.000016 loss_cls: 4.0827 (3.8966) grad_norm: 4.0992 (4.3017) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 10:06:39 root] (utils.py 283): INFO Epoch: [10] [2150/2502] eta: 0:04:30 lr: 0.000016 loss_cls: 4.1709 (3.8978) grad_norm: 4.2719 (4.3025) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 10:06:47 root] (utils.py 283): INFO Epoch: [10] [2160/2502] eta: 0:04:23 lr: 0.000016 loss_cls: 4.0043 (3.8977) grad_norm: 4.0972 (4.3013) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 10:06:54 root] (utils.py 283): INFO Epoch: [10] [2170/2502] eta: 0:04:15 lr: 0.000016 loss_cls: 3.8083 (3.8964) grad_norm: 4.0076 (4.3036) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 10:07:02 root] (utils.py 283): INFO Epoch: [10] [2180/2502] eta: 0:04:07 lr: 0.000016 loss_cls: 3.4933 (3.8949) grad_norm: 4.1701 (4.3033) time: 0.7613 data: 0.0003 max mem: 8421 +[2024-12-05 10:07:10 root] (utils.py 283): INFO Epoch: [10] [2190/2502] eta: 0:04:00 lr: 0.000016 loss_cls: 4.0105 (3.8961) grad_norm: 4.0582 (4.3049) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 10:07:17 root] (utils.py 283): INFO Epoch: [10] [2200/2502] eta: 0:03:52 lr: 0.000016 loss_cls: 4.0200 (3.8961) grad_norm: 4.0370 (4.3047) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-05 10:07:25 root] (utils.py 283): INFO Epoch: [10] [2210/2502] eta: 0:03:44 lr: 0.000016 loss_cls: 3.9789 (3.8953) grad_norm: 4.1120 (4.3061) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 10:07:33 root] (utils.py 283): INFO Epoch: [10] [2220/2502] eta: 0:03:37 lr: 0.000016 loss_cls: 3.8003 (3.8953) grad_norm: 4.0116 (4.3047) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-05 10:07:40 root] (utils.py 283): INFO Epoch: [10] [2230/2502] eta: 0:03:29 lr: 0.000016 loss_cls: 3.9188 (3.8959) grad_norm: 4.0137 (4.3048) time: 0.7711 data: 0.0002 max mem: 8421 +[2024-12-05 10:07:48 root] (utils.py 283): INFO Epoch: [10] [2240/2502] eta: 0:03:21 lr: 0.000016 loss_cls: 3.9188 (3.8959) grad_norm: 4.0724 (4.3037) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 10:07:55 root] (utils.py 283): INFO Epoch: [10] [2250/2502] eta: 0:03:13 lr: 0.000016 loss_cls: 3.8923 (3.8965) grad_norm: 4.1175 (4.3079) time: 0.7640 data: 0.0003 max mem: 8421 +[2024-12-05 10:08:03 root] (utils.py 283): INFO Epoch: [10] [2260/2502] eta: 0:03:06 lr: 0.000016 loss_cls: 3.9220 (3.8958) grad_norm: 4.1850 (4.3073) time: 0.7614 data: 0.0003 max mem: 8421 +[2024-12-05 10:08:11 root] (utils.py 283): INFO Epoch: [10] [2270/2502] eta: 0:02:58 lr: 0.000016 loss_cls: 3.9223 (3.8962) grad_norm: 4.0804 (4.3063) time: 0.7606 data: 0.0003 max mem: 8421 +[2024-12-05 10:08:18 root] (utils.py 283): INFO Epoch: [10] [2280/2502] eta: 0:02:50 lr: 0.000016 loss_cls: 4.0034 (3.8971) grad_norm: 4.0804 (4.3061) time: 0.7684 data: 0.0003 max mem: 8421 +[2024-12-05 10:08:26 root] (utils.py 283): INFO Epoch: [10] [2290/2502] eta: 0:02:43 lr: 0.000016 loss_cls: 3.9872 (3.8973) grad_norm: 4.2433 (4.3062) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 10:08:34 root] (utils.py 283): INFO Epoch: [10] [2300/2502] eta: 0:02:35 lr: 0.000016 loss_cls: 4.0340 (3.8979) grad_norm: 4.2433 (4.3057) time: 0.7875 data: 0.0002 max mem: 8421 +[2024-12-05 10:08:42 root] (utils.py 283): INFO Epoch: [10] [2310/2502] eta: 0:02:27 lr: 0.000016 loss_cls: 4.0340 (3.8977) grad_norm: 4.1399 (4.3051) time: 0.7863 data: 0.0002 max mem: 8421 +[2024-12-05 10:08:50 root] (utils.py 283): INFO Epoch: [10] [2320/2502] eta: 0:02:20 lr: 0.000016 loss_cls: 4.0253 (3.8985) grad_norm: 4.1597 (4.3052) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 10:08:58 root] (utils.py 283): INFO Epoch: [10] [2330/2502] eta: 0:02:12 lr: 0.000016 loss_cls: 3.9830 (3.8973) grad_norm: 4.0564 (4.3039) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:05 root] (utils.py 283): INFO Epoch: [10] [2340/2502] eta: 0:02:04 lr: 0.000016 loss_cls: 3.7730 (3.8973) grad_norm: 3.9630 (4.3030) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:13 root] (utils.py 283): INFO Epoch: [10] [2350/2502] eta: 0:01:57 lr: 0.000016 loss_cls: 4.0326 (3.8962) grad_norm: 4.1260 (4.3026) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:21 root] (utils.py 283): INFO Epoch: [10] [2360/2502] eta: 0:01:49 lr: 0.000016 loss_cls: 3.8963 (3.8957) grad_norm: 4.0815 (4.3018) time: 0.7614 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:28 root] (utils.py 283): INFO Epoch: [10] [2370/2502] eta: 0:01:41 lr: 0.000016 loss_cls: 3.6024 (3.8944) grad_norm: 4.1449 (4.3048) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:36 root] (utils.py 283): INFO Epoch: [10] [2380/2502] eta: 0:01:33 lr: 0.000016 loss_cls: 3.7082 (3.8947) grad_norm: 4.3033 (4.3052) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:44 root] (utils.py 283): INFO Epoch: [10] [2390/2502] eta: 0:01:26 lr: 0.000016 loss_cls: 4.1037 (3.8952) grad_norm: 4.1871 (4.3044) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:51 root] (utils.py 283): INFO Epoch: [10] [2400/2502] eta: 0:01:18 lr: 0.000016 loss_cls: 3.9757 (3.8954) grad_norm: 3.9986 (4.3036) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 10:09:59 root] (utils.py 283): INFO Epoch: [10] [2410/2502] eta: 0:01:10 lr: 0.000016 loss_cls: 4.1340 (3.8967) grad_norm: 4.0452 (4.3021) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 10:10:07 root] (utils.py 283): INFO Epoch: [10] [2420/2502] eta: 0:01:03 lr: 0.000016 loss_cls: 4.0518 (3.8964) grad_norm: 4.0456 (4.3014) time: 0.7702 data: 0.0002 max mem: 8421 +[2024-12-05 10:10:14 root] (utils.py 283): INFO Epoch: [10] [2430/2502] eta: 0:00:55 lr: 0.000016 loss_cls: 3.8491 (3.8963) grad_norm: 3.9699 (4.3010) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 10:10:22 root] (utils.py 283): INFO Epoch: [10] [2440/2502] eta: 0:00:47 lr: 0.000016 loss_cls: 3.8203 (3.8952) grad_norm: 4.1454 (4.3009) time: 0.7701 data: 0.0002 max mem: 8421 +[2024-12-05 10:10:30 root] (utils.py 283): INFO Epoch: [10] [2450/2502] eta: 0:00:40 lr: 0.000016 loss_cls: 3.6840 (3.8951) grad_norm: 4.1358 (4.3010) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 10:10:37 root] (utils.py 283): INFO Epoch: [10] [2460/2502] eta: 0:00:32 lr: 0.000016 loss_cls: 3.8916 (3.8951) grad_norm: 4.1351 (4.3004) time: 0.7584 data: 0.0002 max mem: 8421 +[2024-12-05 10:10:45 root] (utils.py 283): INFO Epoch: [10] [2470/2502] eta: 0:00:24 lr: 0.000016 loss_cls: 4.0129 (3.8950) grad_norm: 4.0826 (4.2999) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 10:10:53 root] (utils.py 283): INFO Epoch: [10] [2480/2502] eta: 0:00:16 lr: 0.000016 loss_cls: 4.1446 (3.8951) grad_norm: 3.9974 (4.2987) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 10:11:01 root] (utils.py 283): INFO Epoch: [10] [2490/2502] eta: 0:00:09 lr: 0.000016 loss_cls: 3.6528 (3.8938) grad_norm: 3.9974 (4.2983) time: 0.7884 data: 0.0211 max mem: 8421 +[2024-12-05 10:11:08 root] (utils.py 283): INFO Epoch: [10] [2500/2502] eta: 0:00:01 lr: 0.000016 loss_cls: 3.7247 (3.8944) grad_norm: 4.1238 (4.2975) time: 0.7854 data: 0.0211 max mem: 8421 +[2024-12-05 10:11:09 root] (utils.py 283): INFO Epoch: [10] [2501/2502] eta: 0:00:00 lr: 0.000016 loss_cls: 3.9446 (3.8944) grad_norm: 4.1238 (4.2975) time: 0.7861 data: 0.0211 max mem: 8421 +[2024-12-05 10:11:09 root] (utils.py 297): INFO Epoch: [10] Total time: 0:32:06 (0.7699 s / it) +[2024-12-05 10:11:09 root] (engine.py 178): INFO Averaged stats:lr: 0.000016 loss_cls: 3.9446 (3.9027) grad_norm: 4.1238 (4.2975) +[2024-12-05 10:11:10 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7431 (0.7431) acc1: 84.3750 (84.3750) acc3: 94.5312 (94.5312) acc5: 97.6562 (97.6562) time: 0.1309 data: 0.0003 max mem: 8421 +[2024-12-05 10:11:11 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8374 (0.8699) acc1: 84.3750 (82.2443) acc3: 94.5312 (92.9688) acc5: 95.3125 (95.8807) time: 0.1312 data: 0.0004 max mem: 8421 +[2024-12-05 10:11:12 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9076 (0.9175) acc1: 78.9062 (80.5804) acc3: 92.1875 (92.7083) acc5: 95.3125 (95.3125) time: 0.1315 data: 0.0005 max mem: 8421 +[2024-12-05 10:11:13 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9853 (0.9248) acc1: 79.6875 (80.0151) acc3: 92.1875 (92.9940) acc5: 96.0938 (95.4889) time: 0.1318 data: 0.0005 max mem: 8421 +[2024-12-05 10:11:15 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8919 (0.9206) acc1: 79.6875 (80.2401) acc3: 93.7500 (93.0069) acc5: 95.3125 (95.4078) time: 0.1317 data: 0.0004 max mem: 8421 +[2024-12-05 10:11:16 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0273 (1.0168) acc1: 72.6562 (77.9871) acc3: 85.9375 (91.1765) acc5: 91.4062 (94.1789) time: 0.1320 data: 0.0007 max mem: 8421 +[2024-12-05 10:11:17 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3559 (1.0624) acc1: 71.0938 (77.1004) acc3: 85.1562 (90.2920) acc5: 89.0625 (93.3914) time: 0.1334 data: 0.0008 max mem: 8421 +[2024-12-05 10:11:19 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2876 (1.1066) acc1: 70.3125 (76.0123) acc3: 85.9375 (89.6237) acc5: 89.8438 (92.8917) time: 0.1335 data: 0.0005 max mem: 8421 +[2024-12-05 10:11:20 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3001 (1.1434) acc1: 68.7500 (75.1833) acc3: 85.1562 (89.0336) acc5: 89.8438 (92.4286) time: 0.1355 data: 0.0023 max mem: 8421 +[2024-12-05 10:11:22 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3806 (1.1755) acc1: 67.1875 (74.3304) acc3: 83.5938 (88.3757) acc5: 89.0625 (91.9385) time: 0.1375 data: 0.0040 max mem: 8421 +[2024-12-05 10:11:22 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2605 (1.1655) acc1: 69.5312 (74.4560) acc3: 86.7188 (88.5920) acc5: 90.6250 (92.1040) time: 0.1331 data: 0.0023 max mem: 8421 +[2024-12-05 10:11:22 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1330 s / it) +[2024-12-05 10:11:24 root] (engine.py 263): INFO * Acc@1 74.326 Acc@3 88.638 Acc@5 92.066 loss 1.162 flops 1.285 layer_flops 1.251 +[2024-12-05 10:11:24 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.3% +[2024-12-05 10:11:24 root] (main.py 550): INFO Max accuracy: 74.41% +[2024-12-05 10:11:25 root] (utils.py 283): INFO Epoch: [11] [ 0/2502] eta: 0:32:02 lr: 0.000015 loss_cls: 3.9563 (3.9563) grad_norm: 3.8935 (3.8935) time: 0.7685 data: 0.0002 max mem: 8421 +[2024-12-05 10:11:32 root] (utils.py 283): INFO Epoch: [11] [ 10/2502] eta: 0:31:41 lr: 0.000015 loss_cls: 4.1734 (4.0806) grad_norm: 4.0897 (4.2384) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 10:11:40 root] (utils.py 283): INFO Epoch: [11] [ 20/2502] eta: 0:31:32 lr: 0.000015 loss_cls: 4.0968 (3.8971) grad_norm: 4.0897 (4.1848) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 10:11:48 root] (utils.py 283): INFO Epoch: [11] [ 30/2502] eta: 0:31:40 lr: 0.000015 loss_cls: 4.0968 (3.9285) grad_norm: 4.1600 (4.2400) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-05 10:11:55 root] (utils.py 283): INFO Epoch: [11] [ 40/2502] eta: 0:31:29 lr: 0.000015 loss_cls: 3.7244 (3.8714) grad_norm: 4.1434 (4.2245) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-05 10:12:03 root] (utils.py 283): INFO Epoch: [11] [ 50/2502] eta: 0:31:25 lr: 0.000015 loss_cls: 3.5725 (3.8481) grad_norm: 4.1003 (4.2023) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 10:12:11 root] (utils.py 283): INFO Epoch: [11] [ 60/2502] eta: 0:31:16 lr: 0.000015 loss_cls: 4.1980 (3.8842) grad_norm: 3.9539 (4.1710) time: 0.7698 data: 0.0002 max mem: 8421 +[2024-12-05 10:12:18 root] (utils.py 283): INFO Epoch: [11] [ 70/2502] eta: 0:31:06 lr: 0.000015 loss_cls: 4.1679 (3.8842) grad_norm: 4.0734 (4.2513) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 10:12:26 root] (utils.py 283): INFO Epoch: [11] [ 80/2502] eta: 0:30:58 lr: 0.000015 loss_cls: 3.9546 (3.9022) grad_norm: 4.1064 (4.2281) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 10:12:34 root] (utils.py 283): INFO Epoch: [11] [ 90/2502] eta: 0:30:51 lr: 0.000015 loss_cls: 3.9126 (3.9041) grad_norm: 4.0203 (4.2087) time: 0.7683 data: 0.0003 max mem: 8421 +[2024-12-05 10:12:41 root] (utils.py 283): INFO Epoch: [11] [ 100/2502] eta: 0:30:46 lr: 0.000015 loss_cls: 3.8305 (3.9022) grad_norm: 4.0484 (4.1996) time: 0.7734 data: 0.0003 max mem: 8421 +[2024-12-05 10:12:49 root] (utils.py 283): INFO Epoch: [11] [ 110/2502] eta: 0:30:37 lr: 0.000015 loss_cls: 3.9810 (3.9005) grad_norm: 4.2397 (4.2113) time: 0.7704 data: 0.0002 max mem: 8421 +[2024-12-05 10:12:57 root] (utils.py 283): INFO Epoch: [11] [ 120/2502] eta: 0:30:29 lr: 0.000015 loss_cls: 3.7908 (3.8766) grad_norm: 4.2655 (4.2260) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 10:13:04 root] (utils.py 283): INFO Epoch: [11] [ 130/2502] eta: 0:30:22 lr: 0.000015 loss_cls: 3.9054 (3.8843) grad_norm: 4.0376 (4.2216) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 10:13:12 root] (utils.py 283): INFO Epoch: [11] [ 140/2502] eta: 0:30:17 lr: 0.000015 loss_cls: 4.1202 (3.8948) grad_norm: 4.0944 (4.2461) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 10:13:20 root] (utils.py 283): INFO Epoch: [11] [ 150/2502] eta: 0:30:12 lr: 0.000015 loss_cls: 4.1202 (3.8971) grad_norm: 4.1892 (4.2607) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-05 10:13:28 root] (utils.py 283): INFO Epoch: [11] [ 160/2502] eta: 0:30:06 lr: 0.000015 loss_cls: 4.0742 (3.9129) grad_norm: 4.1892 (4.2733) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-05 10:13:36 root] (utils.py 283): INFO Epoch: [11] [ 170/2502] eta: 0:30:01 lr: 0.000015 loss_cls: 4.0447 (3.8883) grad_norm: 4.1543 (4.2658) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 10:13:44 root] (utils.py 283): INFO Epoch: [11] [ 180/2502] eta: 0:29:55 lr: 0.000015 loss_cls: 3.0425 (3.8542) grad_norm: 4.1543 (4.2627) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 10:13:52 root] (utils.py 283): INFO Epoch: [11] [ 190/2502] eta: 0:29:51 lr: 0.000015 loss_cls: 3.9543 (3.8740) grad_norm: 4.2198 (4.2697) time: 0.7960 data: 0.0003 max mem: 8421 +[2024-12-05 10:14:00 root] (utils.py 283): INFO Epoch: [11] [ 200/2502] eta: 0:29:45 lr: 0.000015 loss_cls: 4.1263 (3.8704) grad_norm: 4.2198 (4.2688) time: 0.7954 data: 0.0002 max mem: 8421 +[2024-12-05 10:14:08 root] (utils.py 283): INFO Epoch: [11] [ 210/2502] eta: 0:29:38 lr: 0.000015 loss_cls: 3.7740 (3.8680) grad_norm: 4.1093 (4.2631) time: 0.7856 data: 0.0002 max mem: 8421 +[2024-12-05 10:14:15 root] (utils.py 283): INFO Epoch: [11] [ 220/2502] eta: 0:29:31 lr: 0.000015 loss_cls: 3.7357 (3.8623) grad_norm: 4.1165 (4.2575) time: 0.7863 data: 0.0002 max mem: 8421 +[2024-12-05 10:14:23 root] (utils.py 283): INFO Epoch: [11] [ 230/2502] eta: 0:29:25 lr: 0.000015 loss_cls: 3.9757 (3.8714) grad_norm: 4.1222 (4.2535) time: 0.7883 data: 0.0002 max mem: 8421 +[2024-12-05 10:14:31 root] (utils.py 283): INFO Epoch: [11] [ 240/2502] eta: 0:29:18 lr: 0.000015 loss_cls: 4.1682 (3.8850) grad_norm: 4.1617 (4.2557) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 10:14:39 root] (utils.py 283): INFO Epoch: [11] [ 250/2502] eta: 0:29:11 lr: 0.000015 loss_cls: 4.1682 (3.8856) grad_norm: 4.1072 (4.2459) time: 0.7884 data: 0.0002 max mem: 8421 +[2024-12-05 10:14:47 root] (utils.py 283): INFO Epoch: [11] [ 260/2502] eta: 0:29:03 lr: 0.000015 loss_cls: 3.8075 (3.8847) grad_norm: 4.0159 (4.2516) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-05 10:14:54 root] (utils.py 283): INFO Epoch: [11] [ 270/2502] eta: 0:28:54 lr: 0.000015 loss_cls: 3.7897 (3.8790) grad_norm: 4.1870 (4.2659) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 10:15:02 root] (utils.py 283): INFO Epoch: [11] [ 280/2502] eta: 0:28:46 lr: 0.000015 loss_cls: 3.8528 (3.8773) grad_norm: 4.4312 (4.2741) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 10:15:10 root] (utils.py 283): INFO Epoch: [11] [ 290/2502] eta: 0:28:37 lr: 0.000015 loss_cls: 4.0297 (3.8797) grad_norm: 4.1373 (4.2710) time: 0.7679 data: 0.0002 max mem: 8421 +[2024-12-05 10:15:17 root] (utils.py 283): INFO Epoch: [11] [ 300/2502] eta: 0:28:29 lr: 0.000015 loss_cls: 4.0469 (3.8803) grad_norm: 4.0846 (4.2724) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 10:15:25 root] (utils.py 283): INFO Epoch: [11] [ 310/2502] eta: 0:28:21 lr: 0.000015 loss_cls: 3.7939 (3.8721) grad_norm: 4.1460 (4.2682) time: 0.7694 data: 0.0003 max mem: 8421 +[2024-12-05 10:15:33 root] (utils.py 283): INFO Epoch: [11] [ 320/2502] eta: 0:28:12 lr: 0.000015 loss_cls: 3.7939 (3.8726) grad_norm: 4.1571 (4.2684) time: 0.7685 data: 0.0003 max mem: 8421 +[2024-12-05 10:15:40 root] (utils.py 283): INFO Epoch: [11] [ 330/2502] eta: 0:28:04 lr: 0.000015 loss_cls: 4.1229 (3.8779) grad_norm: 4.2282 (4.2802) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 10:15:48 root] (utils.py 283): INFO Epoch: [11] [ 340/2502] eta: 0:27:55 lr: 0.000015 loss_cls: 4.1047 (3.8729) grad_norm: 4.2851 (4.2794) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 10:15:56 root] (utils.py 283): INFO Epoch: [11] [ 350/2502] eta: 0:27:47 lr: 0.000015 loss_cls: 3.9156 (3.8745) grad_norm: 4.1113 (4.2816) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 10:16:04 root] (utils.py 283): INFO Epoch: [11] [ 360/2502] eta: 0:27:40 lr: 0.000015 loss_cls: 4.1373 (3.8804) grad_norm: 4.0894 (4.2834) time: 0.7729 data: 0.0003 max mem: 8421 +[2024-12-05 10:16:11 root] (utils.py 283): INFO Epoch: [11] [ 370/2502] eta: 0:27:32 lr: 0.000015 loss_cls: 4.0451 (3.8733) grad_norm: 4.0650 (4.2791) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-05 10:16:19 root] (utils.py 283): INFO Epoch: [11] [ 380/2502] eta: 0:27:23 lr: 0.000015 loss_cls: 3.9683 (3.8762) grad_norm: 4.2283 (4.2894) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 10:16:27 root] (utils.py 283): INFO Epoch: [11] [ 390/2502] eta: 0:27:15 lr: 0.000015 loss_cls: 4.1126 (3.8788) grad_norm: 4.1406 (4.2848) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-05 10:16:34 root] (utils.py 283): INFO Epoch: [11] [ 400/2502] eta: 0:27:08 lr: 0.000015 loss_cls: 4.1112 (3.8831) grad_norm: 4.0802 (4.2858) time: 0.7739 data: 0.0003 max mem: 8421 +[2024-12-05 10:16:42 root] (utils.py 283): INFO Epoch: [11] [ 410/2502] eta: 0:27:00 lr: 0.000015 loss_cls: 4.1682 (3.8864) grad_norm: 4.4159 (4.2939) time: 0.7736 data: 0.0002 max mem: 8421 +[2024-12-05 10:16:50 root] (utils.py 283): INFO Epoch: [11] [ 420/2502] eta: 0:26:51 lr: 0.000015 loss_cls: 3.8519 (3.8813) grad_norm: 4.2885 (4.2940) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 10:16:57 root] (utils.py 283): INFO Epoch: [11] [ 430/2502] eta: 0:26:43 lr: 0.000015 loss_cls: 3.8639 (3.8857) grad_norm: 4.1701 (4.3005) time: 0.7602 data: 0.0002 max mem: 8421 +[2024-12-05 10:17:05 root] (utils.py 283): INFO Epoch: [11] [ 440/2502] eta: 0:26:35 lr: 0.000015 loss_cls: 3.8412 (3.8808) grad_norm: 4.1040 (4.2994) time: 0.7591 data: 0.0002 max mem: 8421 +[2024-12-05 10:17:13 root] (utils.py 283): INFO Epoch: [11] [ 450/2502] eta: 0:26:26 lr: 0.000015 loss_cls: 3.6737 (3.8808) grad_norm: 4.1040 (4.2966) time: 0.7579 data: 0.0002 max mem: 8421 +[2024-12-05 10:17:20 root] (utils.py 283): INFO Epoch: [11] [ 460/2502] eta: 0:26:18 lr: 0.000015 loss_cls: 4.0366 (3.8835) grad_norm: 4.0954 (4.2948) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-05 10:17:28 root] (utils.py 283): INFO Epoch: [11] [ 470/2502] eta: 0:26:10 lr: 0.000015 loss_cls: 3.9267 (3.8766) grad_norm: 4.0691 (4.2957) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 10:17:35 root] (utils.py 283): INFO Epoch: [11] [ 480/2502] eta: 0:26:02 lr: 0.000015 loss_cls: 3.5572 (3.8738) grad_norm: 4.0596 (4.2927) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 10:17:43 root] (utils.py 283): INFO Epoch: [11] [ 490/2502] eta: 0:25:54 lr: 0.000015 loss_cls: 4.1182 (3.8783) grad_norm: 4.1041 (4.2938) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 10:17:51 root] (utils.py 283): INFO Epoch: [11] [ 500/2502] eta: 0:25:46 lr: 0.000015 loss_cls: 4.1711 (3.8802) grad_norm: 4.2168 (4.2933) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-05 10:17:58 root] (utils.py 283): INFO Epoch: [11] [ 510/2502] eta: 0:25:37 lr: 0.000015 loss_cls: 4.0117 (3.8826) grad_norm: 3.9518 (4.2864) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 10:18:06 root] (utils.py 283): INFO Epoch: [11] [ 520/2502] eta: 0:25:29 lr: 0.000015 loss_cls: 3.9055 (3.8789) grad_norm: 3.9514 (4.2873) time: 0.7623 data: 0.0003 max mem: 8421 +[2024-12-05 10:18:14 root] (utils.py 283): INFO Epoch: [11] [ 530/2502] eta: 0:25:21 lr: 0.000015 loss_cls: 3.7130 (3.8799) grad_norm: 4.1727 (4.2889) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 10:18:21 root] (utils.py 283): INFO Epoch: [11] [ 540/2502] eta: 0:25:14 lr: 0.000015 loss_cls: 3.8943 (3.8761) grad_norm: 4.1159 (4.2870) time: 0.7702 data: 0.0002 max mem: 8421 +[2024-12-05 10:18:29 root] (utils.py 283): INFO Epoch: [11] [ 550/2502] eta: 0:25:06 lr: 0.000015 loss_cls: 3.9695 (3.8754) grad_norm: 4.0632 (4.2833) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 10:18:37 root] (utils.py 283): INFO Epoch: [11] [ 560/2502] eta: 0:24:59 lr: 0.000015 loss_cls: 4.1213 (3.8797) grad_norm: 4.1860 (4.2948) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 10:18:45 root] (utils.py 283): INFO Epoch: [11] [ 570/2502] eta: 0:24:52 lr: 0.000015 loss_cls: 4.1937 (3.8810) grad_norm: 4.3121 (4.3021) time: 0.7888 data: 0.0003 max mem: 8421 +[2024-12-05 10:18:53 root] (utils.py 283): INFO Epoch: [11] [ 580/2502] eta: 0:24:44 lr: 0.000015 loss_cls: 4.2273 (3.8869) grad_norm: 4.0841 (4.3003) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:01 root] (utils.py 283): INFO Epoch: [11] [ 590/2502] eta: 0:24:37 lr: 0.000015 loss_cls: 4.3324 (3.8917) grad_norm: 4.1378 (4.2990) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:08 root] (utils.py 283): INFO Epoch: [11] [ 600/2502] eta: 0:24:30 lr: 0.000015 loss_cls: 4.1800 (3.8926) grad_norm: 4.2725 (4.2998) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:16 root] (utils.py 283): INFO Epoch: [11] [ 610/2502] eta: 0:24:22 lr: 0.000015 loss_cls: 3.9710 (3.8929) grad_norm: 4.2041 (4.2975) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:24 root] (utils.py 283): INFO Epoch: [11] [ 620/2502] eta: 0:24:15 lr: 0.000015 loss_cls: 3.7721 (3.8902) grad_norm: 4.0774 (4.3223) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:32 root] (utils.py 283): INFO Epoch: [11] [ 630/2502] eta: 0:24:08 lr: 0.000015 loss_cls: 3.9160 (3.8909) grad_norm: 4.1069 (4.3233) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:40 root] (utils.py 283): INFO Epoch: [11] [ 640/2502] eta: 0:24:00 lr: 0.000015 loss_cls: 3.9806 (3.8908) grad_norm: 4.1069 (4.3312) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:48 root] (utils.py 283): INFO Epoch: [11] [ 650/2502] eta: 0:23:53 lr: 0.000015 loss_cls: 3.9099 (3.8885) grad_norm: 4.0306 (4.3429) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-05 10:19:55 root] (utils.py 283): INFO Epoch: [11] [ 660/2502] eta: 0:23:45 lr: 0.000015 loss_cls: 4.0913 (3.8914) grad_norm: 4.1012 (4.3453) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 10:20:03 root] (utils.py 283): INFO Epoch: [11] [ 670/2502] eta: 0:23:37 lr: 0.000015 loss_cls: 4.0913 (3.8909) grad_norm: 4.1832 (4.3429) time: 0.7664 data: 0.0003 max mem: 8421 +[2024-12-05 10:20:11 root] (utils.py 283): INFO Epoch: [11] [ 680/2502] eta: 0:23:30 lr: 0.000015 loss_cls: 4.0633 (3.8925) grad_norm: 4.1332 (4.3395) time: 0.7714 data: 0.0003 max mem: 8421 +[2024-12-05 10:20:19 root] (utils.py 283): INFO Epoch: [11] [ 690/2502] eta: 0:23:22 lr: 0.000015 loss_cls: 4.0905 (3.8939) grad_norm: 3.9810 (4.3358) time: 0.7753 data: 0.0003 max mem: 8421 +[2024-12-05 10:20:26 root] (utils.py 283): INFO Epoch: [11] [ 700/2502] eta: 0:23:14 lr: 0.000015 loss_cls: 4.0165 (3.8961) grad_norm: 4.0785 (4.3357) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-05 10:20:34 root] (utils.py 283): INFO Epoch: [11] [ 710/2502] eta: 0:23:06 lr: 0.000015 loss_cls: 4.0165 (3.8966) grad_norm: 4.1198 (4.3392) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 10:20:42 root] (utils.py 283): INFO Epoch: [11] [ 720/2502] eta: 0:22:58 lr: 0.000015 loss_cls: 4.0912 (3.8978) grad_norm: 4.2160 (4.3367) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 10:20:49 root] (utils.py 283): INFO Epoch: [11] [ 730/2502] eta: 0:22:50 lr: 0.000015 loss_cls: 4.0658 (3.8988) grad_norm: 4.2364 (4.3395) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 10:20:57 root] (utils.py 283): INFO Epoch: [11] [ 740/2502] eta: 0:22:42 lr: 0.000015 loss_cls: 4.0658 (3.9003) grad_norm: 4.1889 (4.3372) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 10:21:05 root] (utils.py 283): INFO Epoch: [11] [ 750/2502] eta: 0:22:34 lr: 0.000015 loss_cls: 4.0689 (3.9037) grad_norm: 4.1875 (4.3385) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-05 10:21:13 root] (utils.py 283): INFO Epoch: [11] [ 760/2502] eta: 0:22:27 lr: 0.000015 loss_cls: 4.0654 (3.9068) grad_norm: 4.3851 (4.3435) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-05 10:21:20 root] (utils.py 283): INFO Epoch: [11] [ 770/2502] eta: 0:22:19 lr: 0.000015 loss_cls: 4.1759 (3.9093) grad_norm: 4.1720 (4.3405) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 10:21:28 root] (utils.py 283): INFO Epoch: [11] [ 780/2502] eta: 0:22:11 lr: 0.000015 loss_cls: 3.9713 (3.9069) grad_norm: 4.2331 (4.3740) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 10:21:36 root] (utils.py 283): INFO Epoch: [11] [ 790/2502] eta: 0:22:03 lr: 0.000015 loss_cls: 3.8621 (3.9094) grad_norm: 4.4339 (4.3733) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 10:21:43 root] (utils.py 283): INFO Epoch: [11] [ 800/2502] eta: 0:21:56 lr: 0.000015 loss_cls: 3.7498 (3.9073) grad_norm: 4.1432 (4.3715) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 10:21:51 root] (utils.py 283): INFO Epoch: [11] [ 810/2502] eta: 0:21:48 lr: 0.000015 loss_cls: 4.0014 (3.9098) grad_norm: 4.1432 (4.3699) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 10:21:58 root] (utils.py 283): INFO Epoch: [11] [ 820/2502] eta: 0:21:39 lr: 0.000015 loss_cls: 4.0323 (3.9082) grad_norm: 4.1232 (4.3668) time: 0.7597 data: 0.0003 max mem: 8421 +[2024-12-05 10:22:06 root] (utils.py 283): INFO Epoch: [11] [ 830/2502] eta: 0:21:32 lr: 0.000015 loss_cls: 3.7323 (3.9062) grad_norm: 4.0052 (4.3633) time: 0.7598 data: 0.0002 max mem: 8421 +[2024-12-05 10:22:14 root] (utils.py 283): INFO Epoch: [11] [ 840/2502] eta: 0:21:24 lr: 0.000015 loss_cls: 3.5878 (3.9042) grad_norm: 4.0052 (4.3614) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 10:22:21 root] (utils.py 283): INFO Epoch: [11] [ 850/2502] eta: 0:21:16 lr: 0.000015 loss_cls: 3.8987 (3.9031) grad_norm: 4.0895 (4.3574) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 10:22:29 root] (utils.py 283): INFO Epoch: [11] [ 860/2502] eta: 0:21:08 lr: 0.000015 loss_cls: 3.9194 (3.9033) grad_norm: 4.0895 (4.3559) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 10:22:37 root] (utils.py 283): INFO Epoch: [11] [ 870/2502] eta: 0:21:00 lr: 0.000015 loss_cls: 3.9937 (3.9056) grad_norm: 4.2222 (4.3585) time: 0.7601 data: 0.0002 max mem: 8421 +[2024-12-05 10:22:44 root] (utils.py 283): INFO Epoch: [11] [ 880/2502] eta: 0:20:52 lr: 0.000015 loss_cls: 4.2003 (3.9047) grad_norm: 4.2832 (4.3565) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 10:22:52 root] (utils.py 283): INFO Epoch: [11] [ 890/2502] eta: 0:20:44 lr: 0.000015 loss_cls: 4.1919 (3.9069) grad_norm: 4.1136 (4.3576) time: 0.7633 data: 0.0003 max mem: 8421 +[2024-12-05 10:23:00 root] (utils.py 283): INFO Epoch: [11] [ 900/2502] eta: 0:20:36 lr: 0.000015 loss_cls: 4.0188 (3.9086) grad_norm: 4.1136 (4.3541) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-05 10:23:07 root] (utils.py 283): INFO Epoch: [11] [ 910/2502] eta: 0:20:28 lr: 0.000015 loss_cls: 3.9171 (3.9084) grad_norm: 4.0065 (4.3513) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-05 10:23:15 root] (utils.py 283): INFO Epoch: [11] [ 920/2502] eta: 0:20:21 lr: 0.000015 loss_cls: 3.9582 (3.9072) grad_norm: 4.1140 (4.3502) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-05 10:23:23 root] (utils.py 283): INFO Epoch: [11] [ 930/2502] eta: 0:20:13 lr: 0.000015 loss_cls: 3.8711 (3.9057) grad_norm: 4.2016 (4.3489) time: 0.7711 data: 0.0003 max mem: 8421 +[2024-12-05 10:23:30 root] (utils.py 283): INFO Epoch: [11] [ 940/2502] eta: 0:20:05 lr: 0.000015 loss_cls: 3.8199 (3.9039) grad_norm: 4.2417 (4.3522) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 10:23:38 root] (utils.py 283): INFO Epoch: [11] [ 950/2502] eta: 0:19:57 lr: 0.000015 loss_cls: 3.6587 (3.9026) grad_norm: 4.2417 (4.3573) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 10:23:46 root] (utils.py 283): INFO Epoch: [11] [ 960/2502] eta: 0:19:50 lr: 0.000015 loss_cls: 4.0793 (3.9051) grad_norm: 4.1497 (4.3573) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 10:23:53 root] (utils.py 283): INFO Epoch: [11] [ 970/2502] eta: 0:19:42 lr: 0.000015 loss_cls: 4.1502 (3.9049) grad_norm: 4.1111 (4.3554) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 10:24:01 root] (utils.py 283): INFO Epoch: [11] [ 980/2502] eta: 0:19:34 lr: 0.000015 loss_cls: 4.1171 (3.9062) grad_norm: 4.1802 (4.3551) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 10:24:09 root] (utils.py 283): INFO Epoch: [11] [ 990/2502] eta: 0:19:26 lr: 0.000015 loss_cls: 4.2391 (3.9082) grad_norm: 4.1542 (4.3540) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-05 10:24:16 root] (utils.py 283): INFO Epoch: [11] [1000/2502] eta: 0:19:18 lr: 0.000015 loss_cls: 4.0996 (3.9069) grad_norm: 4.1434 (4.3519) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 10:24:24 root] (utils.py 283): INFO Epoch: [11] [1010/2502] eta: 0:19:11 lr: 0.000015 loss_cls: 3.8874 (3.9081) grad_norm: 4.1496 (4.3522) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 10:24:31 root] (utils.py 283): INFO Epoch: [11] [1020/2502] eta: 0:19:03 lr: 0.000015 loss_cls: 4.0492 (3.9083) grad_norm: 4.2406 (4.3527) time: 0.7602 data: 0.0002 max mem: 8421 +[2024-12-05 10:24:39 root] (utils.py 283): INFO Epoch: [11] [1030/2502] eta: 0:18:55 lr: 0.000015 loss_cls: 3.3323 (3.9038) grad_norm: 4.1458 (4.3500) time: 0.7609 data: 0.0002 max mem: 8421 +[2024-12-05 10:24:47 root] (utils.py 283): INFO Epoch: [11] [1040/2502] eta: 0:18:47 lr: 0.000015 loss_cls: 3.7876 (3.9056) grad_norm: 4.1021 (4.3541) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 10:24:54 root] (utils.py 283): INFO Epoch: [11] [1050/2502] eta: 0:18:39 lr: 0.000015 loss_cls: 3.9611 (3.9039) grad_norm: 4.1151 (4.3518) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 10:25:02 root] (utils.py 283): INFO Epoch: [11] [1060/2502] eta: 0:18:31 lr: 0.000015 loss_cls: 3.8642 (3.9037) grad_norm: 4.1846 (4.3508) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 10:25:10 root] (utils.py 283): INFO Epoch: [11] [1070/2502] eta: 0:18:24 lr: 0.000015 loss_cls: 3.8642 (3.9035) grad_norm: 4.0293 (4.3477) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 10:25:17 root] (utils.py 283): INFO Epoch: [11] [1080/2502] eta: 0:18:16 lr: 0.000015 loss_cls: 3.7845 (3.9030) grad_norm: 4.0601 (4.3465) time: 0.7687 data: 0.0003 max mem: 8421 +[2024-12-05 10:25:25 root] (utils.py 283): INFO Epoch: [11] [1090/2502] eta: 0:18:08 lr: 0.000015 loss_cls: 4.1088 (3.9033) grad_norm: 4.2134 (4.3479) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 10:25:33 root] (utils.py 283): INFO Epoch: [11] [1100/2502] eta: 0:18:00 lr: 0.000015 loss_cls: 4.1088 (3.9033) grad_norm: 4.2129 (4.3475) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 10:25:40 root] (utils.py 283): INFO Epoch: [11] [1110/2502] eta: 0:17:53 lr: 0.000015 loss_cls: 3.9316 (3.9013) grad_norm: 4.0802 (4.3452) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 10:25:48 root] (utils.py 283): INFO Epoch: [11] [1120/2502] eta: 0:17:45 lr: 0.000015 loss_cls: 3.5909 (3.8988) grad_norm: 4.0743 (4.3432) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-05 10:25:56 root] (utils.py 283): INFO Epoch: [11] [1130/2502] eta: 0:17:37 lr: 0.000015 loss_cls: 3.8137 (3.9004) grad_norm: 4.0243 (4.3421) time: 0.7684 data: 0.0003 max mem: 8421 +[2024-12-05 10:26:03 root] (utils.py 283): INFO Epoch: [11] [1140/2502] eta: 0:17:29 lr: 0.000015 loss_cls: 3.9869 (3.8996) grad_norm: 4.0299 (4.3403) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 10:26:11 root] (utils.py 283): INFO Epoch: [11] [1150/2502] eta: 0:17:22 lr: 0.000015 loss_cls: 3.8953 (3.8998) grad_norm: 4.0895 (4.3383) time: 0.7721 data: 0.0003 max mem: 8421 +[2024-12-05 10:26:19 root] (utils.py 283): INFO Epoch: [11] [1160/2502] eta: 0:17:14 lr: 0.000015 loss_cls: 3.9756 (3.8992) grad_norm: 4.1722 (4.3377) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-05 10:26:26 root] (utils.py 283): INFO Epoch: [11] [1170/2502] eta: 0:17:06 lr: 0.000015 loss_cls: 3.9931 (3.8991) grad_norm: 4.2995 (4.3377) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 10:26:34 root] (utils.py 283): INFO Epoch: [11] [1180/2502] eta: 0:16:58 lr: 0.000015 loss_cls: 4.0983 (3.9016) grad_norm: 4.1171 (4.3352) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-05 10:26:42 root] (utils.py 283): INFO Epoch: [11] [1190/2502] eta: 0:16:51 lr: 0.000015 loss_cls: 4.0306 (3.9004) grad_norm: 4.0488 (4.3339) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 10:26:49 root] (utils.py 283): INFO Epoch: [11] [1200/2502] eta: 0:16:43 lr: 0.000015 loss_cls: 3.6288 (3.8984) grad_norm: 4.0307 (4.3318) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 10:26:57 root] (utils.py 283): INFO Epoch: [11] [1210/2502] eta: 0:16:35 lr: 0.000015 loss_cls: 3.6295 (3.8983) grad_norm: 4.1881 (4.3317) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:05 root] (utils.py 283): INFO Epoch: [11] [1220/2502] eta: 0:16:27 lr: 0.000015 loss_cls: 3.7625 (3.8959) grad_norm: 4.2213 (4.3306) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:12 root] (utils.py 283): INFO Epoch: [11] [1230/2502] eta: 0:16:19 lr: 0.000015 loss_cls: 3.7625 (3.8957) grad_norm: 4.1008 (4.3280) time: 0.7615 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:20 root] (utils.py 283): INFO Epoch: [11] [1240/2502] eta: 0:16:12 lr: 0.000015 loss_cls: 3.9006 (3.8947) grad_norm: 3.9425 (4.3250) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:28 root] (utils.py 283): INFO Epoch: [11] [1250/2502] eta: 0:16:04 lr: 0.000015 loss_cls: 3.7055 (3.8934) grad_norm: 4.0247 (4.3234) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:35 root] (utils.py 283): INFO Epoch: [11] [1260/2502] eta: 0:15:56 lr: 0.000015 loss_cls: 3.9120 (3.8954) grad_norm: 4.1395 (4.3245) time: 0.7632 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:43 root] (utils.py 283): INFO Epoch: [11] [1270/2502] eta: 0:15:49 lr: 0.000015 loss_cls: 4.2443 (3.8970) grad_norm: 4.0538 (4.3232) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:51 root] (utils.py 283): INFO Epoch: [11] [1280/2502] eta: 0:15:41 lr: 0.000015 loss_cls: 4.1315 (3.8975) grad_norm: 4.0115 (4.3209) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 10:27:58 root] (utils.py 283): INFO Epoch: [11] [1290/2502] eta: 0:15:33 lr: 0.000015 loss_cls: 4.0113 (3.8977) grad_norm: 3.9882 (4.3194) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-05 10:28:06 root] (utils.py 283): INFO Epoch: [11] [1300/2502] eta: 0:15:25 lr: 0.000015 loss_cls: 3.9798 (3.8988) grad_norm: 4.0953 (4.3196) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-05 10:28:14 root] (utils.py 283): INFO Epoch: [11] [1310/2502] eta: 0:15:18 lr: 0.000015 loss_cls: 3.9008 (3.8974) grad_norm: 4.1506 (4.3184) time: 0.7627 data: 0.0003 max mem: 8421 +[2024-12-05 10:28:21 root] (utils.py 283): INFO Epoch: [11] [1320/2502] eta: 0:15:10 lr: 0.000015 loss_cls: 3.9187 (3.8969) grad_norm: 4.1054 (4.3176) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 10:28:29 root] (utils.py 283): INFO Epoch: [11] [1330/2502] eta: 0:15:02 lr: 0.000015 loss_cls: 3.9187 (3.8947) grad_norm: 4.0584 (4.3158) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 10:28:37 root] (utils.py 283): INFO Epoch: [11] [1340/2502] eta: 0:14:54 lr: 0.000015 loss_cls: 3.8018 (3.8928) grad_norm: 4.0584 (4.3173) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 10:28:45 root] (utils.py 283): INFO Epoch: [11] [1350/2502] eta: 0:14:47 lr: 0.000015 loss_cls: 3.9370 (3.8940) grad_norm: 4.1073 (4.3174) time: 0.7860 data: 0.0003 max mem: 8421 +[2024-12-05 10:28:52 root] (utils.py 283): INFO Epoch: [11] [1360/2502] eta: 0:14:39 lr: 0.000015 loss_cls: 4.0694 (3.8936) grad_norm: 4.0884 (4.3156) time: 0.7860 data: 0.0003 max mem: 8421 +[2024-12-05 10:29:00 root] (utils.py 283): INFO Epoch: [11] [1370/2502] eta: 0:14:32 lr: 0.000015 loss_cls: 4.1355 (3.8949) grad_norm: 3.9885 (4.3145) time: 0.7888 data: 0.0003 max mem: 8421 +[2024-12-05 10:29:08 root] (utils.py 283): INFO Epoch: [11] [1380/2502] eta: 0:14:24 lr: 0.000015 loss_cls: 4.2225 (3.8981) grad_norm: 4.1876 (4.3152) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-05 10:29:16 root] (utils.py 283): INFO Epoch: [11] [1390/2502] eta: 0:14:17 lr: 0.000015 loss_cls: 4.0952 (3.8990) grad_norm: 4.3357 (4.3159) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 10:29:24 root] (utils.py 283): INFO Epoch: [11] [1400/2502] eta: 0:14:09 lr: 0.000015 loss_cls: 3.9580 (3.8978) grad_norm: 4.3312 (4.3154) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 10:29:31 root] (utils.py 283): INFO Epoch: [11] [1410/2502] eta: 0:14:01 lr: 0.000015 loss_cls: 3.8750 (3.8982) grad_norm: 4.1248 (4.3151) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 10:29:39 root] (utils.py 283): INFO Epoch: [11] [1420/2502] eta: 0:13:53 lr: 0.000015 loss_cls: 3.9952 (3.8970) grad_norm: 4.1248 (4.3145) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 10:29:47 root] (utils.py 283): INFO Epoch: [11] [1430/2502] eta: 0:13:46 lr: 0.000015 loss_cls: 3.8737 (3.8965) grad_norm: 4.1408 (4.3167) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 10:29:54 root] (utils.py 283): INFO Epoch: [11] [1440/2502] eta: 0:13:38 lr: 0.000015 loss_cls: 3.6661 (3.8946) grad_norm: 4.2028 (4.3200) time: 0.7612 data: 0.0003 max mem: 8421 +[2024-12-05 10:30:02 root] (utils.py 283): INFO Epoch: [11] [1450/2502] eta: 0:13:30 lr: 0.000015 loss_cls: 3.9313 (3.8941) grad_norm: 4.2223 (4.3192) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 10:30:10 root] (utils.py 283): INFO Epoch: [11] [1460/2502] eta: 0:13:22 lr: 0.000015 loss_cls: 3.9775 (3.8966) grad_norm: 4.0930 (4.3176) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 10:30:17 root] (utils.py 283): INFO Epoch: [11] [1470/2502] eta: 0:13:15 lr: 0.000015 loss_cls: 4.1514 (3.8961) grad_norm: 4.0923 (4.3176) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-05 10:30:25 root] (utils.py 283): INFO Epoch: [11] [1480/2502] eta: 0:13:07 lr: 0.000015 loss_cls: 3.6655 (3.8944) grad_norm: 4.1904 (4.3180) time: 0.7752 data: 0.0002 max mem: 8421 +[2024-12-05 10:30:33 root] (utils.py 283): INFO Epoch: [11] [1490/2502] eta: 0:13:00 lr: 0.000015 loss_cls: 4.0549 (3.8947) grad_norm: 4.3187 (4.3254) time: 0.7855 data: 0.0002 max mem: 8421 +[2024-12-05 10:30:41 root] (utils.py 283): INFO Epoch: [11] [1500/2502] eta: 0:12:52 lr: 0.000015 loss_cls: 4.1122 (3.8942) grad_norm: 4.2231 (4.3238) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-05 10:30:49 root] (utils.py 283): INFO Epoch: [11] [1510/2502] eta: 0:12:44 lr: 0.000015 loss_cls: 3.9292 (3.8950) grad_norm: 4.2231 (4.3245) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-05 10:30:57 root] (utils.py 283): INFO Epoch: [11] [1520/2502] eta: 0:12:37 lr: 0.000015 loss_cls: 3.9127 (3.8936) grad_norm: 4.2185 (4.3240) time: 0.7864 data: 0.0002 max mem: 8421 +[2024-12-05 10:31:05 root] (utils.py 283): INFO Epoch: [11] [1530/2502] eta: 0:12:29 lr: 0.000015 loss_cls: 3.5241 (3.8918) grad_norm: 4.1040 (4.3235) time: 0.7924 data: 0.0003 max mem: 8421 +[2024-12-05 10:31:13 root] (utils.py 283): INFO Epoch: [11] [1540/2502] eta: 0:12:22 lr: 0.000015 loss_cls: 4.0461 (3.8927) grad_norm: 4.2248 (4.3247) time: 0.7919 data: 0.0002 max mem: 8421 +[2024-12-05 10:31:20 root] (utils.py 283): INFO Epoch: [11] [1550/2502] eta: 0:12:14 lr: 0.000015 loss_cls: 4.0461 (3.8927) grad_norm: 4.0794 (4.3229) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-05 10:31:28 root] (utils.py 283): INFO Epoch: [11] [1560/2502] eta: 0:12:06 lr: 0.000015 loss_cls: 3.8441 (3.8927) grad_norm: 4.0794 (4.3223) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-05 10:31:36 root] (utils.py 283): INFO Epoch: [11] [1570/2502] eta: 0:11:59 lr: 0.000015 loss_cls: 4.0818 (3.8945) grad_norm: 4.1688 (4.3218) time: 0.7868 data: 0.0003 max mem: 8421 +[2024-12-05 10:31:44 root] (utils.py 283): INFO Epoch: [11] [1580/2502] eta: 0:11:51 lr: 0.000015 loss_cls: 4.0351 (3.8936) grad_norm: 4.1147 (4.3222) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-05 10:31:52 root] (utils.py 283): INFO Epoch: [11] [1590/2502] eta: 0:11:43 lr: 0.000015 loss_cls: 3.9574 (3.8953) grad_norm: 4.3012 (4.3217) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-05 10:32:00 root] (utils.py 283): INFO Epoch: [11] [1600/2502] eta: 0:11:36 lr: 0.000015 loss_cls: 4.2668 (3.8972) grad_norm: 4.3421 (4.3269) time: 0.7869 data: 0.0002 max mem: 8421 +[2024-12-05 10:32:08 root] (utils.py 283): INFO Epoch: [11] [1610/2502] eta: 0:11:28 lr: 0.000015 loss_cls: 4.1446 (3.8962) grad_norm: 4.2968 (4.3253) time: 0.7859 data: 0.0002 max mem: 8421 +[2024-12-05 10:32:16 root] (utils.py 283): INFO Epoch: [11] [1620/2502] eta: 0:11:21 lr: 0.000015 loss_cls: 3.5407 (3.8944) grad_norm: 3.9377 (4.3252) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-05 10:32:23 root] (utils.py 283): INFO Epoch: [11] [1630/2502] eta: 0:11:13 lr: 0.000015 loss_cls: 3.9086 (3.8944) grad_norm: 4.2270 (4.3253) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-05 10:32:31 root] (utils.py 283): INFO Epoch: [11] [1640/2502] eta: 0:11:05 lr: 0.000015 loss_cls: 3.9467 (3.8940) grad_norm: 4.1927 (4.3265) time: 0.7630 data: 0.0003 max mem: 8421 +[2024-12-05 10:32:38 root] (utils.py 283): INFO Epoch: [11] [1650/2502] eta: 0:10:57 lr: 0.000015 loss_cls: 4.0593 (3.8937) grad_norm: 4.1935 (4.3262) time: 0.7590 data: 0.0003 max mem: 8421 +[2024-12-05 10:32:46 root] (utils.py 283): INFO Epoch: [11] [1660/2502] eta: 0:10:50 lr: 0.000015 loss_cls: 4.2412 (3.8952) grad_norm: 4.2833 (4.3263) time: 0.7746 data: 0.0003 max mem: 8421 +[2024-12-05 10:32:54 root] (utils.py 283): INFO Epoch: [11] [1670/2502] eta: 0:10:42 lr: 0.000015 loss_cls: 4.1656 (3.8951) grad_norm: 4.2398 (4.3260) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 10:33:02 root] (utils.py 283): INFO Epoch: [11] [1680/2502] eta: 0:10:34 lr: 0.000015 loss_cls: 3.9347 (3.8953) grad_norm: 4.1837 (4.3257) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-05 10:33:10 root] (utils.py 283): INFO Epoch: [11] [1690/2502] eta: 0:10:27 lr: 0.000015 loss_cls: 3.9347 (3.8956) grad_norm: 4.1321 (4.3247) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-05 10:33:18 root] (utils.py 283): INFO Epoch: [11] [1700/2502] eta: 0:10:19 lr: 0.000015 loss_cls: 4.1855 (3.8969) grad_norm: 4.2060 (4.3242) time: 0.7896 data: 0.0002 max mem: 8421 +[2024-12-05 10:33:26 root] (utils.py 283): INFO Epoch: [11] [1710/2502] eta: 0:10:11 lr: 0.000015 loss_cls: 4.2126 (3.8983) grad_norm: 4.2118 (4.3261) time: 0.7888 data: 0.0003 max mem: 8421 +[2024-12-05 10:33:34 root] (utils.py 283): INFO Epoch: [11] [1720/2502] eta: 0:10:04 lr: 0.000015 loss_cls: 4.1520 (3.8996) grad_norm: 4.2585 (4.3262) time: 0.7980 data: 0.0003 max mem: 8421 +[2024-12-05 10:33:41 root] (utils.py 283): INFO Epoch: [11] [1730/2502] eta: 0:09:56 lr: 0.000015 loss_cls: 4.0131 (3.8991) grad_norm: 4.3038 (4.3261) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-05 10:33:49 root] (utils.py 283): INFO Epoch: [11] [1740/2502] eta: 0:09:48 lr: 0.000015 loss_cls: 3.9312 (3.8984) grad_norm: 4.1562 (4.3267) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 10:33:57 root] (utils.py 283): INFO Epoch: [11] [1750/2502] eta: 0:09:40 lr: 0.000015 loss_cls: 3.8479 (3.8979) grad_norm: 4.1178 (4.3259) time: 0.7621 data: 0.0003 max mem: 8421 +[2024-12-05 10:34:04 root] (utils.py 283): INFO Epoch: [11] [1760/2502] eta: 0:09:33 lr: 0.000015 loss_cls: 3.6393 (3.8969) grad_norm: 4.1749 (4.3262) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 10:34:12 root] (utils.py 283): INFO Epoch: [11] [1770/2502] eta: 0:09:25 lr: 0.000015 loss_cls: 3.5673 (3.8955) grad_norm: 4.2486 (4.3259) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-05 10:34:19 root] (utils.py 283): INFO Epoch: [11] [1780/2502] eta: 0:09:17 lr: 0.000015 loss_cls: 4.0543 (3.8961) grad_norm: 4.2486 (4.3260) time: 0.7611 data: 0.0003 max mem: 8421 +[2024-12-05 10:34:27 root] (utils.py 283): INFO Epoch: [11] [1790/2502] eta: 0:09:09 lr: 0.000015 loss_cls: 4.0022 (3.8956) grad_norm: 4.1688 (4.3250) time: 0.7614 data: 0.0003 max mem: 8421 +[2024-12-05 10:34:35 root] (utils.py 283): INFO Epoch: [11] [1800/2502] eta: 0:09:02 lr: 0.000015 loss_cls: 3.9151 (3.8957) grad_norm: 4.1041 (4.3265) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 10:34:42 root] (utils.py 283): INFO Epoch: [11] [1810/2502] eta: 0:08:54 lr: 0.000015 loss_cls: 3.8475 (3.8948) grad_norm: 4.1041 (4.3272) time: 0.7606 data: 0.0003 max mem: 8421 +[2024-12-05 10:34:50 root] (utils.py 283): INFO Epoch: [11] [1820/2502] eta: 0:08:46 lr: 0.000015 loss_cls: 3.6783 (3.8934) grad_norm: 4.1516 (4.3267) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 10:34:58 root] (utils.py 283): INFO Epoch: [11] [1830/2502] eta: 0:08:38 lr: 0.000015 loss_cls: 4.0370 (3.8951) grad_norm: 4.1224 (4.3257) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 10:35:05 root] (utils.py 283): INFO Epoch: [11] [1840/2502] eta: 0:08:31 lr: 0.000015 loss_cls: 4.0531 (3.8954) grad_norm: 4.0089 (4.3247) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 10:35:13 root] (utils.py 283): INFO Epoch: [11] [1850/2502] eta: 0:08:23 lr: 0.000015 loss_cls: 4.0255 (3.8957) grad_norm: 4.0559 (4.3239) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 10:35:21 root] (utils.py 283): INFO Epoch: [11] [1860/2502] eta: 0:08:15 lr: 0.000015 loss_cls: 3.7131 (3.8941) grad_norm: 4.1327 (4.3227) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-05 10:35:28 root] (utils.py 283): INFO Epoch: [11] [1870/2502] eta: 0:08:07 lr: 0.000015 loss_cls: 3.6419 (3.8933) grad_norm: 4.1619 (4.3233) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-05 10:35:36 root] (utils.py 283): INFO Epoch: [11] [1880/2502] eta: 0:08:00 lr: 0.000015 loss_cls: 3.7109 (3.8909) grad_norm: 4.3347 (4.3230) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 10:35:44 root] (utils.py 283): INFO Epoch: [11] [1890/2502] eta: 0:07:52 lr: 0.000015 loss_cls: 3.7413 (3.8916) grad_norm: 4.2652 (4.3222) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 10:35:51 root] (utils.py 283): INFO Epoch: [11] [1900/2502] eta: 0:07:44 lr: 0.000015 loss_cls: 4.1287 (3.8925) grad_norm: 4.0953 (4.3213) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 10:35:59 root] (utils.py 283): INFO Epoch: [11] [1910/2502] eta: 0:07:36 lr: 0.000015 loss_cls: 4.3095 (3.8948) grad_norm: 4.0632 (4.3211) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 10:36:07 root] (utils.py 283): INFO Epoch: [11] [1920/2502] eta: 0:07:29 lr: 0.000015 loss_cls: 4.0558 (3.8944) grad_norm: 4.0275 (4.3197) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 10:36:14 root] (utils.py 283): INFO Epoch: [11] [1930/2502] eta: 0:07:21 lr: 0.000015 loss_cls: 4.0292 (3.8953) grad_norm: 4.0497 (4.3198) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 10:36:22 root] (utils.py 283): INFO Epoch: [11] [1940/2502] eta: 0:07:13 lr: 0.000015 loss_cls: 4.1733 (3.8958) grad_norm: 4.1331 (4.3201) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 10:36:30 root] (utils.py 283): INFO Epoch: [11] [1950/2502] eta: 0:07:05 lr: 0.000015 loss_cls: 4.1733 (3.8959) grad_norm: 4.1981 (4.3197) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 10:36:37 root] (utils.py 283): INFO Epoch: [11] [1960/2502] eta: 0:06:58 lr: 0.000015 loss_cls: 3.9251 (3.8960) grad_norm: 4.1984 (4.3193) time: 0.7743 data: 0.0002 max mem: 8421 +[2024-12-05 10:36:45 root] (utils.py 283): INFO Epoch: [11] [1970/2502] eta: 0:06:50 lr: 0.000015 loss_cls: 4.1489 (3.8969) grad_norm: 4.0502 (4.3179) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-05 10:36:53 root] (utils.py 283): INFO Epoch: [11] [1980/2502] eta: 0:06:42 lr: 0.000015 loss_cls: 4.2170 (3.8974) grad_norm: 3.9868 (4.3165) time: 0.7986 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:01 root] (utils.py 283): INFO Epoch: [11] [1990/2502] eta: 0:06:35 lr: 0.000015 loss_cls: 4.0627 (3.8967) grad_norm: 3.9820 (4.3153) time: 0.7945 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:09 root] (utils.py 283): INFO Epoch: [11] [2000/2502] eta: 0:06:27 lr: 0.000015 loss_cls: 3.7288 (3.8959) grad_norm: 4.1261 (4.3155) time: 0.7737 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:17 root] (utils.py 283): INFO Epoch: [11] [2010/2502] eta: 0:06:19 lr: 0.000015 loss_cls: 3.9467 (3.8957) grad_norm: 4.3236 (4.3199) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:25 root] (utils.py 283): INFO Epoch: [11] [2020/2502] eta: 0:06:12 lr: 0.000015 loss_cls: 3.9025 (3.8969) grad_norm: 4.3236 (4.3201) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:32 root] (utils.py 283): INFO Epoch: [11] [2030/2502] eta: 0:06:04 lr: 0.000015 loss_cls: 4.0801 (3.8974) grad_norm: 4.2713 (4.3200) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:40 root] (utils.py 283): INFO Epoch: [11] [2040/2502] eta: 0:05:56 lr: 0.000015 loss_cls: 4.0801 (3.8978) grad_norm: 4.1812 (4.3195) time: 0.7644 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:48 root] (utils.py 283): INFO Epoch: [11] [2050/2502] eta: 0:05:48 lr: 0.000015 loss_cls: 3.8913 (3.8979) grad_norm: 4.1812 (4.3203) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 10:37:55 root] (utils.py 283): INFO Epoch: [11] [2060/2502] eta: 0:05:41 lr: 0.000015 loss_cls: 3.9310 (3.8988) grad_norm: 4.1728 (4.3200) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 10:38:03 root] (utils.py 283): INFO Epoch: [11] [2070/2502] eta: 0:05:33 lr: 0.000015 loss_cls: 3.9927 (3.8983) grad_norm: 4.1138 (4.3195) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 10:38:10 root] (utils.py 283): INFO Epoch: [11] [2080/2502] eta: 0:05:25 lr: 0.000015 loss_cls: 3.9018 (3.8987) grad_norm: 4.1795 (4.3202) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 10:38:18 root] (utils.py 283): INFO Epoch: [11] [2090/2502] eta: 0:05:18 lr: 0.000015 loss_cls: 3.9233 (3.8987) grad_norm: 4.1795 (4.3208) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 10:38:26 root] (utils.py 283): INFO Epoch: [11] [2100/2502] eta: 0:05:10 lr: 0.000015 loss_cls: 3.9414 (3.8989) grad_norm: 4.2023 (4.3203) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-05 10:38:33 root] (utils.py 283): INFO Epoch: [11] [2110/2502] eta: 0:05:02 lr: 0.000015 loss_cls: 3.9414 (3.8992) grad_norm: 4.2023 (4.3232) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-05 10:38:41 root] (utils.py 283): INFO Epoch: [11] [2120/2502] eta: 0:04:54 lr: 0.000015 loss_cls: 4.1315 (3.8993) grad_norm: 4.3668 (4.3231) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 10:38:49 root] (utils.py 283): INFO Epoch: [11] [2130/2502] eta: 0:04:47 lr: 0.000015 loss_cls: 4.1315 (3.8985) grad_norm: 4.1787 (4.3225) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-05 10:38:56 root] (utils.py 283): INFO Epoch: [11] [2140/2502] eta: 0:04:39 lr: 0.000015 loss_cls: 3.6836 (3.8974) grad_norm: 4.0909 (4.3223) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:04 root] (utils.py 283): INFO Epoch: [11] [2150/2502] eta: 0:04:31 lr: 0.000015 loss_cls: 3.6836 (3.8964) grad_norm: 4.1357 (4.3219) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:12 root] (utils.py 283): INFO Epoch: [11] [2160/2502] eta: 0:04:23 lr: 0.000015 loss_cls: 3.8614 (3.8966) grad_norm: 4.1253 (4.3223) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:19 root] (utils.py 283): INFO Epoch: [11] [2170/2502] eta: 0:04:16 lr: 0.000015 loss_cls: 3.8614 (3.8964) grad_norm: 4.1979 (4.3225) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:27 root] (utils.py 283): INFO Epoch: [11] [2180/2502] eta: 0:04:08 lr: 0.000015 loss_cls: 4.0073 (3.8959) grad_norm: 4.2902 (4.3240) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:35 root] (utils.py 283): INFO Epoch: [11] [2190/2502] eta: 0:04:00 lr: 0.000015 loss_cls: 4.1096 (3.8968) grad_norm: 4.2029 (4.3229) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:42 root] (utils.py 283): INFO Epoch: [11] [2200/2502] eta: 0:03:53 lr: 0.000015 loss_cls: 3.8074 (3.8955) grad_norm: 4.0493 (4.3219) time: 0.7610 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:50 root] (utils.py 283): INFO Epoch: [11] [2210/2502] eta: 0:03:45 lr: 0.000015 loss_cls: 3.7505 (3.8963) grad_norm: 4.0968 (4.3214) time: 0.7616 data: 0.0003 max mem: 8421 +[2024-12-05 10:39:58 root] (utils.py 283): INFO Epoch: [11] [2220/2502] eta: 0:03:37 lr: 0.000015 loss_cls: 4.2047 (3.8969) grad_norm: 4.1560 (4.3207) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 10:40:05 root] (utils.py 283): INFO Epoch: [11] [2230/2502] eta: 0:03:29 lr: 0.000015 loss_cls: 4.0465 (3.8968) grad_norm: 4.0849 (4.3194) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 10:40:13 root] (utils.py 283): INFO Epoch: [11] [2240/2502] eta: 0:03:22 lr: 0.000015 loss_cls: 4.0051 (3.8967) grad_norm: 4.1131 (4.3187) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 10:40:21 root] (utils.py 283): INFO Epoch: [11] [2250/2502] eta: 0:03:14 lr: 0.000015 loss_cls: 4.0359 (3.8968) grad_norm: 3.9026 (4.3168) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-05 10:40:28 root] (utils.py 283): INFO Epoch: [11] [2260/2502] eta: 0:03:06 lr: 0.000015 loss_cls: 4.0359 (3.8963) grad_norm: 3.9026 (4.3164) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 10:40:36 root] (utils.py 283): INFO Epoch: [11] [2270/2502] eta: 0:02:58 lr: 0.000015 loss_cls: 3.8533 (3.8963) grad_norm: 4.0088 (4.3149) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 10:40:44 root] (utils.py 283): INFO Epoch: [11] [2280/2502] eta: 0:02:51 lr: 0.000015 loss_cls: 3.8001 (3.8961) grad_norm: 4.1220 (4.3154) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-05 10:40:51 root] (utils.py 283): INFO Epoch: [11] [2290/2502] eta: 0:02:43 lr: 0.000015 loss_cls: 4.0438 (3.8962) grad_norm: 4.1645 (4.3150) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 10:40:59 root] (utils.py 283): INFO Epoch: [11] [2300/2502] eta: 0:02:35 lr: 0.000015 loss_cls: 3.8878 (3.8949) grad_norm: 4.1494 (4.3146) time: 0.7604 data: 0.0003 max mem: 8421 +[2024-12-05 10:41:07 root] (utils.py 283): INFO Epoch: [11] [2310/2502] eta: 0:02:28 lr: 0.000015 loss_cls: 3.7309 (3.8940) grad_norm: 4.1353 (4.3138) time: 0.7603 data: 0.0003 max mem: 8421 +[2024-12-05 10:41:14 root] (utils.py 283): INFO Epoch: [11] [2320/2502] eta: 0:02:20 lr: 0.000015 loss_cls: 3.9753 (3.8939) grad_norm: 4.1265 (4.3167) time: 0.7598 data: 0.0003 max mem: 8421 +[2024-12-05 10:41:22 root] (utils.py 283): INFO Epoch: [11] [2330/2502] eta: 0:02:12 lr: 0.000015 loss_cls: 4.0266 (3.8942) grad_norm: 4.2953 (4.3180) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 10:41:29 root] (utils.py 283): INFO Epoch: [11] [2340/2502] eta: 0:02:04 lr: 0.000015 loss_cls: 3.7536 (3.8926) grad_norm: 4.1898 (4.3173) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 10:41:37 root] (utils.py 283): INFO Epoch: [11] [2350/2502] eta: 0:01:57 lr: 0.000015 loss_cls: 4.0004 (3.8938) grad_norm: 4.0632 (4.3183) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 10:41:45 root] (utils.py 283): INFO Epoch: [11] [2360/2502] eta: 0:01:49 lr: 0.000015 loss_cls: 4.2077 (3.8947) grad_norm: 4.0632 (4.3192) time: 0.7625 data: 0.0002 max mem: 8421 +[2024-12-05 10:41:52 root] (utils.py 283): INFO Epoch: [11] [2370/2502] eta: 0:01:41 lr: 0.000015 loss_cls: 4.2077 (3.8951) grad_norm: 4.0618 (4.3182) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 10:42:00 root] (utils.py 283): INFO Epoch: [11] [2380/2502] eta: 0:01:34 lr: 0.000015 loss_cls: 4.1182 (3.8955) grad_norm: 4.0651 (4.3180) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 10:42:08 root] (utils.py 283): INFO Epoch: [11] [2390/2502] eta: 0:01:26 lr: 0.000015 loss_cls: 4.1182 (3.8957) grad_norm: 4.1912 (4.3181) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 10:42:15 root] (utils.py 283): INFO Epoch: [11] [2400/2502] eta: 0:01:18 lr: 0.000015 loss_cls: 3.9821 (3.8952) grad_norm: 4.2963 (4.3175) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-05 10:42:23 root] (utils.py 283): INFO Epoch: [11] [2410/2502] eta: 0:01:10 lr: 0.000015 loss_cls: 3.9710 (3.8961) grad_norm: 4.2907 (4.3179) time: 0.7716 data: 0.0003 max mem: 8421 +[2024-12-05 10:42:31 root] (utils.py 283): INFO Epoch: [11] [2420/2502] eta: 0:01:03 lr: 0.000015 loss_cls: 4.0297 (3.8962) grad_norm: 4.2877 (4.3191) time: 0.7716 data: 0.0002 max mem: 8421 +[2024-12-05 10:42:39 root] (utils.py 283): INFO Epoch: [11] [2430/2502] eta: 0:00:55 lr: 0.000015 loss_cls: 3.7898 (3.8959) grad_norm: 4.1514 (4.3192) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-05 10:42:46 root] (utils.py 283): INFO Epoch: [11] [2440/2502] eta: 0:00:47 lr: 0.000015 loss_cls: 3.7898 (3.8965) grad_norm: 4.1171 (4.3183) time: 0.7720 data: 0.0002 max mem: 8421 +[2024-12-05 10:42:54 root] (utils.py 283): INFO Epoch: [11] [2450/2502] eta: 0:00:40 lr: 0.000015 loss_cls: 4.1035 (3.8960) grad_norm: 4.0063 (4.3179) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 10:43:02 root] (utils.py 283): INFO Epoch: [11] [2460/2502] eta: 0:00:32 lr: 0.000015 loss_cls: 3.8421 (3.8951) grad_norm: 4.0098 (4.3167) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 10:43:09 root] (utils.py 283): INFO Epoch: [11] [2470/2502] eta: 0:00:24 lr: 0.000015 loss_cls: 3.6255 (3.8949) grad_norm: 4.0736 (4.3166) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 10:43:17 root] (utils.py 283): INFO Epoch: [11] [2480/2502] eta: 0:00:16 lr: 0.000015 loss_cls: 3.8165 (3.8950) grad_norm: 4.0612 (4.3157) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 10:43:25 root] (utils.py 283): INFO Epoch: [11] [2490/2502] eta: 0:00:09 lr: 0.000015 loss_cls: 3.8165 (3.8957) grad_norm: 4.0183 (4.3153) time: 0.7907 data: 0.0262 max mem: 8421 +[2024-12-05 10:43:33 root] (utils.py 283): INFO Epoch: [11] [2500/2502] eta: 0:00:01 lr: 0.000015 loss_cls: 3.8278 (3.8949) grad_norm: 4.0648 (4.3151) time: 0.7906 data: 0.0262 max mem: 8421 +[2024-12-05 10:43:33 root] (utils.py 283): INFO Epoch: [11] [2501/2502] eta: 0:00:00 lr: 0.000015 loss_cls: 3.9606 (3.8950) grad_norm: 4.0648 (4.3149) time: 0.7902 data: 0.0262 max mem: 8421 +[2024-12-05 10:43:33 root] (utils.py 297): INFO Epoch: [11] Total time: 0:32:09 (0.7712 s / it) +[2024-12-05 10:43:33 root] (engine.py 178): INFO Averaged stats:lr: 0.000015 loss_cls: 3.9606 (3.8924) grad_norm: 4.0648 (4.3149) +[2024-12-05 10:43:34 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7167 (0.7167) acc1: 85.9375 (85.9375) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1309 data: 0.0004 max mem: 8421 +[2024-12-05 10:43:35 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7983 (0.8599) acc1: 83.5938 (81.9602) acc3: 93.7500 (92.8267) acc5: 96.8750 (95.8097) time: 0.1313 data: 0.0004 max mem: 8421 +[2024-12-05 10:43:37 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8894 (0.9179) acc1: 78.1250 (80.1711) acc3: 92.1875 (92.3363) acc5: 96.0938 (95.0521) time: 0.1317 data: 0.0004 max mem: 8421 +[2024-12-05 10:43:38 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9569 (0.9235) acc1: 78.1250 (79.4103) acc3: 92.1875 (92.6663) acc5: 95.3125 (95.2117) time: 0.1324 data: 0.0004 max mem: 8421 +[2024-12-05 10:43:39 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8701 (0.9167) acc1: 78.9062 (79.7447) acc3: 92.9688 (92.7020) acc5: 96.0938 (95.2934) time: 0.1350 data: 0.0005 max mem: 8421 +[2024-12-05 10:43:41 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0304 (1.0069) acc1: 75.0000 (77.7267) acc3: 88.2812 (91.1305) acc5: 92.9688 (94.0870) time: 0.1345 data: 0.0005 max mem: 8421 +[2024-12-05 10:43:42 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3096 (1.0544) acc1: 70.3125 (76.7546) acc3: 85.1562 (90.2664) acc5: 89.8438 (93.3530) time: 0.1359 data: 0.0044 max mem: 8421 +[2024-12-05 10:43:43 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3096 (1.0986) acc1: 71.0938 (75.5722) acc3: 86.7188 (89.7447) acc5: 89.8438 (92.9027) time: 0.1359 data: 0.0044 max mem: 8421 +[2024-12-05 10:43:45 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3581 (1.1363) acc1: 68.7500 (74.8650) acc3: 85.1562 (89.0818) acc5: 89.8438 (92.3900) time: 0.1323 data: 0.0009 max mem: 8421 +[2024-12-05 10:43:46 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3812 (1.1686) acc1: 68.7500 (74.0556) acc3: 83.5938 (88.4358) acc5: 89.0625 (91.9385) time: 0.1348 data: 0.0031 max mem: 8421 +[2024-12-05 10:43:47 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2179 (1.1569) acc1: 72.6562 (74.2880) acc3: 87.5000 (88.6000) acc5: 91.4062 (92.1280) time: 0.1329 data: 0.0030 max mem: 8421 +[2024-12-05 10:43:47 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1338 s / it) +[2024-12-05 10:43:47 root] (engine.py 263): INFO * Acc@1 74.300 Acc@3 88.652 Acc@5 92.042 loss 1.157 flops 1.285 layer_flops 1.251 +[2024-12-05 10:43:47 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.3% +[2024-12-05 10:43:47 root] (main.py 550): INFO Max accuracy: 74.41% +[2024-12-05 10:43:48 root] (utils.py 283): INFO Epoch: [12] [ 0/2502] eta: 0:35:59 lr: 0.000014 loss_cls: 4.0266 (4.0266) grad_norm: 5.2665 (5.2665) time: 0.8633 data: 0.0002 max mem: 8421 +[2024-12-05 10:43:56 root] (utils.py 283): INFO Epoch: [12] [ 10/2502] eta: 0:33:12 lr: 0.000014 loss_cls: 4.0266 (3.8638) grad_norm: 4.2738 (4.4306) time: 0.7995 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:04 root] (utils.py 283): INFO Epoch: [12] [ 20/2502] eta: 0:32:59 lr: 0.000014 loss_cls: 4.1667 (4.0189) grad_norm: 4.2439 (4.6838) time: 0.7941 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:12 root] (utils.py 283): INFO Epoch: [12] [ 30/2502] eta: 0:32:50 lr: 0.000014 loss_cls: 4.2978 (4.0512) grad_norm: 4.0886 (4.5531) time: 0.7957 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:20 root] (utils.py 283): INFO Epoch: [12] [ 40/2502] eta: 0:32:38 lr: 0.000014 loss_cls: 4.0912 (4.0201) grad_norm: 4.0371 (4.4280) time: 0.7939 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:28 root] (utils.py 283): INFO Epoch: [12] [ 50/2502] eta: 0:32:33 lr: 0.000014 loss_cls: 4.0374 (3.9996) grad_norm: 3.9823 (4.3831) time: 0.7964 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:35 root] (utils.py 283): INFO Epoch: [12] [ 60/2502] eta: 0:32:23 lr: 0.000014 loss_cls: 4.0099 (4.0115) grad_norm: 4.0927 (4.4127) time: 0.7964 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:43 root] (utils.py 283): INFO Epoch: [12] [ 70/2502] eta: 0:32:14 lr: 0.000014 loss_cls: 4.0558 (4.0173) grad_norm: 4.0280 (4.3534) time: 0.7922 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:51 root] (utils.py 283): INFO Epoch: [12] [ 80/2502] eta: 0:32:06 lr: 0.000014 loss_cls: 4.1834 (4.0237) grad_norm: 4.0280 (4.3510) time: 0.7935 data: 0.0002 max mem: 8421 +[2024-12-05 10:44:59 root] (utils.py 283): INFO Epoch: [12] [ 90/2502] eta: 0:31:58 lr: 0.000014 loss_cls: 4.1170 (4.0012) grad_norm: 4.1919 (4.3322) time: 0.7945 data: 0.0002 max mem: 8421 +[2024-12-05 10:45:07 root] (utils.py 283): INFO Epoch: [12] [ 100/2502] eta: 0:31:50 lr: 0.000014 loss_cls: 3.6546 (3.9665) grad_norm: 4.1052 (4.3130) time: 0.7951 data: 0.0003 max mem: 8421 +[2024-12-05 10:45:15 root] (utils.py 283): INFO Epoch: [12] [ 110/2502] eta: 0:31:42 lr: 0.000014 loss_cls: 3.6546 (3.9493) grad_norm: 4.1819 (4.3090) time: 0.7948 data: 0.0003 max mem: 8421 +[2024-12-05 10:45:23 root] (utils.py 283): INFO Epoch: [12] [ 120/2502] eta: 0:31:32 lr: 0.000014 loss_cls: 3.9002 (3.9382) grad_norm: 4.2340 (4.3023) time: 0.7904 data: 0.0003 max mem: 8421 +[2024-12-05 10:45:31 root] (utils.py 283): INFO Epoch: [12] [ 130/2502] eta: 0:31:22 lr: 0.000014 loss_cls: 3.8944 (3.9313) grad_norm: 4.1499 (4.2966) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-05 10:45:39 root] (utils.py 283): INFO Epoch: [12] [ 140/2502] eta: 0:31:14 lr: 0.000014 loss_cls: 3.9025 (3.9388) grad_norm: 4.0447 (4.2731) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-05 10:45:47 root] (utils.py 283): INFO Epoch: [12] [ 150/2502] eta: 0:31:07 lr: 0.000014 loss_cls: 4.0014 (3.9295) grad_norm: 3.9449 (4.2627) time: 0.7974 data: 0.0003 max mem: 8421 +[2024-12-05 10:45:55 root] (utils.py 283): INFO Epoch: [12] [ 160/2502] eta: 0:30:58 lr: 0.000014 loss_cls: 3.8158 (3.9239) grad_norm: 4.2445 (4.2660) time: 0.7959 data: 0.0003 max mem: 8421 +[2024-12-05 10:46:02 root] (utils.py 283): INFO Epoch: [12] [ 170/2502] eta: 0:30:46 lr: 0.000014 loss_cls: 3.6615 (3.9006) grad_norm: 4.1943 (4.2570) time: 0.7747 data: 0.0002 max mem: 8421 +[2024-12-05 10:46:10 root] (utils.py 283): INFO Epoch: [12] [ 180/2502] eta: 0:30:35 lr: 0.000014 loss_cls: 3.5954 (3.8979) grad_norm: 4.0449 (4.2561) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 10:46:18 root] (utils.py 283): INFO Epoch: [12] [ 190/2502] eta: 0:30:24 lr: 0.000014 loss_cls: 3.9574 (3.8943) grad_norm: 4.2349 (4.2744) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 10:46:25 root] (utils.py 283): INFO Epoch: [12] [ 200/2502] eta: 0:30:13 lr: 0.000014 loss_cls: 3.8869 (3.8895) grad_norm: 4.2349 (4.2968) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 10:46:33 root] (utils.py 283): INFO Epoch: [12] [ 210/2502] eta: 0:30:04 lr: 0.000014 loss_cls: 3.9327 (3.8930) grad_norm: 4.0770 (4.2940) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 10:46:41 root] (utils.py 283): INFO Epoch: [12] [ 220/2502] eta: 0:29:53 lr: 0.000014 loss_cls: 3.9489 (3.8931) grad_norm: 4.0285 (4.3059) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 10:46:48 root] (utils.py 283): INFO Epoch: [12] [ 230/2502] eta: 0:29:43 lr: 0.000014 loss_cls: 4.0844 (3.8996) grad_norm: 4.1213 (4.2990) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 10:46:56 root] (utils.py 283): INFO Epoch: [12] [ 240/2502] eta: 0:29:35 lr: 0.000014 loss_cls: 3.8495 (3.8794) grad_norm: 4.2127 (4.2981) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-05 10:47:04 root] (utils.py 283): INFO Epoch: [12] [ 250/2502] eta: 0:29:26 lr: 0.000014 loss_cls: 3.5721 (3.8808) grad_norm: 4.2415 (4.3030) time: 0.7742 data: 0.0002 max mem: 8421 +[2024-12-05 10:47:11 root] (utils.py 283): INFO Epoch: [12] [ 260/2502] eta: 0:29:16 lr: 0.000014 loss_cls: 3.9504 (3.8808) grad_norm: 4.1500 (4.3006) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 10:47:19 root] (utils.py 283): INFO Epoch: [12] [ 270/2502] eta: 0:29:06 lr: 0.000014 loss_cls: 4.1794 (3.8915) grad_norm: 4.1454 (4.2946) time: 0.7606 data: 0.0002 max mem: 8421 +[2024-12-05 10:47:27 root] (utils.py 283): INFO Epoch: [12] [ 280/2502] eta: 0:28:57 lr: 0.000014 loss_cls: 4.3079 (3.9022) grad_norm: 4.1172 (4.2960) time: 0.7593 data: 0.0002 max mem: 8421 +[2024-12-05 10:47:34 root] (utils.py 283): INFO Epoch: [12] [ 290/2502] eta: 0:28:48 lr: 0.000014 loss_cls: 3.9399 (3.8910) grad_norm: 4.0786 (4.2901) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 10:47:42 root] (utils.py 283): INFO Epoch: [12] [ 300/2502] eta: 0:28:38 lr: 0.000014 loss_cls: 3.8399 (3.8948) grad_norm: 4.0355 (4.2836) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-05 10:47:50 root] (utils.py 283): INFO Epoch: [12] [ 310/2502] eta: 0:28:29 lr: 0.000014 loss_cls: 4.0864 (3.8946) grad_norm: 4.0694 (4.2800) time: 0.7600 data: 0.0003 max mem: 8421 +[2024-12-05 10:47:57 root] (utils.py 283): INFO Epoch: [12] [ 320/2502] eta: 0:28:20 lr: 0.000014 loss_cls: 4.0864 (3.9037) grad_norm: 4.0868 (4.2758) time: 0.7595 data: 0.0003 max mem: 8421 +[2024-12-05 10:48:05 root] (utils.py 283): INFO Epoch: [12] [ 330/2502] eta: 0:28:11 lr: 0.000014 loss_cls: 3.7364 (3.8926) grad_norm: 4.2059 (4.2826) time: 0.7600 data: 0.0003 max mem: 8421 +[2024-12-05 10:48:12 root] (utils.py 283): INFO Epoch: [12] [ 340/2502] eta: 0:28:02 lr: 0.000014 loss_cls: 3.5356 (3.8944) grad_norm: 4.2485 (4.2801) time: 0.7603 data: 0.0003 max mem: 8421 +[2024-12-05 10:48:20 root] (utils.py 283): INFO Epoch: [12] [ 350/2502] eta: 0:27:53 lr: 0.000014 loss_cls: 3.7880 (3.8928) grad_norm: 4.2465 (4.2784) time: 0.7596 data: 0.0002 max mem: 8421 +[2024-12-05 10:48:28 root] (utils.py 283): INFO Epoch: [12] [ 360/2502] eta: 0:27:45 lr: 0.000014 loss_cls: 4.0503 (3.8969) grad_norm: 4.2029 (4.2990) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 10:48:35 root] (utils.py 283): INFO Epoch: [12] [ 370/2502] eta: 0:27:36 lr: 0.000014 loss_cls: 4.0296 (3.8940) grad_norm: 4.2029 (4.2951) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 10:48:43 root] (utils.py 283): INFO Epoch: [12] [ 380/2502] eta: 0:27:28 lr: 0.000014 loss_cls: 4.0296 (3.8972) grad_norm: 4.0645 (4.2898) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 10:48:51 root] (utils.py 283): INFO Epoch: [12] [ 390/2502] eta: 0:27:20 lr: 0.000014 loss_cls: 3.9741 (3.8918) grad_norm: 4.1054 (4.2909) time: 0.7726 data: 0.0003 max mem: 8421 +[2024-12-05 10:48:59 root] (utils.py 283): INFO Epoch: [12] [ 400/2502] eta: 0:27:13 lr: 0.000014 loss_cls: 3.7074 (3.8893) grad_norm: 4.1885 (4.2903) time: 0.7888 data: 0.0003 max mem: 8421 +[2024-12-05 10:49:06 root] (utils.py 283): INFO Epoch: [12] [ 410/2502] eta: 0:27:05 lr: 0.000014 loss_cls: 4.1064 (3.8953) grad_norm: 4.1042 (4.2868) time: 0.7829 data: 0.0002 max mem: 8421 +[2024-12-05 10:49:14 root] (utils.py 283): INFO Epoch: [12] [ 420/2502] eta: 0:26:57 lr: 0.000014 loss_cls: 4.1524 (3.8975) grad_norm: 4.0083 (4.2816) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 10:49:22 root] (utils.py 283): INFO Epoch: [12] [ 430/2502] eta: 0:26:48 lr: 0.000014 loss_cls: 3.8798 (3.8926) grad_norm: 4.0764 (4.2823) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 10:49:29 root] (utils.py 283): INFO Epoch: [12] [ 440/2502] eta: 0:26:40 lr: 0.000014 loss_cls: 3.8092 (3.8950) grad_norm: 4.2137 (4.2856) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 10:49:37 root] (utils.py 283): INFO Epoch: [12] [ 450/2502] eta: 0:26:32 lr: 0.000014 loss_cls: 3.8409 (3.8945) grad_norm: 4.0629 (4.2793) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 10:49:45 root] (utils.py 283): INFO Epoch: [12] [ 460/2502] eta: 0:26:23 lr: 0.000014 loss_cls: 3.9168 (3.8970) grad_norm: 4.0629 (4.2849) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-05 10:49:52 root] (utils.py 283): INFO Epoch: [12] [ 470/2502] eta: 0:26:15 lr: 0.000014 loss_cls: 4.0552 (3.9005) grad_norm: 4.3078 (4.2864) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 10:50:00 root] (utils.py 283): INFO Epoch: [12] [ 480/2502] eta: 0:26:07 lr: 0.000014 loss_cls: 4.0233 (3.8987) grad_norm: 4.3055 (4.2852) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 10:50:08 root] (utils.py 283): INFO Epoch: [12] [ 490/2502] eta: 0:25:59 lr: 0.000014 loss_cls: 3.9264 (3.8977) grad_norm: 4.3585 (4.2873) time: 0.7701 data: 0.0002 max mem: 8421 +[2024-12-05 10:50:15 root] (utils.py 283): INFO Epoch: [12] [ 500/2502] eta: 0:25:52 lr: 0.000014 loss_cls: 4.1230 (3.8987) grad_norm: 4.1491 (4.2849) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-05 10:50:23 root] (utils.py 283): INFO Epoch: [12] [ 510/2502] eta: 0:25:44 lr: 0.000014 loss_cls: 4.0190 (3.8988) grad_norm: 4.0179 (4.2827) time: 0.7698 data: 0.0002 max mem: 8421 +[2024-12-05 10:50:31 root] (utils.py 283): INFO Epoch: [12] [ 520/2502] eta: 0:25:35 lr: 0.000014 loss_cls: 4.0190 (3.8981) grad_norm: 3.9984 (4.2799) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 10:50:38 root] (utils.py 283): INFO Epoch: [12] [ 530/2502] eta: 0:25:27 lr: 0.000014 loss_cls: 3.9799 (3.9011) grad_norm: 4.0463 (4.2788) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 10:50:46 root] (utils.py 283): INFO Epoch: [12] [ 540/2502] eta: 0:25:19 lr: 0.000014 loss_cls: 3.9673 (3.8957) grad_norm: 4.1573 (4.2817) time: 0.7678 data: 0.0003 max mem: 8421 +[2024-12-05 10:50:54 root] (utils.py 283): INFO Epoch: [12] [ 550/2502] eta: 0:25:11 lr: 0.000014 loss_cls: 3.7581 (3.8975) grad_norm: 4.3300 (4.2822) time: 0.7720 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:01 root] (utils.py 283): INFO Epoch: [12] [ 560/2502] eta: 0:25:03 lr: 0.000014 loss_cls: 4.0666 (3.8988) grad_norm: 4.2792 (4.2838) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:09 root] (utils.py 283): INFO Epoch: [12] [ 570/2502] eta: 0:24:55 lr: 0.000014 loss_cls: 4.1610 (3.9015) grad_norm: 4.1708 (4.2849) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:17 root] (utils.py 283): INFO Epoch: [12] [ 580/2502] eta: 0:24:48 lr: 0.000014 loss_cls: 4.1204 (3.8999) grad_norm: 4.1237 (4.2825) time: 0.7699 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:25 root] (utils.py 283): INFO Epoch: [12] [ 590/2502] eta: 0:24:40 lr: 0.000014 loss_cls: 3.9471 (3.9016) grad_norm: 4.0989 (4.2830) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:32 root] (utils.py 283): INFO Epoch: [12] [ 600/2502] eta: 0:24:32 lr: 0.000014 loss_cls: 4.0840 (3.9079) grad_norm: 4.0846 (4.2797) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:40 root] (utils.py 283): INFO Epoch: [12] [ 610/2502] eta: 0:24:24 lr: 0.000014 loss_cls: 4.1754 (3.9059) grad_norm: 4.0846 (4.2774) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:48 root] (utils.py 283): INFO Epoch: [12] [ 620/2502] eta: 0:24:16 lr: 0.000014 loss_cls: 3.9158 (3.9046) grad_norm: 4.1126 (4.2755) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 10:51:56 root] (utils.py 283): INFO Epoch: [12] [ 630/2502] eta: 0:24:09 lr: 0.000014 loss_cls: 3.9158 (3.9046) grad_norm: 4.1126 (4.2794) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-05 10:52:04 root] (utils.py 283): INFO Epoch: [12] [ 640/2502] eta: 0:24:02 lr: 0.000014 loss_cls: 4.1329 (3.9079) grad_norm: 4.2625 (4.2811) time: 0.7993 data: 0.0003 max mem: 8421 +[2024-12-05 10:52:11 root] (utils.py 283): INFO Epoch: [12] [ 650/2502] eta: 0:23:54 lr: 0.000014 loss_cls: 4.0054 (3.9058) grad_norm: 4.3659 (4.2850) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 10:52:19 root] (utils.py 283): INFO Epoch: [12] [ 660/2502] eta: 0:23:46 lr: 0.000014 loss_cls: 3.9631 (3.9076) grad_norm: 4.5633 (4.2889) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 10:52:27 root] (utils.py 283): INFO Epoch: [12] [ 670/2502] eta: 0:23:38 lr: 0.000014 loss_cls: 3.9254 (3.9069) grad_norm: 4.1284 (4.2857) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 10:52:34 root] (utils.py 283): INFO Epoch: [12] [ 680/2502] eta: 0:23:31 lr: 0.000014 loss_cls: 3.9743 (3.9085) grad_norm: 4.0953 (4.2839) time: 0.7735 data: 0.0002 max mem: 8421 +[2024-12-05 10:52:42 root] (utils.py 283): INFO Epoch: [12] [ 690/2502] eta: 0:23:23 lr: 0.000014 loss_cls: 3.9743 (3.9078) grad_norm: 4.0914 (4.2985) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-05 10:52:50 root] (utils.py 283): INFO Epoch: [12] [ 700/2502] eta: 0:23:15 lr: 0.000014 loss_cls: 3.8663 (3.9078) grad_norm: 4.0914 (4.2969) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 10:52:57 root] (utils.py 283): INFO Epoch: [12] [ 710/2502] eta: 0:23:07 lr: 0.000014 loss_cls: 4.0241 (3.9085) grad_norm: 4.0470 (4.2934) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-05 10:53:05 root] (utils.py 283): INFO Epoch: [12] [ 720/2502] eta: 0:22:59 lr: 0.000014 loss_cls: 3.8421 (3.9060) grad_norm: 3.9562 (4.2889) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-05 10:53:13 root] (utils.py 283): INFO Epoch: [12] [ 730/2502] eta: 0:22:51 lr: 0.000014 loss_cls: 3.8421 (3.9059) grad_norm: 3.9839 (4.2864) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 10:53:20 root] (utils.py 283): INFO Epoch: [12] [ 740/2502] eta: 0:22:43 lr: 0.000014 loss_cls: 4.0991 (3.9064) grad_norm: 4.0796 (4.2871) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 10:53:28 root] (utils.py 283): INFO Epoch: [12] [ 750/2502] eta: 0:22:35 lr: 0.000014 loss_cls: 4.0251 (3.9064) grad_norm: 4.1238 (4.2863) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 10:53:36 root] (utils.py 283): INFO Epoch: [12] [ 760/2502] eta: 0:22:27 lr: 0.000014 loss_cls: 3.9604 (3.9059) grad_norm: 4.1151 (4.2853) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 10:53:43 root] (utils.py 283): INFO Epoch: [12] [ 770/2502] eta: 0:22:19 lr: 0.000014 loss_cls: 4.1841 (3.9083) grad_norm: 4.1481 (4.2849) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 10:53:51 root] (utils.py 283): INFO Epoch: [12] [ 780/2502] eta: 0:22:11 lr: 0.000014 loss_cls: 4.1841 (3.9043) grad_norm: 4.3310 (4.2886) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 10:53:58 root] (utils.py 283): INFO Epoch: [12] [ 790/2502] eta: 0:22:03 lr: 0.000014 loss_cls: 3.6895 (3.9029) grad_norm: 4.3281 (4.2894) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 10:54:06 root] (utils.py 283): INFO Epoch: [12] [ 800/2502] eta: 0:21:55 lr: 0.000014 loss_cls: 4.1084 (3.9043) grad_norm: 4.1712 (4.2874) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 10:54:14 root] (utils.py 283): INFO Epoch: [12] [ 810/2502] eta: 0:21:47 lr: 0.000014 loss_cls: 4.0213 (3.9028) grad_norm: 4.1295 (4.2870) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 10:54:21 root] (utils.py 283): INFO Epoch: [12] [ 820/2502] eta: 0:21:39 lr: 0.000014 loss_cls: 4.1265 (3.9060) grad_norm: 4.1048 (4.2855) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 10:54:29 root] (utils.py 283): INFO Epoch: [12] [ 830/2502] eta: 0:21:32 lr: 0.000014 loss_cls: 4.2413 (3.9089) grad_norm: 4.0665 (4.2867) time: 0.7745 data: 0.0002 max mem: 8421 +[2024-12-05 10:54:37 root] (utils.py 283): INFO Epoch: [12] [ 840/2502] eta: 0:21:24 lr: 0.000014 loss_cls: 4.0162 (3.9079) grad_norm: 4.0619 (4.2856) time: 0.7886 data: 0.0002 max mem: 8421 +[2024-12-05 10:54:45 root] (utils.py 283): INFO Epoch: [12] [ 850/2502] eta: 0:21:17 lr: 0.000014 loss_cls: 3.7996 (3.9038) grad_norm: 4.0619 (4.2843) time: 0.7894 data: 0.0002 max mem: 8421 +[2024-12-05 10:54:53 root] (utils.py 283): INFO Epoch: [12] [ 860/2502] eta: 0:21:09 lr: 0.000014 loss_cls: 3.9667 (3.9053) grad_norm: 4.2049 (4.2848) time: 0.7771 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:00 root] (utils.py 283): INFO Epoch: [12] [ 870/2502] eta: 0:21:01 lr: 0.000014 loss_cls: 3.8630 (3.9008) grad_norm: 4.2049 (4.2852) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:08 root] (utils.py 283): INFO Epoch: [12] [ 880/2502] eta: 0:20:53 lr: 0.000014 loss_cls: 3.8467 (3.9012) grad_norm: 4.0495 (4.2833) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:16 root] (utils.py 283): INFO Epoch: [12] [ 890/2502] eta: 0:20:45 lr: 0.000014 loss_cls: 3.8841 (3.9013) grad_norm: 4.0632 (4.2838) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:23 root] (utils.py 283): INFO Epoch: [12] [ 900/2502] eta: 0:20:37 lr: 0.000014 loss_cls: 3.9012 (3.9014) grad_norm: 4.1647 (4.2822) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:31 root] (utils.py 283): INFO Epoch: [12] [ 910/2502] eta: 0:20:30 lr: 0.000014 loss_cls: 4.0778 (3.9014) grad_norm: 4.0684 (4.2830) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:39 root] (utils.py 283): INFO Epoch: [12] [ 920/2502] eta: 0:20:22 lr: 0.000014 loss_cls: 4.0416 (3.8999) grad_norm: 4.2258 (4.2824) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:46 root] (utils.py 283): INFO Epoch: [12] [ 930/2502] eta: 0:20:14 lr: 0.000014 loss_cls: 3.5554 (3.8948) grad_norm: 4.2334 (4.2820) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-05 10:55:54 root] (utils.py 283): INFO Epoch: [12] [ 940/2502] eta: 0:20:06 lr: 0.000014 loss_cls: 3.7559 (3.8981) grad_norm: 4.2874 (4.2821) time: 0.7692 data: 0.0003 max mem: 8421 +[2024-12-05 10:56:02 root] (utils.py 283): INFO Epoch: [12] [ 950/2502] eta: 0:19:58 lr: 0.000014 loss_cls: 4.3106 (3.8979) grad_norm: 4.2793 (4.2838) time: 0.7720 data: 0.0003 max mem: 8421 +[2024-12-05 10:56:09 root] (utils.py 283): INFO Epoch: [12] [ 960/2502] eta: 0:19:51 lr: 0.000014 loss_cls: 3.9746 (3.8950) grad_norm: 4.2872 (4.2832) time: 0.7737 data: 0.0002 max mem: 8421 +[2024-12-05 10:56:17 root] (utils.py 283): INFO Epoch: [12] [ 970/2502] eta: 0:19:43 lr: 0.000014 loss_cls: 3.7132 (3.8940) grad_norm: 4.2872 (4.2938) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-05 10:56:25 root] (utils.py 283): INFO Epoch: [12] [ 980/2502] eta: 0:19:36 lr: 0.000014 loss_cls: 3.9414 (3.8953) grad_norm: 4.2212 (4.2951) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-05 10:56:33 root] (utils.py 283): INFO Epoch: [12] [ 990/2502] eta: 0:19:28 lr: 0.000014 loss_cls: 3.9874 (3.8967) grad_norm: 4.1516 (4.2941) time: 0.7962 data: 0.0003 max mem: 8421 +[2024-12-05 10:56:41 root] (utils.py 283): INFO Epoch: [12] [1000/2502] eta: 0:19:21 lr: 0.000014 loss_cls: 3.9523 (3.8948) grad_norm: 4.0725 (4.2927) time: 0.7902 data: 0.0002 max mem: 8421 +[2024-12-05 10:56:49 root] (utils.py 283): INFO Epoch: [12] [1010/2502] eta: 0:19:13 lr: 0.000014 loss_cls: 3.6146 (3.8927) grad_norm: 4.2572 (4.2992) time: 0.7720 data: 0.0002 max mem: 8421 +[2024-12-05 10:56:56 root] (utils.py 283): INFO Epoch: [12] [1020/2502] eta: 0:19:05 lr: 0.000014 loss_cls: 3.9112 (3.8927) grad_norm: 4.2768 (4.2996) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:04 root] (utils.py 283): INFO Epoch: [12] [1030/2502] eta: 0:18:57 lr: 0.000014 loss_cls: 3.9587 (3.8926) grad_norm: 4.0793 (4.2991) time: 0.7728 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:12 root] (utils.py 283): INFO Epoch: [12] [1040/2502] eta: 0:18:50 lr: 0.000014 loss_cls: 3.8647 (3.8917) grad_norm: 4.1865 (4.2982) time: 0.7721 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:19 root] (utils.py 283): INFO Epoch: [12] [1050/2502] eta: 0:18:42 lr: 0.000014 loss_cls: 4.0645 (3.8928) grad_norm: 4.2624 (4.2996) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:27 root] (utils.py 283): INFO Epoch: [12] [1060/2502] eta: 0:18:34 lr: 0.000014 loss_cls: 4.0200 (3.8915) grad_norm: 4.2923 (4.3010) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:35 root] (utils.py 283): INFO Epoch: [12] [1070/2502] eta: 0:18:26 lr: 0.000014 loss_cls: 3.8924 (3.8915) grad_norm: 4.2074 (4.3004) time: 0.7610 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:42 root] (utils.py 283): INFO Epoch: [12] [1080/2502] eta: 0:18:18 lr: 0.000014 loss_cls: 4.0606 (3.8904) grad_norm: 4.3225 (4.3031) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:50 root] (utils.py 283): INFO Epoch: [12] [1090/2502] eta: 0:18:10 lr: 0.000014 loss_cls: 4.0504 (3.8899) grad_norm: 4.2485 (4.3023) time: 0.7587 data: 0.0002 max mem: 8421 +[2024-12-05 10:57:57 root] (utils.py 283): INFO Epoch: [12] [1100/2502] eta: 0:18:02 lr: 0.000014 loss_cls: 3.8195 (3.8893) grad_norm: 4.1083 (4.3018) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:05 root] (utils.py 283): INFO Epoch: [12] [1110/2502] eta: 0:17:54 lr: 0.000014 loss_cls: 3.6328 (3.8877) grad_norm: 4.1768 (4.3018) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:13 root] (utils.py 283): INFO Epoch: [12] [1120/2502] eta: 0:17:47 lr: 0.000014 loss_cls: 3.5288 (3.8855) grad_norm: 4.2445 (4.3023) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:20 root] (utils.py 283): INFO Epoch: [12] [1130/2502] eta: 0:17:39 lr: 0.000014 loss_cls: 3.9638 (3.8867) grad_norm: 4.1435 (4.3014) time: 0.7700 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:28 root] (utils.py 283): INFO Epoch: [12] [1140/2502] eta: 0:17:31 lr: 0.000014 loss_cls: 4.0457 (3.8870) grad_norm: 4.1435 (4.3016) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:36 root] (utils.py 283): INFO Epoch: [12] [1150/2502] eta: 0:17:23 lr: 0.000014 loss_cls: 4.0407 (3.8879) grad_norm: 4.1728 (4.3009) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:43 root] (utils.py 283): INFO Epoch: [12] [1160/2502] eta: 0:17:16 lr: 0.000014 loss_cls: 3.9642 (3.8869) grad_norm: 4.1085 (4.3002) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:51 root] (utils.py 283): INFO Epoch: [12] [1170/2502] eta: 0:17:08 lr: 0.000014 loss_cls: 4.0244 (3.8887) grad_norm: 4.0951 (4.2988) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 10:58:59 root] (utils.py 283): INFO Epoch: [12] [1180/2502] eta: 0:17:00 lr: 0.000014 loss_cls: 4.1850 (3.8907) grad_norm: 4.1451 (4.3003) time: 0.7621 data: 0.0003 max mem: 8421 +[2024-12-05 10:59:06 root] (utils.py 283): INFO Epoch: [12] [1190/2502] eta: 0:16:52 lr: 0.000014 loss_cls: 4.0055 (3.8907) grad_norm: 4.1644 (4.2983) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 10:59:14 root] (utils.py 283): INFO Epoch: [12] [1200/2502] eta: 0:16:44 lr: 0.000014 loss_cls: 4.0975 (3.8934) grad_norm: 4.0569 (4.2979) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 10:59:22 root] (utils.py 283): INFO Epoch: [12] [1210/2502] eta: 0:16:37 lr: 0.000014 loss_cls: 4.2859 (3.8939) grad_norm: 4.1327 (4.2976) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 10:59:29 root] (utils.py 283): INFO Epoch: [12] [1220/2502] eta: 0:16:29 lr: 0.000014 loss_cls: 4.1268 (3.8936) grad_norm: 4.1551 (4.2971) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 10:59:37 root] (utils.py 283): INFO Epoch: [12] [1230/2502] eta: 0:16:21 lr: 0.000014 loss_cls: 4.1268 (3.8952) grad_norm: 4.1069 (4.2961) time: 0.7630 data: 0.0003 max mem: 8421 +[2024-12-05 10:59:45 root] (utils.py 283): INFO Epoch: [12] [1240/2502] eta: 0:16:13 lr: 0.000014 loss_cls: 4.1269 (3.8949) grad_norm: 4.0306 (4.2951) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 10:59:52 root] (utils.py 283): INFO Epoch: [12] [1250/2502] eta: 0:16:05 lr: 0.000014 loss_cls: 3.9819 (3.8946) grad_norm: 4.0322 (4.2941) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-05 11:00:00 root] (utils.py 283): INFO Epoch: [12] [1260/2502] eta: 0:15:58 lr: 0.000014 loss_cls: 3.9540 (3.8945) grad_norm: 4.2335 (4.2945) time: 0.7619 data: 0.0003 max mem: 8421 +[2024-12-05 11:00:07 root] (utils.py 283): INFO Epoch: [12] [1270/2502] eta: 0:15:50 lr: 0.000014 loss_cls: 4.0719 (3.8963) grad_norm: 4.1446 (4.2938) time: 0.7627 data: 0.0003 max mem: 8421 +[2024-12-05 11:00:15 root] (utils.py 283): INFO Epoch: [12] [1280/2502] eta: 0:15:42 lr: 0.000014 loss_cls: 3.9154 (3.8941) grad_norm: 4.1096 (4.2927) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 11:00:23 root] (utils.py 283): INFO Epoch: [12] [1290/2502] eta: 0:15:34 lr: 0.000014 loss_cls: 3.6789 (3.8924) grad_norm: 4.1559 (4.2924) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 11:00:30 root] (utils.py 283): INFO Epoch: [12] [1300/2502] eta: 0:15:26 lr: 0.000014 loss_cls: 3.1913 (3.8874) grad_norm: 4.1233 (4.2906) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-05 11:00:38 root] (utils.py 283): INFO Epoch: [12] [1310/2502] eta: 0:15:19 lr: 0.000014 loss_cls: 3.6474 (3.8886) grad_norm: 3.9760 (4.2884) time: 0.7614 data: 0.0002 max mem: 8421 +[2024-12-05 11:00:46 root] (utils.py 283): INFO Epoch: [12] [1320/2502] eta: 0:15:11 lr: 0.000014 loss_cls: 4.0655 (3.8890) grad_norm: 3.9778 (4.2886) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-05 11:00:53 root] (utils.py 283): INFO Epoch: [12] [1330/2502] eta: 0:15:03 lr: 0.000014 loss_cls: 3.9536 (3.8896) grad_norm: 4.1782 (4.2872) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:01 root] (utils.py 283): INFO Epoch: [12] [1340/2502] eta: 0:14:55 lr: 0.000014 loss_cls: 4.0272 (3.8910) grad_norm: 4.1782 (4.2862) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:09 root] (utils.py 283): INFO Epoch: [12] [1350/2502] eta: 0:14:48 lr: 0.000014 loss_cls: 4.0900 (3.8922) grad_norm: 4.1035 (4.2856) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:16 root] (utils.py 283): INFO Epoch: [12] [1360/2502] eta: 0:14:40 lr: 0.000014 loss_cls: 4.0459 (3.8919) grad_norm: 4.1039 (4.2853) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:24 root] (utils.py 283): INFO Epoch: [12] [1370/2502] eta: 0:14:32 lr: 0.000014 loss_cls: 3.9642 (3.8905) grad_norm: 4.2437 (4.2851) time: 0.7731 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:32 root] (utils.py 283): INFO Epoch: [12] [1380/2502] eta: 0:14:24 lr: 0.000014 loss_cls: 3.8552 (3.8903) grad_norm: 4.1612 (4.2848) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:39 root] (utils.py 283): INFO Epoch: [12] [1390/2502] eta: 0:14:17 lr: 0.000014 loss_cls: 3.8552 (3.8899) grad_norm: 4.1594 (4.2853) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:47 root] (utils.py 283): INFO Epoch: [12] [1400/2502] eta: 0:14:09 lr: 0.000014 loss_cls: 3.7970 (3.8894) grad_norm: 4.2638 (4.2866) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 11:01:55 root] (utils.py 283): INFO Epoch: [12] [1410/2502] eta: 0:14:01 lr: 0.000014 loss_cls: 3.9205 (3.8887) grad_norm: 4.2457 (4.2859) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 11:02:02 root] (utils.py 283): INFO Epoch: [12] [1420/2502] eta: 0:13:53 lr: 0.000014 loss_cls: 3.9205 (3.8880) grad_norm: 4.1691 (4.2849) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 11:02:10 root] (utils.py 283): INFO Epoch: [12] [1430/2502] eta: 0:13:46 lr: 0.000014 loss_cls: 3.7805 (3.8873) grad_norm: 4.1691 (4.2845) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 11:02:18 root] (utils.py 283): INFO Epoch: [12] [1440/2502] eta: 0:13:38 lr: 0.000014 loss_cls: 3.9204 (3.8874) grad_norm: 4.2185 (4.2875) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 11:02:25 root] (utils.py 283): INFO Epoch: [12] [1450/2502] eta: 0:13:30 lr: 0.000014 loss_cls: 3.9945 (3.8856) grad_norm: 4.2270 (4.2891) time: 0.7671 data: 0.0003 max mem: 8421 +[2024-12-05 11:02:33 root] (utils.py 283): INFO Epoch: [12] [1460/2502] eta: 0:13:22 lr: 0.000014 loss_cls: 3.9945 (3.8863) grad_norm: 4.2429 (4.2894) time: 0.7716 data: 0.0002 max mem: 8421 +[2024-12-05 11:02:41 root] (utils.py 283): INFO Epoch: [12] [1470/2502] eta: 0:13:15 lr: 0.000014 loss_cls: 4.0791 (3.8864) grad_norm: 4.3103 (4.2915) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 11:02:49 root] (utils.py 283): INFO Epoch: [12] [1480/2502] eta: 0:13:07 lr: 0.000014 loss_cls: 3.6441 (3.8846) grad_norm: 4.3103 (4.2906) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-05 11:02:57 root] (utils.py 283): INFO Epoch: [12] [1490/2502] eta: 0:13:00 lr: 0.000014 loss_cls: 3.4024 (3.8829) grad_norm: 4.0727 (4.2901) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-05 11:03:04 root] (utils.py 283): INFO Epoch: [12] [1500/2502] eta: 0:12:52 lr: 0.000014 loss_cls: 3.6566 (3.8816) grad_norm: 4.0727 (4.2882) time: 0.7755 data: 0.0003 max mem: 8421 +[2024-12-05 11:03:12 root] (utils.py 283): INFO Epoch: [12] [1510/2502] eta: 0:12:44 lr: 0.000014 loss_cls: 3.9978 (3.8829) grad_norm: 4.1499 (4.2895) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 11:03:19 root] (utils.py 283): INFO Epoch: [12] [1520/2502] eta: 0:12:36 lr: 0.000014 loss_cls: 3.9974 (3.8809) grad_norm: 4.2030 (4.2890) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-05 11:03:27 root] (utils.py 283): INFO Epoch: [12] [1530/2502] eta: 0:12:29 lr: 0.000014 loss_cls: 3.3188 (3.8798) grad_norm: 4.2280 (4.2889) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 11:03:35 root] (utils.py 283): INFO Epoch: [12] [1540/2502] eta: 0:12:21 lr: 0.000014 loss_cls: 3.5405 (3.8783) grad_norm: 4.2306 (4.2891) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 11:03:42 root] (utils.py 283): INFO Epoch: [12] [1550/2502] eta: 0:12:13 lr: 0.000014 loss_cls: 3.9307 (3.8792) grad_norm: 4.2077 (4.2910) time: 0.7678 data: 0.0003 max mem: 8421 +[2024-12-05 11:03:50 root] (utils.py 283): INFO Epoch: [12] [1560/2502] eta: 0:12:06 lr: 0.000014 loss_cls: 4.0453 (3.8790) grad_norm: 4.0720 (4.2958) time: 0.7732 data: 0.0003 max mem: 8421 +[2024-12-05 11:03:58 root] (utils.py 283): INFO Epoch: [12] [1570/2502] eta: 0:11:58 lr: 0.000014 loss_cls: 4.0583 (3.8795) grad_norm: 4.0720 (4.2962) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 11:04:06 root] (utils.py 283): INFO Epoch: [12] [1580/2502] eta: 0:11:50 lr: 0.000014 loss_cls: 3.8862 (3.8770) grad_norm: 4.1038 (4.2954) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 11:04:13 root] (utils.py 283): INFO Epoch: [12] [1590/2502] eta: 0:11:42 lr: 0.000014 loss_cls: 3.7712 (3.8757) grad_norm: 4.0579 (4.2935) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 11:04:21 root] (utils.py 283): INFO Epoch: [12] [1600/2502] eta: 0:11:35 lr: 0.000014 loss_cls: 3.4633 (3.8730) grad_norm: 4.0632 (4.2932) time: 0.7735 data: 0.0003 max mem: 8421 +[2024-12-05 11:04:29 root] (utils.py 283): INFO Epoch: [12] [1610/2502] eta: 0:11:27 lr: 0.000014 loss_cls: 4.0294 (3.8744) grad_norm: 4.2099 (4.2934) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 11:04:36 root] (utils.py 283): INFO Epoch: [12] [1620/2502] eta: 0:11:19 lr: 0.000014 loss_cls: 4.0410 (3.8741) grad_norm: 4.2099 (4.2931) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 11:04:44 root] (utils.py 283): INFO Epoch: [12] [1630/2502] eta: 0:11:11 lr: 0.000014 loss_cls: 4.0847 (3.8744) grad_norm: 4.0210 (4.2933) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 11:04:52 root] (utils.py 283): INFO Epoch: [12] [1640/2502] eta: 0:11:04 lr: 0.000014 loss_cls: 4.1365 (3.8747) grad_norm: 4.0829 (4.2934) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 11:04:59 root] (utils.py 283): INFO Epoch: [12] [1650/2502] eta: 0:10:56 lr: 0.000014 loss_cls: 3.6619 (3.8744) grad_norm: 4.2229 (4.2941) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 11:05:07 root] (utils.py 283): INFO Epoch: [12] [1660/2502] eta: 0:10:48 lr: 0.000014 loss_cls: 3.7990 (3.8750) grad_norm: 4.0947 (4.2941) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 11:05:15 root] (utils.py 283): INFO Epoch: [12] [1670/2502] eta: 0:10:41 lr: 0.000014 loss_cls: 3.8848 (3.8751) grad_norm: 4.4254 (4.2974) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 11:05:22 root] (utils.py 283): INFO Epoch: [12] [1680/2502] eta: 0:10:33 lr: 0.000014 loss_cls: 4.0427 (3.8760) grad_norm: 4.2579 (4.2966) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 11:05:30 root] (utils.py 283): INFO Epoch: [12] [1690/2502] eta: 0:10:25 lr: 0.000014 loss_cls: 4.1852 (3.8759) grad_norm: 4.1248 (4.2987) time: 0.7689 data: 0.0003 max mem: 8421 +[2024-12-05 11:05:38 root] (utils.py 283): INFO Epoch: [12] [1700/2502] eta: 0:10:17 lr: 0.000014 loss_cls: 3.8352 (3.8767) grad_norm: 4.1247 (4.2991) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 11:05:45 root] (utils.py 283): INFO Epoch: [12] [1710/2502] eta: 0:10:10 lr: 0.000014 loss_cls: 4.0963 (3.8781) grad_norm: 4.0935 (4.2978) time: 0.7619 data: 0.0003 max mem: 8421 +[2024-12-05 11:05:53 root] (utils.py 283): INFO Epoch: [12] [1720/2502] eta: 0:10:02 lr: 0.000014 loss_cls: 4.0197 (3.8783) grad_norm: 4.0935 (4.2979) time: 0.7611 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:00 root] (utils.py 283): INFO Epoch: [12] [1730/2502] eta: 0:09:54 lr: 0.000014 loss_cls: 3.9548 (3.8775) grad_norm: 4.0585 (4.2969) time: 0.7609 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:08 root] (utils.py 283): INFO Epoch: [12] [1740/2502] eta: 0:09:46 lr: 0.000014 loss_cls: 3.9548 (3.8777) grad_norm: 4.1383 (4.2967) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:16 root] (utils.py 283): INFO Epoch: [12] [1750/2502] eta: 0:09:39 lr: 0.000014 loss_cls: 4.0438 (3.8796) grad_norm: 4.1844 (4.2967) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:23 root] (utils.py 283): INFO Epoch: [12] [1760/2502] eta: 0:09:31 lr: 0.000014 loss_cls: 4.1261 (3.8781) grad_norm: 4.2347 (4.2967) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:31 root] (utils.py 283): INFO Epoch: [12] [1770/2502] eta: 0:09:23 lr: 0.000014 loss_cls: 3.7587 (3.8789) grad_norm: 4.2461 (4.2963) time: 0.7714 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:39 root] (utils.py 283): INFO Epoch: [12] [1780/2502] eta: 0:09:16 lr: 0.000014 loss_cls: 3.8276 (3.8774) grad_norm: 4.2819 (4.2970) time: 0.7722 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:46 root] (utils.py 283): INFO Epoch: [12] [1790/2502] eta: 0:09:08 lr: 0.000014 loss_cls: 3.9452 (3.8777) grad_norm: 4.4283 (4.2968) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-05 11:06:54 root] (utils.py 283): INFO Epoch: [12] [1800/2502] eta: 0:09:00 lr: 0.000014 loss_cls: 4.1400 (3.8795) grad_norm: 4.4010 (4.2990) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 11:07:02 root] (utils.py 283): INFO Epoch: [12] [1810/2502] eta: 0:08:52 lr: 0.000014 loss_cls: 4.1723 (3.8804) grad_norm: 4.2483 (4.3007) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 11:07:09 root] (utils.py 283): INFO Epoch: [12] [1820/2502] eta: 0:08:45 lr: 0.000014 loss_cls: 4.0322 (3.8799) grad_norm: 4.3219 (4.3036) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-05 11:07:17 root] (utils.py 283): INFO Epoch: [12] [1830/2502] eta: 0:08:37 lr: 0.000014 loss_cls: 3.8754 (3.8801) grad_norm: 4.3219 (4.3039) time: 0.7683 data: 0.0003 max mem: 8421 +[2024-12-05 11:07:25 root] (utils.py 283): INFO Epoch: [12] [1840/2502] eta: 0:08:29 lr: 0.000014 loss_cls: 3.7865 (3.8783) grad_norm: 4.1734 (4.3038) time: 0.7735 data: 0.0002 max mem: 8421 +[2024-12-05 11:07:33 root] (utils.py 283): INFO Epoch: [12] [1850/2502] eta: 0:08:22 lr: 0.000014 loss_cls: 3.6650 (3.8787) grad_norm: 4.2203 (4.3063) time: 0.7766 data: 0.0002 max mem: 8421 +[2024-12-05 11:07:40 root] (utils.py 283): INFO Epoch: [12] [1860/2502] eta: 0:08:14 lr: 0.000014 loss_cls: 4.1243 (3.8798) grad_norm: 4.1218 (4.3057) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 11:07:48 root] (utils.py 283): INFO Epoch: [12] [1870/2502] eta: 0:08:06 lr: 0.000014 loss_cls: 3.7754 (3.8780) grad_norm: 4.1183 (4.3055) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-05 11:07:56 root] (utils.py 283): INFO Epoch: [12] [1880/2502] eta: 0:07:59 lr: 0.000014 loss_cls: 3.9615 (3.8798) grad_norm: 4.1765 (4.3056) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 11:08:03 root] (utils.py 283): INFO Epoch: [12] [1890/2502] eta: 0:07:51 lr: 0.000014 loss_cls: 4.1581 (3.8802) grad_norm: 4.0541 (4.3042) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 11:08:11 root] (utils.py 283): INFO Epoch: [12] [1900/2502] eta: 0:07:43 lr: 0.000014 loss_cls: 3.9158 (3.8795) grad_norm: 4.1375 (4.3041) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 11:08:19 root] (utils.py 283): INFO Epoch: [12] [1910/2502] eta: 0:07:35 lr: 0.000014 loss_cls: 4.0009 (3.8805) grad_norm: 4.3030 (4.3073) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-05 11:08:26 root] (utils.py 283): INFO Epoch: [12] [1920/2502] eta: 0:07:28 lr: 0.000014 loss_cls: 4.0023 (3.8806) grad_norm: 4.2567 (4.3072) time: 0.7613 data: 0.0003 max mem: 8421 +[2024-12-05 11:08:34 root] (utils.py 283): INFO Epoch: [12] [1930/2502] eta: 0:07:20 lr: 0.000014 loss_cls: 3.9422 (3.8805) grad_norm: 4.0696 (4.3057) time: 0.7629 data: 0.0003 max mem: 8421 +[2024-12-05 11:08:42 root] (utils.py 283): INFO Epoch: [12] [1940/2502] eta: 0:07:12 lr: 0.000014 loss_cls: 3.8910 (3.8796) grad_norm: 4.0731 (4.3058) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 11:08:50 root] (utils.py 283): INFO Epoch: [12] [1950/2502] eta: 0:07:05 lr: 0.000014 loss_cls: 3.7046 (3.8788) grad_norm: 4.1731 (4.3052) time: 0.7775 data: 0.0002 max mem: 8421 +[2024-12-05 11:08:57 root] (utils.py 283): INFO Epoch: [12] [1960/2502] eta: 0:06:57 lr: 0.000014 loss_cls: 3.7461 (3.8803) grad_norm: 4.1357 (4.3044) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-05 11:09:05 root] (utils.py 283): INFO Epoch: [12] [1970/2502] eta: 0:06:49 lr: 0.000014 loss_cls: 4.1364 (3.8806) grad_norm: 4.0522 (4.3041) time: 0.7853 data: 0.0002 max mem: 8421 +[2024-12-05 11:09:13 root] (utils.py 283): INFO Epoch: [12] [1980/2502] eta: 0:06:42 lr: 0.000014 loss_cls: 4.0709 (3.8809) grad_norm: 4.0928 (4.3038) time: 0.7867 data: 0.0002 max mem: 8421 +[2024-12-05 11:09:21 root] (utils.py 283): INFO Epoch: [12] [1990/2502] eta: 0:06:34 lr: 0.000014 loss_cls: 4.0709 (3.8819) grad_norm: 4.1227 (4.3033) time: 0.7880 data: 0.0003 max mem: 8421 +[2024-12-05 11:09:29 root] (utils.py 283): INFO Epoch: [12] [2000/2502] eta: 0:06:26 lr: 0.000014 loss_cls: 4.0785 (3.8820) grad_norm: 4.1227 (4.3024) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 11:09:37 root] (utils.py 283): INFO Epoch: [12] [2010/2502] eta: 0:06:19 lr: 0.000014 loss_cls: 3.7036 (3.8802) grad_norm: 4.0142 (4.3011) time: 0.7856 data: 0.0002 max mem: 8421 +[2024-12-05 11:09:45 root] (utils.py 283): INFO Epoch: [12] [2020/2502] eta: 0:06:11 lr: 0.000014 loss_cls: 3.5971 (3.8787) grad_norm: 4.0774 (4.3066) time: 0.7876 data: 0.0002 max mem: 8421 +[2024-12-05 11:09:52 root] (utils.py 283): INFO Epoch: [12] [2030/2502] eta: 0:06:03 lr: 0.000014 loss_cls: 4.0142 (3.8795) grad_norm: 4.2704 (4.3074) time: 0.7893 data: 0.0002 max mem: 8421 +[2024-12-05 11:10:00 root] (utils.py 283): INFO Epoch: [12] [2040/2502] eta: 0:05:56 lr: 0.000014 loss_cls: 4.0872 (3.8799) grad_norm: 4.2704 (4.3073) time: 0.7942 data: 0.0003 max mem: 8421 +[2024-12-05 11:10:08 root] (utils.py 283): INFO Epoch: [12] [2050/2502] eta: 0:05:48 lr: 0.000014 loss_cls: 4.0288 (3.8803) grad_norm: 4.1441 (4.3062) time: 0.7922 data: 0.0003 max mem: 8421 +[2024-12-05 11:10:16 root] (utils.py 283): INFO Epoch: [12] [2060/2502] eta: 0:05:40 lr: 0.000014 loss_cls: 4.0391 (3.8802) grad_norm: 4.0302 (4.3058) time: 0.7865 data: 0.0002 max mem: 8421 +[2024-12-05 11:10:24 root] (utils.py 283): INFO Epoch: [12] [2070/2502] eta: 0:05:33 lr: 0.000014 loss_cls: 3.9704 (3.8798) grad_norm: 4.1835 (4.3053) time: 0.7847 data: 0.0002 max mem: 8421 +[2024-12-05 11:10:32 root] (utils.py 283): INFO Epoch: [12] [2080/2502] eta: 0:05:25 lr: 0.000014 loss_cls: 3.6584 (3.8794) grad_norm: 4.1785 (4.3068) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 11:10:40 root] (utils.py 283): INFO Epoch: [12] [2090/2502] eta: 0:05:17 lr: 0.000014 loss_cls: 3.9243 (3.8803) grad_norm: 4.0683 (4.3063) time: 0.7904 data: 0.0002 max mem: 8421 +[2024-12-05 11:10:48 root] (utils.py 283): INFO Epoch: [12] [2100/2502] eta: 0:05:10 lr: 0.000014 loss_cls: 3.8330 (3.8786) grad_norm: 4.0635 (4.3070) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-05 11:10:55 root] (utils.py 283): INFO Epoch: [12] [2110/2502] eta: 0:05:02 lr: 0.000014 loss_cls: 3.5911 (3.8785) grad_norm: 4.3533 (4.3076) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 11:11:03 root] (utils.py 283): INFO Epoch: [12] [2120/2502] eta: 0:04:54 lr: 0.000014 loss_cls: 3.8680 (3.8785) grad_norm: 4.4185 (4.3078) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 11:11:11 root] (utils.py 283): INFO Epoch: [12] [2130/2502] eta: 0:04:46 lr: 0.000014 loss_cls: 3.8146 (3.8779) grad_norm: 4.1934 (4.3077) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 11:11:18 root] (utils.py 283): INFO Epoch: [12] [2140/2502] eta: 0:04:39 lr: 0.000014 loss_cls: 3.8146 (3.8778) grad_norm: 4.1605 (4.3087) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-05 11:11:26 root] (utils.py 283): INFO Epoch: [12] [2150/2502] eta: 0:04:31 lr: 0.000014 loss_cls: 3.6951 (3.8774) grad_norm: 4.1737 (4.3090) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 11:11:34 root] (utils.py 283): INFO Epoch: [12] [2160/2502] eta: 0:04:23 lr: 0.000014 loss_cls: 4.1071 (3.8781) grad_norm: 4.3175 (4.3102) time: 0.7969 data: 0.0003 max mem: 8421 +[2024-12-05 11:11:42 root] (utils.py 283): INFO Epoch: [12] [2170/2502] eta: 0:04:16 lr: 0.000014 loss_cls: 4.1302 (3.8798) grad_norm: 4.1535 (4.3120) time: 0.8001 data: 0.0003 max mem: 8421 +[2024-12-05 11:11:50 root] (utils.py 283): INFO Epoch: [12] [2180/2502] eta: 0:04:08 lr: 0.000014 loss_cls: 4.0911 (3.8789) grad_norm: 4.2965 (4.3124) time: 0.7996 data: 0.0003 max mem: 8421 +[2024-12-05 11:11:58 root] (utils.py 283): INFO Epoch: [12] [2190/2502] eta: 0:04:00 lr: 0.000014 loss_cls: 4.0306 (3.8787) grad_norm: 4.2300 (4.3117) time: 0.7959 data: 0.0003 max mem: 8421 +[2024-12-05 11:12:06 root] (utils.py 283): INFO Epoch: [12] [2200/2502] eta: 0:03:53 lr: 0.000014 loss_cls: 3.7363 (3.8778) grad_norm: 4.1083 (4.3126) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-05 11:12:14 root] (utils.py 283): INFO Epoch: [12] [2210/2502] eta: 0:03:45 lr: 0.000014 loss_cls: 3.7505 (3.8784) grad_norm: 4.2441 (4.3132) time: 0.7936 data: 0.0003 max mem: 8421 +[2024-12-05 11:12:22 root] (utils.py 283): INFO Epoch: [12] [2220/2502] eta: 0:03:37 lr: 0.000014 loss_cls: 3.8419 (3.8777) grad_norm: 4.1317 (4.3138) time: 0.7919 data: 0.0003 max mem: 8421 +[2024-12-05 11:12:30 root] (utils.py 283): INFO Epoch: [12] [2230/2502] eta: 0:03:30 lr: 0.000014 loss_cls: 3.8419 (3.8781) grad_norm: 4.2934 (4.3169) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-05 11:12:38 root] (utils.py 283): INFO Epoch: [12] [2240/2502] eta: 0:03:22 lr: 0.000014 loss_cls: 4.0081 (3.8789) grad_norm: 4.3191 (4.3170) time: 0.7928 data: 0.0003 max mem: 8421 +[2024-12-05 11:12:46 root] (utils.py 283): INFO Epoch: [12] [2250/2502] eta: 0:03:14 lr: 0.000014 loss_cls: 4.1684 (3.8808) grad_norm: 4.2185 (4.3173) time: 0.7942 data: 0.0003 max mem: 8421 +[2024-12-05 11:12:54 root] (utils.py 283): INFO Epoch: [12] [2260/2502] eta: 0:03:06 lr: 0.000014 loss_cls: 4.3316 (3.8826) grad_norm: 4.2307 (4.3175) time: 0.7933 data: 0.0003 max mem: 8421 +[2024-12-05 11:13:02 root] (utils.py 283): INFO Epoch: [12] [2270/2502] eta: 0:02:59 lr: 0.000014 loss_cls: 4.2388 (3.8825) grad_norm: 4.2522 (4.3171) time: 0.7936 data: 0.0003 max mem: 8421 +[2024-12-05 11:13:10 root] (utils.py 283): INFO Epoch: [12] [2280/2502] eta: 0:02:51 lr: 0.000014 loss_cls: 3.8524 (3.8824) grad_norm: 4.2678 (4.3166) time: 0.7929 data: 0.0003 max mem: 8421 +[2024-12-05 11:13:17 root] (utils.py 283): INFO Epoch: [12] [2290/2502] eta: 0:02:43 lr: 0.000014 loss_cls: 3.7465 (3.8818) grad_norm: 4.2854 (4.3181) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-05 11:13:25 root] (utils.py 283): INFO Epoch: [12] [2300/2502] eta: 0:02:36 lr: 0.000014 loss_cls: 3.8887 (3.8826) grad_norm: 4.2074 (4.3174) time: 0.7933 data: 0.0002 max mem: 8421 +[2024-12-05 11:13:33 root] (utils.py 283): INFO Epoch: [12] [2310/2502] eta: 0:02:28 lr: 0.000014 loss_cls: 4.0207 (3.8827) grad_norm: 4.1010 (4.3178) time: 0.7955 data: 0.0003 max mem: 8421 +[2024-12-05 11:13:41 root] (utils.py 283): INFO Epoch: [12] [2320/2502] eta: 0:02:20 lr: 0.000014 loss_cls: 4.1133 (3.8846) grad_norm: 4.0825 (4.3177) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-05 11:13:49 root] (utils.py 283): INFO Epoch: [12] [2330/2502] eta: 0:02:12 lr: 0.000014 loss_cls: 4.1853 (3.8853) grad_norm: 4.1090 (4.3170) time: 0.7943 data: 0.0002 max mem: 8421 +[2024-12-05 11:13:57 root] (utils.py 283): INFO Epoch: [12] [2340/2502] eta: 0:02:05 lr: 0.000014 loss_cls: 3.8103 (3.8844) grad_norm: 4.0805 (4.3166) time: 0.7956 data: 0.0003 max mem: 8421 +[2024-12-05 11:14:05 root] (utils.py 283): INFO Epoch: [12] [2350/2502] eta: 0:01:57 lr: 0.000014 loss_cls: 3.7093 (3.8849) grad_norm: 4.0971 (4.3165) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-05 11:14:13 root] (utils.py 283): INFO Epoch: [12] [2360/2502] eta: 0:01:49 lr: 0.000014 loss_cls: 4.0472 (3.8843) grad_norm: 4.2388 (4.3168) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 11:14:21 root] (utils.py 283): INFO Epoch: [12] [2370/2502] eta: 0:01:42 lr: 0.000014 loss_cls: 4.0385 (3.8852) grad_norm: 4.2847 (4.3165) time: 0.7720 data: 0.0003 max mem: 8421 +[2024-12-05 11:14:28 root] (utils.py 283): INFO Epoch: [12] [2380/2502] eta: 0:01:34 lr: 0.000014 loss_cls: 4.0794 (3.8860) grad_norm: 4.1471 (4.3172) time: 0.7725 data: 0.0002 max mem: 8421 +[2024-12-05 11:14:36 root] (utils.py 283): INFO Epoch: [12] [2390/2502] eta: 0:01:26 lr: 0.000014 loss_cls: 4.1448 (3.8860) grad_norm: 4.2391 (4.3169) time: 0.7766 data: 0.0002 max mem: 8421 +[2024-12-05 11:14:44 root] (utils.py 283): INFO Epoch: [12] [2400/2502] eta: 0:01:18 lr: 0.000014 loss_cls: 4.1108 (3.8861) grad_norm: 4.2014 (4.3164) time: 0.7744 data: 0.0002 max mem: 8421 +[2024-12-05 11:14:51 root] (utils.py 283): INFO Epoch: [12] [2410/2502] eta: 0:01:11 lr: 0.000014 loss_cls: 3.8212 (3.8855) grad_norm: 4.0662 (4.3155) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 11:14:59 root] (utils.py 283): INFO Epoch: [12] [2420/2502] eta: 0:01:03 lr: 0.000014 loss_cls: 3.8640 (3.8858) grad_norm: 4.1104 (4.3158) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 11:15:07 root] (utils.py 283): INFO Epoch: [12] [2430/2502] eta: 0:00:55 lr: 0.000014 loss_cls: 4.1773 (3.8872) grad_norm: 4.3521 (4.3178) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 11:15:14 root] (utils.py 283): INFO Epoch: [12] [2440/2502] eta: 0:00:47 lr: 0.000014 loss_cls: 4.1773 (3.8871) grad_norm: 4.1277 (4.3175) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 11:15:22 root] (utils.py 283): INFO Epoch: [12] [2450/2502] eta: 0:00:40 lr: 0.000014 loss_cls: 4.0836 (3.8884) grad_norm: 4.1924 (4.3175) time: 0.7622 data: 0.0002 max mem: 8421 +[2024-12-05 11:15:30 root] (utils.py 283): INFO Epoch: [12] [2460/2502] eta: 0:00:32 lr: 0.000014 loss_cls: 4.1311 (3.8880) grad_norm: 4.2329 (4.3175) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 11:15:37 root] (utils.py 283): INFO Epoch: [12] [2470/2502] eta: 0:00:24 lr: 0.000014 loss_cls: 3.9789 (3.8879) grad_norm: 4.2324 (4.3176) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 11:15:45 root] (utils.py 283): INFO Epoch: [12] [2480/2502] eta: 0:00:17 lr: 0.000014 loss_cls: 3.9655 (3.8873) grad_norm: 4.2144 (4.3173) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 11:15:53 root] (utils.py 283): INFO Epoch: [12] [2490/2502] eta: 0:00:09 lr: 0.000014 loss_cls: 3.8403 (3.8878) grad_norm: 4.1245 (4.3181) time: 0.8009 data: 0.0217 max mem: 8421 +[2024-12-05 11:16:01 root] (utils.py 283): INFO Epoch: [12] [2500/2502] eta: 0:00:01 lr: 0.000014 loss_cls: 4.1814 (3.8898) grad_norm: 4.1205 (4.3184) time: 0.8037 data: 0.0217 max mem: 8421 +[2024-12-05 11:16:02 root] (utils.py 283): INFO Epoch: [12] [2501/2502] eta: 0:00:00 lr: 0.000014 loss_cls: 4.1814 (3.8898) grad_norm: 4.1034 (4.3183) time: 0.7963 data: 0.0217 max mem: 8421 +[2024-12-05 11:16:02 root] (utils.py 297): INFO Epoch: [12] Total time: 0:32:14 (0.7733 s / it) +[2024-12-05 11:16:02 root] (engine.py 178): INFO Averaged stats:lr: 0.000014 loss_cls: 4.1814 (3.8894) grad_norm: 4.1034 (4.3183) +[2024-12-05 11:16:02 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7274 (0.7274) acc1: 85.9375 (85.9375) acc3: 95.3125 (95.3125) acc5: 98.4375 (98.4375) time: 0.1313 data: 0.0005 max mem: 8421 +[2024-12-05 11:16:03 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8235 (0.8728) acc1: 82.8125 (81.6051) acc3: 92.9688 (92.6847) acc5: 95.3125 (95.7386) time: 0.1314 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:05 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8866 (0.9228) acc1: 80.4688 (80.5432) acc3: 92.9688 (92.2619) acc5: 94.5312 (95.0893) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:06 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9729 (0.9302) acc1: 79.6875 (79.9647) acc3: 92.9688 (92.4395) acc5: 95.3125 (95.3125) time: 0.1319 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:07 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8577 (0.9227) acc1: 80.4688 (80.2020) acc3: 93.7500 (92.5305) acc5: 95.3125 (95.2553) time: 0.1318 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:09 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0291 (1.0156) acc1: 72.6562 (77.9871) acc3: 87.5000 (90.9467) acc5: 91.4062 (94.0870) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:10 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3115 (1.0589) acc1: 70.3125 (77.1132) acc3: 85.9375 (90.1895) acc5: 89.8438 (93.4554) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:11 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2860 (1.1027) acc1: 71.0938 (75.8693) acc3: 86.7188 (89.5136) acc5: 89.8438 (92.9688) time: 0.1320 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:13 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3134 (1.1400) acc1: 67.9688 (75.0482) acc3: 83.5938 (88.8503) acc5: 89.8438 (92.4479) time: 0.1324 data: 0.0007 max mem: 8421 +[2024-12-05 11:16:14 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3540 (1.1698) acc1: 67.9688 (74.3475) acc3: 83.5938 (88.3413) acc5: 89.8438 (92.0244) time: 0.1498 data: 0.0181 max mem: 8421 +[2024-12-05 11:16:15 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2590 (1.1592) acc1: 72.6562 (74.5600) acc3: 88.2812 (88.5280) acc5: 90.6250 (92.1760) time: 0.1549 data: 0.0235 max mem: 8421 +[2024-12-05 11:16:15 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1367 s / it) +[2024-12-05 11:16:15 root] (engine.py 263): INFO * Acc@1 74.350 Acc@3 88.636 Acc@5 92.180 loss 1.158 flops 1.285 layer_flops 1.251 +[2024-12-05 11:16:15 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.3% +[2024-12-05 11:16:15 root] (main.py 550): INFO Max accuracy: 74.41% +[2024-12-05 11:16:16 root] (utils.py 283): INFO Epoch: [13] [ 0/2502] eta: 0:34:24 lr: 0.000013 loss_cls: 3.8589 (3.8589) grad_norm: 3.9848 (3.9848) time: 0.8250 data: 0.0004 max mem: 8421 +[2024-12-05 11:16:24 root] (utils.py 283): INFO Epoch: [13] [ 10/2502] eta: 0:32:37 lr: 0.000013 loss_cls: 4.0024 (3.9632) grad_norm: 4.3507 (4.6387) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-05 11:16:32 root] (utils.py 283): INFO Epoch: [13] [ 20/2502] eta: 0:32:00 lr: 0.000013 loss_cls: 3.9929 (3.8665) grad_norm: 4.2698 (4.4249) time: 0.7710 data: 0.0002 max mem: 8421 +[2024-12-05 11:16:39 root] (utils.py 283): INFO Epoch: [13] [ 30/2502] eta: 0:31:42 lr: 0.000013 loss_cls: 3.7778 (3.8119) grad_norm: 4.1425 (4.4276) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-05 11:16:47 root] (utils.py 283): INFO Epoch: [13] [ 40/2502] eta: 0:31:33 lr: 0.000013 loss_cls: 3.9933 (3.8478) grad_norm: 4.1425 (4.3702) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-05 11:16:55 root] (utils.py 283): INFO Epoch: [13] [ 50/2502] eta: 0:31:23 lr: 0.000013 loss_cls: 3.6043 (3.7570) grad_norm: 4.0757 (4.3358) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 11:17:02 root] (utils.py 283): INFO Epoch: [13] [ 60/2502] eta: 0:31:15 lr: 0.000013 loss_cls: 3.6626 (3.8143) grad_norm: 4.2256 (4.4837) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 11:17:10 root] (utils.py 283): INFO Epoch: [13] [ 70/2502] eta: 0:31:06 lr: 0.000013 loss_cls: 3.9856 (3.8005) grad_norm: 4.3785 (4.5119) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 11:17:18 root] (utils.py 283): INFO Epoch: [13] [ 80/2502] eta: 0:31:02 lr: 0.000013 loss_cls: 3.8834 (3.7931) grad_norm: 4.3113 (4.4757) time: 0.7710 data: 0.0002 max mem: 8421 +[2024-12-05 11:17:25 root] (utils.py 283): INFO Epoch: [13] [ 90/2502] eta: 0:30:53 lr: 0.000013 loss_cls: 3.9304 (3.8162) grad_norm: 4.1705 (4.4611) time: 0.7711 data: 0.0002 max mem: 8421 +[2024-12-05 11:17:33 root] (utils.py 283): INFO Epoch: [13] [ 100/2502] eta: 0:30:50 lr: 0.000013 loss_cls: 3.9304 (3.8125) grad_norm: 4.2499 (4.4772) time: 0.7759 data: 0.0003 max mem: 8421 +[2024-12-05 11:17:41 root] (utils.py 283): INFO Epoch: [13] [ 110/2502] eta: 0:30:40 lr: 0.000013 loss_cls: 4.0517 (3.8479) grad_norm: 4.2499 (4.4706) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 11:17:48 root] (utils.py 283): INFO Epoch: [13] [ 120/2502] eta: 0:30:30 lr: 0.000013 loss_cls: 4.0746 (3.8642) grad_norm: 4.0687 (4.4460) time: 0.7598 data: 0.0003 max mem: 8421 +[2024-12-05 11:17:56 root] (utils.py 283): INFO Epoch: [13] [ 130/2502] eta: 0:30:21 lr: 0.000013 loss_cls: 3.9739 (3.8653) grad_norm: 4.1650 (4.4338) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 11:18:04 root] (utils.py 283): INFO Epoch: [13] [ 140/2502] eta: 0:30:15 lr: 0.000013 loss_cls: 4.1434 (3.8875) grad_norm: 4.1443 (4.4151) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-05 11:18:12 root] (utils.py 283): INFO Epoch: [13] [ 150/2502] eta: 0:30:09 lr: 0.000013 loss_cls: 4.1641 (3.8890) grad_norm: 4.1477 (4.4009) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 11:18:20 root] (utils.py 283): INFO Epoch: [13] [ 160/2502] eta: 0:30:15 lr: 0.000013 loss_cls: 3.7581 (3.8762) grad_norm: 4.1582 (4.3876) time: 0.8227 data: 0.0004 max mem: 8421 +[2024-12-05 11:18:37 root] (utils.py 283): INFO Epoch: [13] [ 170/2502] eta: 0:32:15 lr: 0.000013 loss_cls: 3.6553 (3.8518) grad_norm: 4.0247 (4.3668) time: 1.2883 data: 0.0004 max mem: 8421 +[2024-12-05 11:18:51 root] (utils.py 283): INFO Epoch: [13] [ 180/2502] eta: 0:33:12 lr: 0.000013 loss_cls: 3.5489 (3.8346) grad_norm: 4.0247 (4.3717) time: 1.5246 data: 0.0003 max mem: 8421 +[2024-12-05 11:18:58 root] (utils.py 283): INFO Epoch: [13] [ 190/2502] eta: 0:32:52 lr: 0.000013 loss_cls: 3.6036 (3.8314) grad_norm: 4.1650 (4.3663) time: 1.0492 data: 0.0003 max mem: 8421 +[2024-12-05 11:19:06 root] (utils.py 283): INFO Epoch: [13] [ 200/2502] eta: 0:32:36 lr: 0.000013 loss_cls: 3.9397 (3.8363) grad_norm: 4.1650 (4.4153) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 11:19:15 root] (utils.py 283): INFO Epoch: [13] [ 210/2502] eta: 0:32:26 lr: 0.000013 loss_cls: 3.8901 (3.8311) grad_norm: 4.0032 (4.4000) time: 0.8143 data: 0.0003 max mem: 8421 +[2024-12-05 11:19:22 root] (utils.py 283): INFO Epoch: [13] [ 220/2502] eta: 0:32:09 lr: 0.000013 loss_cls: 3.9709 (3.8417) grad_norm: 4.0425 (4.3881) time: 0.8039 data: 0.0003 max mem: 8421 +[2024-12-05 11:19:30 root] (utils.py 283): INFO Epoch: [13] [ 230/2502] eta: 0:31:53 lr: 0.000013 loss_cls: 4.2023 (3.8460) grad_norm: 4.2574 (4.3945) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 11:19:38 root] (utils.py 283): INFO Epoch: [13] [ 240/2502] eta: 0:31:37 lr: 0.000013 loss_cls: 4.2023 (3.8513) grad_norm: 4.2654 (4.3877) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 11:19:45 root] (utils.py 283): INFO Epoch: [13] [ 250/2502] eta: 0:31:22 lr: 0.000013 loss_cls: 4.1833 (3.8585) grad_norm: 4.1382 (4.3739) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 11:19:53 root] (utils.py 283): INFO Epoch: [13] [ 260/2502] eta: 0:31:08 lr: 0.000013 loss_cls: 3.8646 (3.8527) grad_norm: 4.0620 (4.3640) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-05 11:20:01 root] (utils.py 283): INFO Epoch: [13] [ 270/2502] eta: 0:30:54 lr: 0.000013 loss_cls: 3.9474 (3.8582) grad_norm: 4.0418 (4.3720) time: 0.7659 data: 0.0003 max mem: 8421 +[2024-12-05 11:20:08 root] (utils.py 283): INFO Epoch: [13] [ 280/2502] eta: 0:30:40 lr: 0.000013 loss_cls: 4.2390 (3.8656) grad_norm: 4.1306 (4.3653) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 11:20:16 root] (utils.py 283): INFO Epoch: [13] [ 290/2502] eta: 0:30:27 lr: 0.000013 loss_cls: 4.2603 (3.8756) grad_norm: 4.0336 (4.3572) time: 0.7623 data: 0.0003 max mem: 8421 +[2024-12-05 11:20:24 root] (utils.py 283): INFO Epoch: [13] [ 300/2502] eta: 0:30:15 lr: 0.000013 loss_cls: 4.1749 (3.8732) grad_norm: 4.0227 (4.3516) time: 0.7657 data: 0.0003 max mem: 8421 +[2024-12-05 11:20:31 root] (utils.py 283): INFO Epoch: [13] [ 310/2502] eta: 0:30:02 lr: 0.000013 loss_cls: 3.7105 (3.8645) grad_norm: 4.1205 (4.3473) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 11:20:39 root] (utils.py 283): INFO Epoch: [13] [ 320/2502] eta: 0:29:50 lr: 0.000013 loss_cls: 3.6974 (3.8656) grad_norm: 4.1336 (4.3432) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 11:20:46 root] (utils.py 283): INFO Epoch: [13] [ 330/2502] eta: 0:29:38 lr: 0.000013 loss_cls: 4.0693 (3.8713) grad_norm: 4.0691 (4.3332) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 11:20:54 root] (utils.py 283): INFO Epoch: [13] [ 340/2502] eta: 0:29:26 lr: 0.000013 loss_cls: 4.0693 (3.8680) grad_norm: 4.0839 (4.3777) time: 0.7628 data: 0.0003 max mem: 8421 +[2024-12-05 11:21:02 root] (utils.py 283): INFO Epoch: [13] [ 350/2502] eta: 0:29:15 lr: 0.000013 loss_cls: 4.0122 (3.8696) grad_norm: 4.3637 (4.3816) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 11:21:09 root] (utils.py 283): INFO Epoch: [13] [ 360/2502] eta: 0:29:04 lr: 0.000013 loss_cls: 3.5330 (3.8561) grad_norm: 4.4123 (4.3916) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 11:21:17 root] (utils.py 283): INFO Epoch: [13] [ 370/2502] eta: 0:28:53 lr: 0.000013 loss_cls: 3.6856 (3.8602) grad_norm: 4.1352 (4.3872) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 11:21:25 root] (utils.py 283): INFO Epoch: [13] [ 380/2502] eta: 0:28:42 lr: 0.000013 loss_cls: 4.1027 (3.8608) grad_norm: 4.1992 (4.4102) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 11:21:32 root] (utils.py 283): INFO Epoch: [13] [ 390/2502] eta: 0:28:32 lr: 0.000013 loss_cls: 3.9860 (3.8622) grad_norm: 4.2408 (4.4106) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-05 11:21:40 root] (utils.py 283): INFO Epoch: [13] [ 400/2502] eta: 0:28:21 lr: 0.000013 loss_cls: 4.1277 (3.8681) grad_norm: 4.1404 (4.4067) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 11:21:48 root] (utils.py 283): INFO Epoch: [13] [ 410/2502] eta: 0:28:11 lr: 0.000013 loss_cls: 3.8454 (3.8648) grad_norm: 4.0975 (4.4026) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 11:21:55 root] (utils.py 283): INFO Epoch: [13] [ 420/2502] eta: 0:28:01 lr: 0.000013 loss_cls: 4.0239 (3.8693) grad_norm: 4.0128 (4.3964) time: 0.7703 data: 0.0003 max mem: 8421 +[2024-12-05 11:22:03 root] (utils.py 283): INFO Epoch: [13] [ 430/2502] eta: 0:27:52 lr: 0.000013 loss_cls: 4.0944 (3.8684) grad_norm: 4.1190 (4.3911) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 11:22:11 root] (utils.py 283): INFO Epoch: [13] [ 440/2502] eta: 0:27:43 lr: 0.000013 loss_cls: 4.0427 (3.8707) grad_norm: 4.1764 (4.3925) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-05 11:22:19 root] (utils.py 283): INFO Epoch: [13] [ 450/2502] eta: 0:27:34 lr: 0.000013 loss_cls: 4.1395 (3.8708) grad_norm: 4.3369 (4.4122) time: 0.7887 data: 0.0002 max mem: 8421 +[2024-12-05 11:22:27 root] (utils.py 283): INFO Epoch: [13] [ 460/2502] eta: 0:27:25 lr: 0.000013 loss_cls: 3.7571 (3.8680) grad_norm: 4.2672 (4.4053) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-05 11:22:35 root] (utils.py 283): INFO Epoch: [13] [ 470/2502] eta: 0:27:16 lr: 0.000013 loss_cls: 3.8985 (3.8690) grad_norm: 4.0899 (4.3990) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-05 11:22:43 root] (utils.py 283): INFO Epoch: [13] [ 480/2502] eta: 0:27:08 lr: 0.000013 loss_cls: 4.0521 (3.8672) grad_norm: 4.1027 (4.3938) time: 0.7917 data: 0.0003 max mem: 8421 +[2024-12-05 11:22:56 root] (utils.py 283): INFO Epoch: [13] [ 490/2502] eta: 0:27:19 lr: 0.000013 loss_cls: 4.0332 (3.8721) grad_norm: 4.1654 (4.3943) time: 1.0416 data: 0.0003 max mem: 8421 +[2024-12-05 11:23:13 root] (utils.py 283): INFO Epoch: [13] [ 500/2502] eta: 0:27:47 lr: 0.000013 loss_cls: 4.0442 (3.8751) grad_norm: 4.1828 (4.3898) time: 1.5003 data: 0.0003 max mem: 8421 +[2024-12-05 11:23:28 root] (utils.py 283): INFO Epoch: [13] [ 510/2502] eta: 0:28:07 lr: 0.000013 loss_cls: 3.9386 (3.8734) grad_norm: 4.1461 (4.3858) time: 1.6415 data: 0.0003 max mem: 8421 +[2024-12-05 11:23:36 root] (utils.py 283): INFO Epoch: [13] [ 520/2502] eta: 0:27:56 lr: 0.000013 loss_cls: 3.8399 (3.8708) grad_norm: 4.0653 (4.3864) time: 1.1665 data: 0.0003 max mem: 8421 +[2024-12-05 11:23:44 root] (utils.py 283): INFO Epoch: [13] [ 530/2502] eta: 0:27:44 lr: 0.000013 loss_cls: 3.4662 (3.8634) grad_norm: 4.4116 (4.3875) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-05 11:23:51 root] (utils.py 283): INFO Epoch: [13] [ 540/2502] eta: 0:27:33 lr: 0.000013 loss_cls: 3.4662 (3.8629) grad_norm: 4.3059 (4.3856) time: 0.7700 data: 0.0003 max mem: 8421 +[2024-12-05 11:23:59 root] (utils.py 283): INFO Epoch: [13] [ 550/2502] eta: 0:27:23 lr: 0.000013 loss_cls: 3.9805 (3.8631) grad_norm: 4.1157 (4.3844) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 11:24:07 root] (utils.py 283): INFO Epoch: [13] [ 560/2502] eta: 0:27:13 lr: 0.000013 loss_cls: 4.0742 (3.8626) grad_norm: 4.1257 (4.3795) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-05 11:24:19 root] (utils.py 283): INFO Epoch: [13] [ 570/2502] eta: 0:27:17 lr: 0.000013 loss_cls: 4.0711 (3.8642) grad_norm: 4.1745 (4.3802) time: 1.0062 data: 0.0004 max mem: 8421 +[2024-12-05 11:24:31 root] (utils.py 283): INFO Epoch: [13] [ 580/2502] eta: 0:27:19 lr: 0.000013 loss_cls: 4.0338 (3.8635) grad_norm: 4.2442 (4.3833) time: 1.1962 data: 0.0004 max mem: 8421 +[2024-12-05 11:24:39 root] (utils.py 283): INFO Epoch: [13] [ 590/2502] eta: 0:27:08 lr: 0.000013 loss_cls: 4.2155 (3.8674) grad_norm: 4.2517 (4.3820) time: 0.9759 data: 0.0003 max mem: 8421 +[2024-12-05 11:24:47 root] (utils.py 283): INFO Epoch: [13] [ 600/2502] eta: 0:26:58 lr: 0.000013 loss_cls: 3.9794 (3.8626) grad_norm: 4.0939 (4.3757) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-05 11:24:55 root] (utils.py 283): INFO Epoch: [13] [ 610/2502] eta: 0:26:48 lr: 0.000013 loss_cls: 3.7141 (3.8641) grad_norm: 4.0111 (4.3725) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-05 11:25:03 root] (utils.py 283): INFO Epoch: [13] [ 620/2502] eta: 0:26:39 lr: 0.000013 loss_cls: 3.7141 (3.8608) grad_norm: 4.2072 (4.3713) time: 0.8088 data: 0.0003 max mem: 8421 +[2024-12-05 11:25:15 root] (utils.py 283): INFO Epoch: [13] [ 630/2502] eta: 0:26:42 lr: 0.000013 loss_cls: 3.9742 (3.8617) grad_norm: 4.1885 (4.3695) time: 1.0352 data: 0.0003 max mem: 8421 +[2024-12-05 11:25:23 root] (utils.py 283): INFO Epoch: [13] [ 640/2502] eta: 0:26:31 lr: 0.000013 loss_cls: 3.9742 (3.8626) grad_norm: 4.1719 (4.3699) time: 1.0040 data: 0.0003 max mem: 8421 +[2024-12-05 11:25:31 root] (utils.py 283): INFO Epoch: [13] [ 650/2502] eta: 0:26:19 lr: 0.000013 loss_cls: 3.9137 (3.8599) grad_norm: 4.0231 (4.3654) time: 0.7679 data: 0.0003 max mem: 8421 +[2024-12-05 11:25:39 root] (utils.py 283): INFO Epoch: [13] [ 660/2502] eta: 0:26:08 lr: 0.000013 loss_cls: 3.8712 (3.8601) grad_norm: 3.9992 (4.3609) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 11:25:46 root] (utils.py 283): INFO Epoch: [13] [ 670/2502] eta: 0:25:58 lr: 0.000013 loss_cls: 3.9402 (3.8583) grad_norm: 4.1486 (4.3592) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 11:25:54 root] (utils.py 283): INFO Epoch: [13] [ 680/2502] eta: 0:25:48 lr: 0.000013 loss_cls: 4.1149 (3.8625) grad_norm: 4.2974 (4.3591) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 11:26:02 root] (utils.py 283): INFO Epoch: [13] [ 690/2502] eta: 0:25:38 lr: 0.000013 loss_cls: 4.1653 (3.8615) grad_norm: 4.2338 (4.3586) time: 0.7903 data: 0.0002 max mem: 8421 +[2024-12-05 11:26:10 root] (utils.py 283): INFO Epoch: [13] [ 700/2502] eta: 0:25:28 lr: 0.000013 loss_cls: 3.7901 (3.8615) grad_norm: 4.1585 (4.3602) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-05 11:26:18 root] (utils.py 283): INFO Epoch: [13] [ 710/2502] eta: 0:25:18 lr: 0.000013 loss_cls: 4.0073 (3.8647) grad_norm: 4.1568 (4.3586) time: 0.7896 data: 0.0003 max mem: 8421 +[2024-12-05 11:26:26 root] (utils.py 283): INFO Epoch: [13] [ 720/2502] eta: 0:25:08 lr: 0.000013 loss_cls: 3.9833 (3.8663) grad_norm: 4.1568 (4.3552) time: 0.7875 data: 0.0003 max mem: 8421 +[2024-12-05 11:26:34 root] (utils.py 283): INFO Epoch: [13] [ 730/2502] eta: 0:24:58 lr: 0.000013 loss_cls: 3.8495 (3.8654) grad_norm: 4.0869 (4.3524) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-05 11:26:42 root] (utils.py 283): INFO Epoch: [13] [ 740/2502] eta: 0:24:48 lr: 0.000013 loss_cls: 3.8817 (3.8670) grad_norm: 4.0932 (4.3503) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-05 11:26:50 root] (utils.py 283): INFO Epoch: [13] [ 750/2502] eta: 0:24:40 lr: 0.000013 loss_cls: 3.9761 (3.8699) grad_norm: 4.2242 (4.3480) time: 0.8280 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:02 root] (utils.py 283): INFO Epoch: [13] [ 760/2502] eta: 0:24:39 lr: 0.000013 loss_cls: 3.9761 (3.8709) grad_norm: 4.2609 (4.3487) time: 1.0102 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:10 root] (utils.py 283): INFO Epoch: [13] [ 770/2502] eta: 0:24:29 lr: 0.000013 loss_cls: 3.6413 (3.8639) grad_norm: 4.1124 (4.3477) time: 0.9595 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:17 root] (utils.py 283): INFO Epoch: [13] [ 780/2502] eta: 0:24:18 lr: 0.000013 loss_cls: 3.7071 (3.8624) grad_norm: 4.0857 (4.3447) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:25 root] (utils.py 283): INFO Epoch: [13] [ 790/2502] eta: 0:24:08 lr: 0.000013 loss_cls: 3.9373 (3.8649) grad_norm: 4.1113 (4.3429) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:33 root] (utils.py 283): INFO Epoch: [13] [ 800/2502] eta: 0:23:58 lr: 0.000013 loss_cls: 4.1773 (3.8681) grad_norm: 4.1113 (4.3413) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:40 root] (utils.py 283): INFO Epoch: [13] [ 810/2502] eta: 0:23:48 lr: 0.000013 loss_cls: 4.1350 (3.8695) grad_norm: 4.1161 (4.3383) time: 0.7662 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:48 root] (utils.py 283): INFO Epoch: [13] [ 820/2502] eta: 0:23:38 lr: 0.000013 loss_cls: 3.9728 (3.8687) grad_norm: 4.0302 (4.3365) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-05 11:27:56 root] (utils.py 283): INFO Epoch: [13] [ 830/2502] eta: 0:23:28 lr: 0.000013 loss_cls: 4.0527 (3.8711) grad_norm: 4.3293 (4.3386) time: 0.7733 data: 0.0003 max mem: 8421 +[2024-12-05 11:28:04 root] (utils.py 283): INFO Epoch: [13] [ 840/2502] eta: 0:23:19 lr: 0.000013 loss_cls: 4.1370 (3.8705) grad_norm: 4.3788 (4.3360) time: 0.7928 data: 0.0003 max mem: 8421 +[2024-12-05 11:28:12 root] (utils.py 283): INFO Epoch: [13] [ 850/2502] eta: 0:23:10 lr: 0.000013 loss_cls: 4.1099 (3.8708) grad_norm: 3.9587 (4.3316) time: 0.8057 data: 0.0003 max mem: 8421 +[2024-12-05 11:28:23 root] (utils.py 283): INFO Epoch: [13] [ 860/2502] eta: 0:23:08 lr: 0.000013 loss_cls: 4.1313 (3.8700) grad_norm: 3.9519 (4.3283) time: 0.9842 data: 0.0003 max mem: 8421 +[2024-12-05 11:28:31 root] (utils.py 283): INFO Epoch: [13] [ 870/2502] eta: 0:22:58 lr: 0.000013 loss_cls: 4.1361 (3.8688) grad_norm: 4.1810 (4.3292) time: 0.9706 data: 0.0003 max mem: 8421 +[2024-12-05 11:28:39 root] (utils.py 283): INFO Epoch: [13] [ 880/2502] eta: 0:22:48 lr: 0.000013 loss_cls: 4.0486 (3.8684) grad_norm: 4.3211 (4.3311) time: 0.7719 data: 0.0003 max mem: 8421 +[2024-12-05 11:28:47 root] (utils.py 283): INFO Epoch: [13] [ 890/2502] eta: 0:22:39 lr: 0.000013 loss_cls: 3.9741 (3.8647) grad_norm: 4.2490 (4.3319) time: 0.7768 data: 0.0002 max mem: 8421 +[2024-12-05 11:28:54 root] (utils.py 283): INFO Epoch: [13] [ 900/2502] eta: 0:22:29 lr: 0.000013 loss_cls: 3.7677 (3.8622) grad_norm: 4.2490 (4.3306) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-05 11:29:02 root] (utils.py 283): INFO Epoch: [13] [ 910/2502] eta: 0:22:19 lr: 0.000013 loss_cls: 3.9481 (3.8616) grad_norm: 4.1884 (4.3288) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 11:29:10 root] (utils.py 283): INFO Epoch: [13] [ 920/2502] eta: 0:22:09 lr: 0.000013 loss_cls: 4.0336 (3.8617) grad_norm: 4.2555 (4.3301) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 11:29:18 root] (utils.py 283): INFO Epoch: [13] [ 930/2502] eta: 0:22:00 lr: 0.000013 loss_cls: 3.9585 (3.8633) grad_norm: 4.3316 (4.3302) time: 0.7732 data: 0.0003 max mem: 8421 +[2024-12-05 11:29:25 root] (utils.py 283): INFO Epoch: [13] [ 940/2502] eta: 0:21:50 lr: 0.000013 loss_cls: 3.9982 (3.8642) grad_norm: 4.1214 (4.3290) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-05 11:29:33 root] (utils.py 283): INFO Epoch: [13] [ 950/2502] eta: 0:21:41 lr: 0.000013 loss_cls: 4.1910 (3.8661) grad_norm: 4.1214 (4.3306) time: 0.7726 data: 0.0003 max mem: 8421 +[2024-12-05 11:29:41 root] (utils.py 283): INFO Epoch: [13] [ 960/2502] eta: 0:21:32 lr: 0.000013 loss_cls: 3.9654 (3.8666) grad_norm: 4.1541 (4.3308) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 11:29:48 root] (utils.py 283): INFO Epoch: [13] [ 970/2502] eta: 0:21:22 lr: 0.000013 loss_cls: 3.9112 (3.8660) grad_norm: 4.1287 (4.3281) time: 0.7712 data: 0.0002 max mem: 8421 +[2024-12-05 11:29:56 root] (utils.py 283): INFO Epoch: [13] [ 980/2502] eta: 0:21:13 lr: 0.000013 loss_cls: 3.6763 (3.8633) grad_norm: 4.1171 (4.3273) time: 0.7773 data: 0.0002 max mem: 8421 +[2024-12-05 11:30:04 root] (utils.py 283): INFO Epoch: [13] [ 990/2502] eta: 0:21:03 lr: 0.000013 loss_cls: 3.7137 (3.8635) grad_norm: 4.0367 (4.3236) time: 0.7757 data: 0.0002 max mem: 8421 +[2024-12-05 11:30:12 root] (utils.py 283): INFO Epoch: [13] [1000/2502] eta: 0:20:54 lr: 0.000013 loss_cls: 4.0674 (3.8641) grad_norm: 4.0346 (4.3218) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-05 11:30:19 root] (utils.py 283): INFO Epoch: [13] [1010/2502] eta: 0:20:44 lr: 0.000013 loss_cls: 3.9081 (3.8622) grad_norm: 4.1851 (4.3215) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-05 11:30:27 root] (utils.py 283): INFO Epoch: [13] [1020/2502] eta: 0:20:35 lr: 0.000013 loss_cls: 3.9081 (3.8631) grad_norm: 4.2937 (4.3213) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 11:30:34 root] (utils.py 283): INFO Epoch: [13] [1030/2502] eta: 0:20:26 lr: 0.000013 loss_cls: 4.0923 (3.8633) grad_norm: 4.2995 (4.3211) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 11:30:42 root] (utils.py 283): INFO Epoch: [13] [1040/2502] eta: 0:20:17 lr: 0.000013 loss_cls: 3.8715 (3.8638) grad_norm: 4.2284 (4.3259) time: 0.7657 data: 0.0003 max mem: 8421 +[2024-12-05 11:30:50 root] (utils.py 283): INFO Epoch: [13] [1050/2502] eta: 0:20:07 lr: 0.000013 loss_cls: 4.1767 (3.8659) grad_norm: 4.2284 (4.3323) time: 0.7707 data: 0.0002 max mem: 8421 +[2024-12-05 11:30:58 root] (utils.py 283): INFO Epoch: [13] [1060/2502] eta: 0:19:58 lr: 0.000013 loss_cls: 4.1793 (3.8676) grad_norm: 4.3105 (4.3333) time: 0.7801 data: 0.0002 max mem: 8421 +[2024-12-05 11:31:06 root] (utils.py 283): INFO Epoch: [13] [1070/2502] eta: 0:19:50 lr: 0.000013 loss_cls: 4.0470 (3.8678) grad_norm: 4.3069 (4.3505) time: 0.8086 data: 0.0003 max mem: 8421 +[2024-12-05 11:31:18 root] (utils.py 283): INFO Epoch: [13] [1080/2502] eta: 0:19:46 lr: 0.000013 loss_cls: 4.0470 (3.8701) grad_norm: 4.3069 (4.3530) time: 0.9989 data: 0.0004 max mem: 8421 +[2024-12-05 11:31:25 root] (utils.py 283): INFO Epoch: [13] [1090/2502] eta: 0:19:37 lr: 0.000013 loss_cls: 4.3067 (3.8731) grad_norm: 4.4525 (4.3559) time: 0.9619 data: 0.0003 max mem: 8421 +[2024-12-05 11:31:33 root] (utils.py 283): INFO Epoch: [13] [1100/2502] eta: 0:19:28 lr: 0.000013 loss_cls: 4.2772 (3.8749) grad_norm: 4.4166 (4.3561) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 11:31:41 root] (utils.py 283): INFO Epoch: [13] [1110/2502] eta: 0:19:19 lr: 0.000013 loss_cls: 4.0682 (3.8755) grad_norm: 4.1601 (4.3542) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 11:31:49 root] (utils.py 283): INFO Epoch: [13] [1120/2502] eta: 0:19:10 lr: 0.000013 loss_cls: 3.8084 (3.8753) grad_norm: 4.0553 (4.3521) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 11:31:57 root] (utils.py 283): INFO Epoch: [13] [1130/2502] eta: 0:19:01 lr: 0.000013 loss_cls: 3.8434 (3.8735) grad_norm: 4.1580 (4.3525) time: 0.7739 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:04 root] (utils.py 283): INFO Epoch: [13] [1140/2502] eta: 0:18:52 lr: 0.000013 loss_cls: 3.8813 (3.8735) grad_norm: 4.0493 (4.3511) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:12 root] (utils.py 283): INFO Epoch: [13] [1150/2502] eta: 0:18:43 lr: 0.000013 loss_cls: 4.0348 (3.8739) grad_norm: 4.0493 (4.3493) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:19 root] (utils.py 283): INFO Epoch: [13] [1160/2502] eta: 0:18:34 lr: 0.000013 loss_cls: 4.1040 (3.8766) grad_norm: 4.1648 (4.3480) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:27 root] (utils.py 283): INFO Epoch: [13] [1170/2502] eta: 0:18:25 lr: 0.000013 loss_cls: 4.0879 (3.8756) grad_norm: 4.1897 (4.3475) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:35 root] (utils.py 283): INFO Epoch: [13] [1180/2502] eta: 0:18:16 lr: 0.000013 loss_cls: 3.8477 (3.8764) grad_norm: 4.2152 (4.3461) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:43 root] (utils.py 283): INFO Epoch: [13] [1190/2502] eta: 0:18:07 lr: 0.000013 loss_cls: 3.8477 (3.8761) grad_norm: 4.1915 (4.3487) time: 0.7883 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:51 root] (utils.py 283): INFO Epoch: [13] [1200/2502] eta: 0:17:58 lr: 0.000013 loss_cls: 4.1430 (3.8778) grad_norm: 4.1736 (4.3495) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-05 11:32:58 root] (utils.py 283): INFO Epoch: [13] [1210/2502] eta: 0:17:49 lr: 0.000013 loss_cls: 4.1430 (3.8765) grad_norm: 4.1546 (4.3484) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 11:33:06 root] (utils.py 283): INFO Epoch: [13] [1220/2502] eta: 0:17:40 lr: 0.000013 loss_cls: 3.9748 (3.8770) grad_norm: 4.1546 (4.3461) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-05 11:33:13 root] (utils.py 283): INFO Epoch: [13] [1230/2502] eta: 0:17:31 lr: 0.000013 loss_cls: 3.9207 (3.8758) grad_norm: 4.1268 (4.3446) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 11:33:21 root] (utils.py 283): INFO Epoch: [13] [1240/2502] eta: 0:17:22 lr: 0.000013 loss_cls: 3.7393 (3.8742) grad_norm: 4.1504 (4.3443) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 11:33:29 root] (utils.py 283): INFO Epoch: [13] [1250/2502] eta: 0:17:14 lr: 0.000013 loss_cls: 4.0533 (3.8756) grad_norm: 4.2795 (4.3451) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 11:33:37 root] (utils.py 283): INFO Epoch: [13] [1260/2502] eta: 0:17:05 lr: 0.000013 loss_cls: 4.0533 (3.8753) grad_norm: 4.3137 (4.3450) time: 0.7739 data: 0.0003 max mem: 8421 +[2024-12-05 11:33:44 root] (utils.py 283): INFO Epoch: [13] [1270/2502] eta: 0:16:56 lr: 0.000013 loss_cls: 3.7811 (3.8717) grad_norm: 4.0908 (4.3448) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 11:33:52 root] (utils.py 283): INFO Epoch: [13] [1280/2502] eta: 0:16:47 lr: 0.000013 loss_cls: 3.5497 (3.8705) grad_norm: 4.0770 (4.3436) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-05 11:34:00 root] (utils.py 283): INFO Epoch: [13] [1290/2502] eta: 0:16:38 lr: 0.000013 loss_cls: 3.9608 (3.8703) grad_norm: 4.0934 (4.3415) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 11:34:07 root] (utils.py 283): INFO Epoch: [13] [1300/2502] eta: 0:16:30 lr: 0.000013 loss_cls: 3.9201 (3.8694) grad_norm: 4.1316 (4.3432) time: 0.7759 data: 0.0003 max mem: 8421 +[2024-12-05 11:34:16 root] (utils.py 283): INFO Epoch: [13] [1310/2502] eta: 0:16:22 lr: 0.000013 loss_cls: 3.9201 (3.8696) grad_norm: 4.2663 (4.3429) time: 0.8021 data: 0.0003 max mem: 8421 +[2024-12-05 11:34:24 root] (utils.py 283): INFO Epoch: [13] [1320/2502] eta: 0:16:13 lr: 0.000013 loss_cls: 4.1059 (3.8711) grad_norm: 4.2502 (4.3426) time: 0.8099 data: 0.0003 max mem: 8421 +[2024-12-05 11:34:31 root] (utils.py 283): INFO Epoch: [13] [1330/2502] eta: 0:16:04 lr: 0.000013 loss_cls: 4.0193 (3.8684) grad_norm: 4.1074 (4.3415) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 11:34:39 root] (utils.py 283): INFO Epoch: [13] [1340/2502] eta: 0:15:56 lr: 0.000013 loss_cls: 3.5881 (3.8692) grad_norm: 3.9927 (4.3415) time: 0.7758 data: 0.0002 max mem: 8421 +[2024-12-05 11:34:47 root] (utils.py 283): INFO Epoch: [13] [1350/2502] eta: 0:15:47 lr: 0.000013 loss_cls: 4.1055 (3.8722) grad_norm: 4.0895 (4.3421) time: 0.7933 data: 0.0003 max mem: 8421 +[2024-12-05 11:34:56 root] (utils.py 283): INFO Epoch: [13] [1360/2502] eta: 0:15:39 lr: 0.000013 loss_cls: 4.0936 (3.8712) grad_norm: 4.1457 (4.3404) time: 0.8195 data: 0.0004 max mem: 8421 +[2024-12-05 11:35:06 root] (utils.py 283): INFO Epoch: [13] [1370/2502] eta: 0:15:33 lr: 0.000013 loss_cls: 4.0315 (3.8720) grad_norm: 4.0295 (4.3384) time: 0.9289 data: 0.0003 max mem: 8421 +[2024-12-05 11:35:15 root] (utils.py 283): INFO Epoch: [13] [1380/2502] eta: 0:15:25 lr: 0.000013 loss_cls: 4.1891 (3.8733) grad_norm: 4.0929 (4.3369) time: 0.9708 data: 0.0002 max mem: 8421 +[2024-12-05 11:35:23 root] (utils.py 283): INFO Epoch: [13] [1390/2502] eta: 0:15:17 lr: 0.000013 loss_cls: 4.1044 (3.8730) grad_norm: 4.1309 (4.3379) time: 0.8457 data: 0.0003 max mem: 8421 +[2024-12-05 11:35:30 root] (utils.py 283): INFO Epoch: [13] [1400/2502] eta: 0:15:08 lr: 0.000013 loss_cls: 4.0369 (3.8727) grad_norm: 4.1556 (4.3360) time: 0.7684 data: 0.0003 max mem: 8421 +[2024-12-05 11:35:38 root] (utils.py 283): INFO Epoch: [13] [1410/2502] eta: 0:14:59 lr: 0.000013 loss_cls: 3.7944 (3.8723) grad_norm: 4.1494 (4.3371) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-05 11:35:46 root] (utils.py 283): INFO Epoch: [13] [1420/2502] eta: 0:14:51 lr: 0.000013 loss_cls: 3.7944 (3.8733) grad_norm: 4.1344 (4.3354) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-05 11:35:55 root] (utils.py 283): INFO Epoch: [13] [1430/2502] eta: 0:14:43 lr: 0.000013 loss_cls: 3.8011 (3.8730) grad_norm: 4.1344 (4.3349) time: 0.8338 data: 0.0003 max mem: 8421 +[2024-12-05 11:36:02 root] (utils.py 283): INFO Epoch: [13] [1440/2502] eta: 0:14:34 lr: 0.000013 loss_cls: 3.8011 (3.8734) grad_norm: 4.2019 (4.3338) time: 0.8172 data: 0.0002 max mem: 8421 +[2024-12-05 11:36:10 root] (utils.py 283): INFO Epoch: [13] [1450/2502] eta: 0:14:26 lr: 0.000013 loss_cls: 4.1116 (3.8752) grad_norm: 4.2019 (4.3332) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-05 11:36:18 root] (utils.py 283): INFO Epoch: [13] [1460/2502] eta: 0:14:17 lr: 0.000013 loss_cls: 4.2056 (3.8750) grad_norm: 4.2307 (4.3404) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 11:36:26 root] (utils.py 283): INFO Epoch: [13] [1470/2502] eta: 0:14:09 lr: 0.000013 loss_cls: 3.8045 (3.8752) grad_norm: 4.2307 (4.3397) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-05 11:36:35 root] (utils.py 283): INFO Epoch: [13] [1480/2502] eta: 0:14:01 lr: 0.000013 loss_cls: 3.8807 (3.8743) grad_norm: 4.1775 (4.3398) time: 0.8298 data: 0.0002 max mem: 8421 +[2024-12-05 11:36:43 root] (utils.py 283): INFO Epoch: [13] [1490/2502] eta: 0:13:52 lr: 0.000013 loss_cls: 4.1308 (3.8769) grad_norm: 4.0514 (4.3388) time: 0.8315 data: 0.0002 max mem: 8421 +[2024-12-05 11:36:50 root] (utils.py 283): INFO Epoch: [13] [1500/2502] eta: 0:13:44 lr: 0.000013 loss_cls: 4.1686 (3.8772) grad_norm: 4.1782 (4.3399) time: 0.7826 data: 0.0002 max mem: 8421 +[2024-12-05 11:36:58 root] (utils.py 283): INFO Epoch: [13] [1510/2502] eta: 0:13:35 lr: 0.000013 loss_cls: 4.0455 (3.8759) grad_norm: 4.3047 (4.3400) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 11:37:06 root] (utils.py 283): INFO Epoch: [13] [1520/2502] eta: 0:13:27 lr: 0.000013 loss_cls: 3.7962 (3.8754) grad_norm: 4.2415 (4.3392) time: 0.7758 data: 0.0002 max mem: 8421 +[2024-12-05 11:37:15 root] (utils.py 283): INFO Epoch: [13] [1530/2502] eta: 0:13:19 lr: 0.000013 loss_cls: 3.8719 (3.8754) grad_norm: 4.1837 (4.3389) time: 0.8115 data: 0.0003 max mem: 8421 +[2024-12-05 11:37:23 root] (utils.py 283): INFO Epoch: [13] [1540/2502] eta: 0:13:11 lr: 0.000013 loss_cls: 3.9476 (3.8756) grad_norm: 4.1558 (4.3384) time: 0.8541 data: 0.0003 max mem: 8421 +[2024-12-05 11:37:31 root] (utils.py 283): INFO Epoch: [13] [1550/2502] eta: 0:13:02 lr: 0.000013 loss_cls: 3.8438 (3.8749) grad_norm: 4.0773 (4.3380) time: 0.8086 data: 0.0003 max mem: 8421 +[2024-12-05 11:37:38 root] (utils.py 283): INFO Epoch: [13] [1560/2502] eta: 0:12:54 lr: 0.000013 loss_cls: 3.7401 (3.8749) grad_norm: 4.0686 (4.3363) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-05 11:37:46 root] (utils.py 283): INFO Epoch: [13] [1570/2502] eta: 0:12:45 lr: 0.000013 loss_cls: 3.9417 (3.8748) grad_norm: 4.1790 (4.3374) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 11:37:54 root] (utils.py 283): INFO Epoch: [13] [1580/2502] eta: 0:12:37 lr: 0.000013 loss_cls: 4.0742 (3.8771) grad_norm: 4.1874 (4.3374) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-05 11:38:02 root] (utils.py 283): INFO Epoch: [13] [1590/2502] eta: 0:12:28 lr: 0.000013 loss_cls: 4.1778 (3.8774) grad_norm: 4.1693 (4.3366) time: 0.7709 data: 0.0002 max mem: 8421 +[2024-12-05 11:38:10 root] (utils.py 283): INFO Epoch: [13] [1600/2502] eta: 0:12:20 lr: 0.000013 loss_cls: 4.1235 (3.8774) grad_norm: 4.1236 (4.3356) time: 0.7952 data: 0.0003 max mem: 8421 +[2024-12-05 11:38:17 root] (utils.py 283): INFO Epoch: [13] [1610/2502] eta: 0:12:11 lr: 0.000013 loss_cls: 4.0389 (3.8763) grad_norm: 4.0777 (4.3345) time: 0.7932 data: 0.0003 max mem: 8421 +[2024-12-05 11:38:25 root] (utils.py 283): INFO Epoch: [13] [1620/2502] eta: 0:12:03 lr: 0.000013 loss_cls: 3.9311 (3.8777) grad_norm: 4.1081 (4.3332) time: 0.7716 data: 0.0003 max mem: 8421 +[2024-12-05 11:38:33 root] (utils.py 283): INFO Epoch: [13] [1630/2502] eta: 0:11:54 lr: 0.000013 loss_cls: 4.1310 (3.8790) grad_norm: 4.1080 (4.3320) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-05 11:38:41 root] (utils.py 283): INFO Epoch: [13] [1640/2502] eta: 0:11:46 lr: 0.000013 loss_cls: 4.2034 (3.8801) grad_norm: 4.0370 (4.3312) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-05 11:38:48 root] (utils.py 283): INFO Epoch: [13] [1650/2502] eta: 0:11:38 lr: 0.000013 loss_cls: 4.1872 (3.8811) grad_norm: 4.0112 (4.3304) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 11:38:56 root] (utils.py 283): INFO Epoch: [13] [1660/2502] eta: 0:11:29 lr: 0.000013 loss_cls: 3.8472 (3.8792) grad_norm: 4.1239 (4.3302) time: 0.7644 data: 0.0003 max mem: 8421 +[2024-12-05 11:39:04 root] (utils.py 283): INFO Epoch: [13] [1670/2502] eta: 0:11:21 lr: 0.000013 loss_cls: 3.8472 (3.8797) grad_norm: 4.1402 (4.3298) time: 0.7640 data: 0.0003 max mem: 8421 +[2024-12-05 11:39:11 root] (utils.py 283): INFO Epoch: [13] [1680/2502] eta: 0:11:12 lr: 0.000013 loss_cls: 4.0679 (3.8805) grad_norm: 4.1377 (4.3287) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 11:39:19 root] (utils.py 283): INFO Epoch: [13] [1690/2502] eta: 0:11:04 lr: 0.000013 loss_cls: 4.1133 (3.8804) grad_norm: 4.0162 (4.3271) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-05 11:39:26 root] (utils.py 283): INFO Epoch: [13] [1700/2502] eta: 0:10:55 lr: 0.000013 loss_cls: 3.9741 (3.8796) grad_norm: 4.0852 (4.3264) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 11:39:34 root] (utils.py 283): INFO Epoch: [13] [1710/2502] eta: 0:10:47 lr: 0.000013 loss_cls: 3.9952 (3.8796) grad_norm: 4.1914 (4.3253) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-05 11:39:43 root] (utils.py 283): INFO Epoch: [13] [1720/2502] eta: 0:10:39 lr: 0.000013 loss_cls: 4.0301 (3.8787) grad_norm: 4.1851 (4.3282) time: 0.8063 data: 0.0003 max mem: 8421 +[2024-12-05 11:39:50 root] (utils.py 283): INFO Epoch: [13] [1730/2502] eta: 0:10:30 lr: 0.000013 loss_cls: 3.8957 (3.8795) grad_norm: 4.0971 (4.3267) time: 0.8193 data: 0.0003 max mem: 8421 +[2024-12-05 11:39:58 root] (utils.py 283): INFO Epoch: [13] [1740/2502] eta: 0:10:22 lr: 0.000013 loss_cls: 3.8645 (3.8790) grad_norm: 4.1766 (4.3284) time: 0.7875 data: 0.0003 max mem: 8421 +[2024-12-05 11:40:06 root] (utils.py 283): INFO Epoch: [13] [1750/2502] eta: 0:10:14 lr: 0.000013 loss_cls: 3.9773 (3.8798) grad_norm: 4.3734 (4.3325) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-05 11:40:14 root] (utils.py 283): INFO Epoch: [13] [1760/2502] eta: 0:10:06 lr: 0.000013 loss_cls: 4.0522 (3.8807) grad_norm: 4.2088 (4.3319) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-05 11:40:22 root] (utils.py 283): INFO Epoch: [13] [1770/2502] eta: 0:09:57 lr: 0.000013 loss_cls: 3.9164 (3.8792) grad_norm: 4.2088 (4.3318) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 11:40:29 root] (utils.py 283): INFO Epoch: [13] [1780/2502] eta: 0:09:49 lr: 0.000013 loss_cls: 3.8296 (3.8792) grad_norm: 4.2140 (4.3421) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 11:40:37 root] (utils.py 283): INFO Epoch: [13] [1790/2502] eta: 0:09:41 lr: 0.000013 loss_cls: 4.0116 (3.8809) grad_norm: 4.2140 (4.3422) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 11:40:45 root] (utils.py 283): INFO Epoch: [13] [1800/2502] eta: 0:09:32 lr: 0.000013 loss_cls: 4.0079 (3.8810) grad_norm: 4.2191 (4.3418) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-05 11:40:53 root] (utils.py 283): INFO Epoch: [13] [1810/2502] eta: 0:09:24 lr: 0.000013 loss_cls: 3.9252 (3.8810) grad_norm: 4.2545 (4.3413) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 11:41:00 root] (utils.py 283): INFO Epoch: [13] [1820/2502] eta: 0:09:16 lr: 0.000013 loss_cls: 4.0455 (3.8817) grad_norm: 4.2620 (4.3415) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-05 11:41:08 root] (utils.py 283): INFO Epoch: [13] [1830/2502] eta: 0:09:07 lr: 0.000013 loss_cls: 4.0707 (3.8816) grad_norm: 4.1479 (4.3411) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 11:41:16 root] (utils.py 283): INFO Epoch: [13] [1840/2502] eta: 0:08:59 lr: 0.000013 loss_cls: 3.9178 (3.8811) grad_norm: 3.9898 (4.3403) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 11:41:24 root] (utils.py 283): INFO Epoch: [13] [1850/2502] eta: 0:08:51 lr: 0.000013 loss_cls: 3.7208 (3.8805) grad_norm: 4.2355 (4.3423) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 11:41:31 root] (utils.py 283): INFO Epoch: [13] [1860/2502] eta: 0:08:42 lr: 0.000013 loss_cls: 3.6275 (3.8798) grad_norm: 4.4624 (4.3454) time: 0.7887 data: 0.0003 max mem: 8421 +[2024-12-05 11:41:39 root] (utils.py 283): INFO Epoch: [13] [1870/2502] eta: 0:08:34 lr: 0.000013 loss_cls: 3.6146 (3.8789) grad_norm: 4.4569 (4.3471) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-05 11:41:47 root] (utils.py 283): INFO Epoch: [13] [1880/2502] eta: 0:08:26 lr: 0.000013 loss_cls: 3.6642 (3.8786) grad_norm: 4.2373 (4.3464) time: 0.7775 data: 0.0003 max mem: 8421 +[2024-12-05 11:41:55 root] (utils.py 283): INFO Epoch: [13] [1890/2502] eta: 0:08:18 lr: 0.000013 loss_cls: 3.9607 (3.8791) grad_norm: 4.2373 (4.3460) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 11:42:03 root] (utils.py 283): INFO Epoch: [13] [1900/2502] eta: 0:08:09 lr: 0.000013 loss_cls: 3.9103 (3.8787) grad_norm: 4.2260 (4.3458) time: 0.7839 data: 0.0002 max mem: 8421 +[2024-12-05 11:42:11 root] (utils.py 283): INFO Epoch: [13] [1910/2502] eta: 0:08:01 lr: 0.000013 loss_cls: 3.7935 (3.8775) grad_norm: 4.1196 (4.3450) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 11:42:18 root] (utils.py 283): INFO Epoch: [13] [1920/2502] eta: 0:07:53 lr: 0.000013 loss_cls: 3.9400 (3.8775) grad_norm: 4.0385 (4.3434) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 11:42:26 root] (utils.py 283): INFO Epoch: [13] [1930/2502] eta: 0:07:45 lr: 0.000013 loss_cls: 3.6299 (3.8750) grad_norm: 4.1288 (4.3433) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-05 11:42:34 root] (utils.py 283): INFO Epoch: [13] [1940/2502] eta: 0:07:37 lr: 0.000013 loss_cls: 3.3714 (3.8751) grad_norm: 4.1333 (4.3421) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 11:42:42 root] (utils.py 283): INFO Epoch: [13] [1950/2502] eta: 0:07:28 lr: 0.000013 loss_cls: 4.0762 (3.8751) grad_norm: 4.1191 (4.3413) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-05 11:42:50 root] (utils.py 283): INFO Epoch: [13] [1960/2502] eta: 0:07:20 lr: 0.000013 loss_cls: 4.0050 (3.8762) grad_norm: 4.1799 (4.3419) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-05 11:42:57 root] (utils.py 283): INFO Epoch: [13] [1970/2502] eta: 0:07:12 lr: 0.000013 loss_cls: 4.0050 (3.8764) grad_norm: 4.0230 (4.3400) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 11:43:05 root] (utils.py 283): INFO Epoch: [13] [1980/2502] eta: 0:07:04 lr: 0.000013 loss_cls: 3.9985 (3.8761) grad_norm: 3.9271 (4.3389) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-05 11:43:14 root] (utils.py 283): INFO Epoch: [13] [1990/2502] eta: 0:06:56 lr: 0.000013 loss_cls: 4.0302 (3.8765) grad_norm: 3.9855 (4.3375) time: 0.8113 data: 0.0003 max mem: 8421 +[2024-12-05 11:43:31 root] (utils.py 283): INFO Epoch: [13] [2000/2502] eta: 0:06:50 lr: 0.000013 loss_cls: 4.0712 (3.8764) grad_norm: 3.9697 (4.3358) time: 1.2812 data: 0.0004 max mem: 8421 +[2024-12-05 11:43:42 root] (utils.py 283): INFO Epoch: [13] [2010/2502] eta: 0:06:42 lr: 0.000013 loss_cls: 3.9081 (3.8760) grad_norm: 4.1302 (4.3354) time: 1.4313 data: 0.0003 max mem: 8421 +[2024-12-05 11:43:50 root] (utils.py 283): INFO Epoch: [13] [2020/2502] eta: 0:06:34 lr: 0.000013 loss_cls: 4.1695 (3.8775) grad_norm: 4.2086 (4.3364) time: 0.9510 data: 0.0003 max mem: 8421 +[2024-12-05 11:43:58 root] (utils.py 283): INFO Epoch: [13] [2030/2502] eta: 0:06:26 lr: 0.000013 loss_cls: 4.2180 (3.8792) grad_norm: 4.2086 (4.3359) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 11:44:06 root] (utils.py 283): INFO Epoch: [13] [2040/2502] eta: 0:06:18 lr: 0.000013 loss_cls: 3.9588 (3.8781) grad_norm: 4.1602 (4.3420) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-05 11:44:13 root] (utils.py 283): INFO Epoch: [13] [2050/2502] eta: 0:06:09 lr: 0.000013 loss_cls: 3.5556 (3.8774) grad_norm: 4.0876 (4.3409) time: 0.7913 data: 0.0003 max mem: 8421 +[2024-12-05 11:44:21 root] (utils.py 283): INFO Epoch: [13] [2060/2502] eta: 0:06:01 lr: 0.000013 loss_cls: 4.0779 (3.8783) grad_norm: 3.8886 (4.3391) time: 0.7705 data: 0.0003 max mem: 8421 +[2024-12-05 11:44:29 root] (utils.py 283): INFO Epoch: [13] [2070/2502] eta: 0:05:53 lr: 0.000013 loss_cls: 3.9625 (3.8782) grad_norm: 4.1524 (4.3394) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 11:44:36 root] (utils.py 283): INFO Epoch: [13] [2080/2502] eta: 0:05:44 lr: 0.000013 loss_cls: 3.9136 (3.8783) grad_norm: 4.2195 (4.3385) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 11:44:44 root] (utils.py 283): INFO Epoch: [13] [2090/2502] eta: 0:05:36 lr: 0.000013 loss_cls: 3.9136 (3.8780) grad_norm: 4.0934 (4.3380) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 11:44:51 root] (utils.py 283): INFO Epoch: [13] [2100/2502] eta: 0:05:28 lr: 0.000013 loss_cls: 3.7962 (3.8776) grad_norm: 4.0910 (4.3377) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-05 11:44:59 root] (utils.py 283): INFO Epoch: [13] [2110/2502] eta: 0:05:20 lr: 0.000013 loss_cls: 4.0054 (3.8790) grad_norm: 4.2601 (4.3388) time: 0.7700 data: 0.0002 max mem: 8421 +[2024-12-05 11:45:07 root] (utils.py 283): INFO Epoch: [13] [2120/2502] eta: 0:05:11 lr: 0.000013 loss_cls: 3.9984 (3.8785) grad_norm: 4.2670 (4.3380) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-05 11:45:15 root] (utils.py 283): INFO Epoch: [13] [2130/2502] eta: 0:05:03 lr: 0.000013 loss_cls: 3.9584 (3.8790) grad_norm: 4.1861 (4.3374) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-05 11:45:23 root] (utils.py 283): INFO Epoch: [13] [2140/2502] eta: 0:04:55 lr: 0.000013 loss_cls: 4.1543 (3.8803) grad_norm: 4.1959 (4.3387) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-05 11:45:30 root] (utils.py 283): INFO Epoch: [13] [2150/2502] eta: 0:04:47 lr: 0.000013 loss_cls: 4.2099 (3.8806) grad_norm: 4.3272 (4.3382) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 11:45:38 root] (utils.py 283): INFO Epoch: [13] [2160/2502] eta: 0:04:38 lr: 0.000013 loss_cls: 4.0884 (3.8804) grad_norm: 4.0167 (4.3370) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-05 11:45:46 root] (utils.py 283): INFO Epoch: [13] [2170/2502] eta: 0:04:30 lr: 0.000013 loss_cls: 3.9541 (3.8808) grad_norm: 4.0776 (4.3364) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 11:45:53 root] (utils.py 283): INFO Epoch: [13] [2180/2502] eta: 0:04:22 lr: 0.000013 loss_cls: 4.1386 (3.8816) grad_norm: 4.3874 (4.3374) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:01 root] (utils.py 283): INFO Epoch: [13] [2190/2502] eta: 0:04:14 lr: 0.000013 loss_cls: 4.0570 (3.8810) grad_norm: 4.2998 (4.3364) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:09 root] (utils.py 283): INFO Epoch: [13] [2200/2502] eta: 0:04:05 lr: 0.000013 loss_cls: 3.9685 (3.8813) grad_norm: 4.2466 (4.3367) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:16 root] (utils.py 283): INFO Epoch: [13] [2210/2502] eta: 0:03:57 lr: 0.000013 loss_cls: 3.9978 (3.8814) grad_norm: 4.1935 (4.3368) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:24 root] (utils.py 283): INFO Epoch: [13] [2220/2502] eta: 0:03:49 lr: 0.000013 loss_cls: 4.0834 (3.8834) grad_norm: 4.1598 (4.3363) time: 0.7708 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:32 root] (utils.py 283): INFO Epoch: [13] [2230/2502] eta: 0:03:41 lr: 0.000013 loss_cls: 4.3504 (3.8851) grad_norm: 4.0603 (4.3351) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:39 root] (utils.py 283): INFO Epoch: [13] [2240/2502] eta: 0:03:33 lr: 0.000013 loss_cls: 4.2053 (3.8856) grad_norm: 4.0143 (4.3338) time: 0.7703 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:48 root] (utils.py 283): INFO Epoch: [13] [2250/2502] eta: 0:03:25 lr: 0.000013 loss_cls: 3.9782 (3.8855) grad_norm: 3.9536 (4.3323) time: 0.8138 data: 0.0003 max mem: 8421 +[2024-12-05 11:46:55 root] (utils.py 283): INFO Epoch: [13] [2260/2502] eta: 0:03:16 lr: 0.000013 loss_cls: 4.0748 (3.8867) grad_norm: 4.0645 (4.3325) time: 0.8057 data: 0.0003 max mem: 8421 +[2024-12-05 11:47:03 root] (utils.py 283): INFO Epoch: [13] [2270/2502] eta: 0:03:08 lr: 0.000013 loss_cls: 3.6246 (3.8833) grad_norm: 4.1925 (4.3322) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 11:47:11 root] (utils.py 283): INFO Epoch: [13] [2280/2502] eta: 0:03:00 lr: 0.000013 loss_cls: 3.4188 (3.8830) grad_norm: 4.1323 (4.3325) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 11:47:18 root] (utils.py 283): INFO Epoch: [13] [2290/2502] eta: 0:02:52 lr: 0.000013 loss_cls: 3.8320 (3.8819) grad_norm: 4.1554 (4.3316) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 11:47:26 root] (utils.py 283): INFO Epoch: [13] [2300/2502] eta: 0:02:44 lr: 0.000013 loss_cls: 4.0297 (3.8826) grad_norm: 4.1554 (4.3328) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 11:47:34 root] (utils.py 283): INFO Epoch: [13] [2310/2502] eta: 0:02:36 lr: 0.000013 loss_cls: 4.1275 (3.8822) grad_norm: 4.1464 (4.3323) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-05 11:47:41 root] (utils.py 283): INFO Epoch: [13] [2320/2502] eta: 0:02:27 lr: 0.000013 loss_cls: 3.9186 (3.8820) grad_norm: 4.1744 (4.3314) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 11:47:49 root] (utils.py 283): INFO Epoch: [13] [2330/2502] eta: 0:02:19 lr: 0.000013 loss_cls: 3.9487 (3.8824) grad_norm: 4.0747 (4.3305) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 11:47:57 root] (utils.py 283): INFO Epoch: [13] [2340/2502] eta: 0:02:11 lr: 0.000013 loss_cls: 4.0870 (3.8831) grad_norm: 4.0747 (4.3295) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-05 11:48:04 root] (utils.py 283): INFO Epoch: [13] [2350/2502] eta: 0:02:03 lr: 0.000013 loss_cls: 4.1093 (3.8834) grad_norm: 4.1998 (4.3299) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 11:48:12 root] (utils.py 283): INFO Epoch: [13] [2360/2502] eta: 0:01:55 lr: 0.000013 loss_cls: 3.8413 (3.8835) grad_norm: 4.1998 (4.3296) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 11:48:20 root] (utils.py 283): INFO Epoch: [13] [2370/2502] eta: 0:01:47 lr: 0.000013 loss_cls: 3.8999 (3.8842) grad_norm: 4.1848 (4.3290) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 11:48:27 root] (utils.py 283): INFO Epoch: [13] [2380/2502] eta: 0:01:38 lr: 0.000013 loss_cls: 3.9654 (3.8831) grad_norm: 4.0741 (4.3277) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 11:48:35 root] (utils.py 283): INFO Epoch: [13] [2390/2502] eta: 0:01:30 lr: 0.000013 loss_cls: 3.7667 (3.8828) grad_norm: 4.0418 (4.3267) time: 0.7973 data: 0.0003 max mem: 8421 +[2024-12-05 11:48:44 root] (utils.py 283): INFO Epoch: [13] [2400/2502] eta: 0:01:22 lr: 0.000013 loss_cls: 3.8089 (3.8818) grad_norm: 4.1780 (4.3268) time: 0.8090 data: 0.0003 max mem: 8421 +[2024-12-05 11:48:51 root] (utils.py 283): INFO Epoch: [13] [2410/2502] eta: 0:01:14 lr: 0.000013 loss_cls: 3.9267 (3.8822) grad_norm: 4.1842 (4.3263) time: 0.7920 data: 0.0003 max mem: 8421 +[2024-12-05 11:48:59 root] (utils.py 283): INFO Epoch: [13] [2420/2502] eta: 0:01:06 lr: 0.000013 loss_cls: 4.1223 (3.8815) grad_norm: 4.1793 (4.3264) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-05 11:49:07 root] (utils.py 283): INFO Epoch: [13] [2430/2502] eta: 0:00:58 lr: 0.000013 loss_cls: 4.1866 (3.8820) grad_norm: 4.1793 (4.3258) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-05 11:49:14 root] (utils.py 283): INFO Epoch: [13] [2440/2502] eta: 0:00:50 lr: 0.000013 loss_cls: 4.1491 (3.8833) grad_norm: 4.1621 (4.3261) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-05 11:49:22 root] (utils.py 283): INFO Epoch: [13] [2450/2502] eta: 0:00:42 lr: 0.000013 loss_cls: 4.0655 (3.8834) grad_norm: 4.1485 (4.3255) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 11:49:30 root] (utils.py 283): INFO Epoch: [13] [2460/2502] eta: 0:00:34 lr: 0.000013 loss_cls: 4.2534 (3.8854) grad_norm: 4.2354 (4.3254) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 11:49:37 root] (utils.py 283): INFO Epoch: [13] [2470/2502] eta: 0:00:25 lr: 0.000013 loss_cls: 4.1799 (3.8847) grad_norm: 4.2042 (4.3245) time: 0.7611 data: 0.0003 max mem: 8421 +[2024-12-05 11:49:45 root] (utils.py 283): INFO Epoch: [13] [2480/2502] eta: 0:00:17 lr: 0.000013 loss_cls: 3.8185 (3.8850) grad_norm: 4.0516 (4.3244) time: 0.7610 data: 0.0003 max mem: 8421 +[2024-12-05 11:49:53 root] (utils.py 283): INFO Epoch: [13] [2490/2502] eta: 0:00:09 lr: 0.000013 loss_cls: 4.1307 (3.8853) grad_norm: 4.0891 (4.3238) time: 0.7936 data: 0.0226 max mem: 8421 +[2024-12-05 11:50:01 root] (utils.py 283): INFO Epoch: [13] [2500/2502] eta: 0:00:01 lr: 0.000013 loss_cls: 4.1553 (3.8850) grad_norm: 4.1359 (4.3233) time: 0.7949 data: 0.0226 max mem: 8421 +[2024-12-05 11:50:01 root] (utils.py 283): INFO Epoch: [13] [2501/2502] eta: 0:00:00 lr: 0.000013 loss_cls: 4.1631 (3.8851) grad_norm: 4.1714 (4.3234) time: 0.7945 data: 0.0226 max mem: 8421 +[2024-12-05 11:50:01 root] (utils.py 297): INFO Epoch: [13] Total time: 0:33:45 (0.8097 s / it) +[2024-12-05 11:50:01 root] (engine.py 178): INFO Averaged stats:lr: 0.000013 loss_cls: 4.1631 (3.8802) grad_norm: 4.1714 (4.3234) +[2024-12-05 11:50:02 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7111 (0.7111) acc1: 83.5938 (83.5938) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1312 data: 0.0004 max mem: 8421 +[2024-12-05 11:50:03 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8096 (0.8577) acc1: 82.8125 (81.7472) acc3: 93.7500 (93.3949) acc5: 96.0938 (95.7386) time: 0.1315 data: 0.0003 max mem: 8421 +[2024-12-05 11:50:05 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9089 (0.9047) acc1: 78.9062 (80.5060) acc3: 92.1875 (92.6711) acc5: 95.3125 (95.4241) time: 0.1319 data: 0.0004 max mem: 8421 +[2024-12-05 11:50:06 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9285 (0.9132) acc1: 78.9062 (79.5867) acc3: 92.1875 (92.9688) acc5: 95.3125 (95.6149) time: 0.1380 data: 0.0005 max mem: 8421 +[2024-12-05 11:50:08 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8432 (0.9050) acc1: 79.6875 (79.9162) acc3: 92.9688 (92.9688) acc5: 96.8750 (95.5602) time: 0.1499 data: 0.0005 max mem: 8421 +[2024-12-05 11:50:10 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9831 (0.9966) acc1: 74.2188 (77.9718) acc3: 89.0625 (91.2837) acc5: 92.1875 (94.2402) time: 0.2084 data: 0.0005 max mem: 8421 +[2024-12-05 11:50:12 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3263 (1.0423) acc1: 71.8750 (77.1260) acc3: 85.9375 (90.4713) acc5: 88.2812 (93.4298) time: 0.2131 data: 0.0005 max mem: 8421 +[2024-12-05 11:50:13 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2233 (1.0838) acc1: 73.4375 (76.1334) acc3: 86.7188 (89.8107) acc5: 89.0625 (92.9908) time: 0.1491 data: 0.0005 max mem: 8421 +[2024-12-05 11:50:15 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3233 (1.1246) acc1: 70.3125 (75.2218) acc3: 83.5938 (89.1011) acc5: 88.2812 (92.4286) time: 0.1376 data: 0.0011 max mem: 8421 +[2024-12-05 11:50:16 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3763 (1.1565) acc1: 67.9688 (74.4162) acc3: 83.5938 (88.4444) acc5: 89.0625 (92.0158) time: 0.1380 data: 0.0010 max mem: 8421 +[2024-12-05 11:50:17 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2021 (1.1456) acc1: 72.6562 (74.5600) acc3: 85.9375 (88.6240) acc5: 90.6250 (92.2000) time: 0.1320 data: 0.0008 max mem: 8421 +[2024-12-05 11:50:17 root] (utils.py 297): INFO Test: Total time: 0:00:15 (0.1532 s / it) +[2024-12-05 11:50:17 root] (engine.py 263): INFO * Acc@1 74.486 Acc@3 88.778 Acc@5 92.230 loss 1.146 flops 1.285 layer_flops 1.251 +[2024-12-05 11:50:17 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.5% +[2024-12-05 11:50:17 root] (main.py 550): INFO Max accuracy: 74.49% +[2024-12-05 11:50:18 root] (utils.py 283): INFO Epoch: [14] [ 0/2502] eta: 0:32:02 lr: 0.000012 loss_cls: 3.6021 (3.6021) grad_norm: 5.6351 (5.6351) time: 0.7684 data: 0.0005 max mem: 8421 +[2024-12-05 11:50:25 root] (utils.py 283): INFO Epoch: [14] [ 10/2502] eta: 0:31:37 lr: 0.000012 loss_cls: 4.0509 (3.8979) grad_norm: 4.2384 (4.3046) time: 0.7616 data: 0.0003 max mem: 8421 +[2024-12-05 11:50:33 root] (utils.py 283): INFO Epoch: [14] [ 20/2502] eta: 0:31:44 lr: 0.000012 loss_cls: 4.0509 (3.9273) grad_norm: 4.0494 (4.3244) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-05 11:50:41 root] (utils.py 283): INFO Epoch: [14] [ 30/2502] eta: 0:31:36 lr: 0.000012 loss_cls: 4.0115 (3.9085) grad_norm: 4.1110 (4.3263) time: 0.7700 data: 0.0003 max mem: 8421 +[2024-12-05 11:50:50 root] (utils.py 283): INFO Epoch: [14] [ 40/2502] eta: 0:33:14 lr: 0.000012 loss_cls: 3.7776 (3.8310) grad_norm: 4.1926 (4.4119) time: 0.8546 data: 0.0003 max mem: 8421 +[2024-12-05 11:50:58 root] (utils.py 283): INFO Epoch: [14] [ 50/2502] eta: 0:32:41 lr: 0.000012 loss_cls: 3.5749 (3.8079) grad_norm: 4.0087 (4.3434) time: 0.8512 data: 0.0002 max mem: 8421 +[2024-12-05 11:51:05 root] (utils.py 283): INFO Epoch: [14] [ 60/2502] eta: 0:32:19 lr: 0.000012 loss_cls: 3.8215 (3.8177) grad_norm: 3.9617 (4.3118) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 11:51:13 root] (utils.py 283): INFO Epoch: [14] [ 70/2502] eta: 0:32:00 lr: 0.000012 loss_cls: 4.0803 (3.8474) grad_norm: 3.9427 (4.2993) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 11:51:21 root] (utils.py 283): INFO Epoch: [14] [ 80/2502] eta: 0:31:44 lr: 0.000012 loss_cls: 3.9807 (3.8528) grad_norm: 4.0591 (4.2756) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 11:51:28 root] (utils.py 283): INFO Epoch: [14] [ 90/2502] eta: 0:31:33 lr: 0.000012 loss_cls: 3.8098 (3.8282) grad_norm: 4.1157 (4.2827) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-05 11:51:36 root] (utils.py 283): INFO Epoch: [14] [ 100/2502] eta: 0:31:23 lr: 0.000012 loss_cls: 3.1637 (3.7836) grad_norm: 4.0619 (4.2633) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 11:51:44 root] (utils.py 283): INFO Epoch: [14] [ 110/2502] eta: 0:31:15 lr: 0.000012 loss_cls: 3.5870 (3.7949) grad_norm: 4.0729 (4.2559) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 11:51:52 root] (utils.py 283): INFO Epoch: [14] [ 120/2502] eta: 0:31:10 lr: 0.000012 loss_cls: 3.9136 (3.7972) grad_norm: 4.1058 (4.2515) time: 0.7900 data: 0.0002 max mem: 8421 +[2024-12-05 11:52:00 root] (utils.py 283): INFO Epoch: [14] [ 130/2502] eta: 0:30:59 lr: 0.000012 loss_cls: 3.7484 (3.7935) grad_norm: 4.1548 (4.2565) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-05 11:52:07 root] (utils.py 283): INFO Epoch: [14] [ 140/2502] eta: 0:30:48 lr: 0.000012 loss_cls: 3.7484 (3.7911) grad_norm: 4.1946 (4.2687) time: 0.7684 data: 0.0003 max mem: 8421 +[2024-12-05 11:52:15 root] (utils.py 283): INFO Epoch: [14] [ 150/2502] eta: 0:30:38 lr: 0.000012 loss_cls: 4.0323 (3.8081) grad_norm: 4.1219 (4.2674) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 11:52:23 root] (utils.py 283): INFO Epoch: [14] [ 160/2502] eta: 0:30:27 lr: 0.000012 loss_cls: 4.1634 (3.8168) grad_norm: 4.0327 (4.2551) time: 0.7630 data: 0.0003 max mem: 8421 +[2024-12-05 11:52:30 root] (utils.py 283): INFO Epoch: [14] [ 170/2502] eta: 0:30:17 lr: 0.000012 loss_cls: 3.9957 (3.8026) grad_norm: 4.1218 (4.2609) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-05 11:52:38 root] (utils.py 283): INFO Epoch: [14] [ 180/2502] eta: 0:30:10 lr: 0.000012 loss_cls: 3.9824 (3.8156) grad_norm: 4.2638 (4.2668) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 11:52:46 root] (utils.py 283): INFO Epoch: [14] [ 190/2502] eta: 0:30:02 lr: 0.000012 loss_cls: 4.0896 (3.8388) grad_norm: 4.2421 (4.2693) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 11:52:54 root] (utils.py 283): INFO Epoch: [14] [ 200/2502] eta: 0:29:53 lr: 0.000012 loss_cls: 4.2234 (3.8592) grad_norm: 4.1555 (4.2665) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 11:53:01 root] (utils.py 283): INFO Epoch: [14] [ 210/2502] eta: 0:29:44 lr: 0.000012 loss_cls: 4.1839 (3.8684) grad_norm: 4.1132 (4.2631) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 11:53:09 root] (utils.py 283): INFO Epoch: [14] [ 220/2502] eta: 0:29:36 lr: 0.000012 loss_cls: 4.0343 (3.8728) grad_norm: 4.0591 (4.2486) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-05 11:53:17 root] (utils.py 283): INFO Epoch: [14] [ 230/2502] eta: 0:29:27 lr: 0.000012 loss_cls: 4.0271 (3.8752) grad_norm: 3.9990 (4.2448) time: 0.7715 data: 0.0002 max mem: 8421 +[2024-12-05 11:53:24 root] (utils.py 283): INFO Epoch: [14] [ 240/2502] eta: 0:29:18 lr: 0.000012 loss_cls: 4.0203 (3.8705) grad_norm: 4.1421 (4.2599) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-05 11:53:32 root] (utils.py 283): INFO Epoch: [14] [ 250/2502] eta: 0:29:10 lr: 0.000012 loss_cls: 3.9163 (3.8665) grad_norm: 4.1635 (4.2566) time: 0.7684 data: 0.0003 max mem: 8421 +[2024-12-05 11:53:40 root] (utils.py 283): INFO Epoch: [14] [ 260/2502] eta: 0:29:01 lr: 0.000012 loss_cls: 3.9163 (3.8643) grad_norm: 4.1542 (4.2624) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-05 11:53:47 root] (utils.py 283): INFO Epoch: [14] [ 270/2502] eta: 0:28:53 lr: 0.000012 loss_cls: 3.8912 (3.8660) grad_norm: 4.2578 (4.2673) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-05 11:53:55 root] (utils.py 283): INFO Epoch: [14] [ 280/2502] eta: 0:28:45 lr: 0.000012 loss_cls: 4.1238 (3.8712) grad_norm: 4.2578 (4.2733) time: 0.7711 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:03 root] (utils.py 283): INFO Epoch: [14] [ 290/2502] eta: 0:28:37 lr: 0.000012 loss_cls: 3.9273 (3.8672) grad_norm: 4.2560 (4.2731) time: 0.7711 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:11 root] (utils.py 283): INFO Epoch: [14] [ 300/2502] eta: 0:28:28 lr: 0.000012 loss_cls: 3.7055 (3.8664) grad_norm: 4.2560 (4.2718) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:18 root] (utils.py 283): INFO Epoch: [14] [ 310/2502] eta: 0:28:20 lr: 0.000012 loss_cls: 3.7707 (3.8655) grad_norm: 4.1889 (4.2666) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:26 root] (utils.py 283): INFO Epoch: [14] [ 320/2502] eta: 0:28:12 lr: 0.000012 loss_cls: 3.9361 (3.8730) grad_norm: 3.9661 (4.2562) time: 0.7704 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:34 root] (utils.py 283): INFO Epoch: [14] [ 330/2502] eta: 0:28:03 lr: 0.000012 loss_cls: 4.0413 (3.8717) grad_norm: 3.9415 (4.2523) time: 0.7714 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:41 root] (utils.py 283): INFO Epoch: [14] [ 340/2502] eta: 0:27:55 lr: 0.000012 loss_cls: 4.0413 (3.8736) grad_norm: 4.0668 (4.2470) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:49 root] (utils.py 283): INFO Epoch: [14] [ 350/2502] eta: 0:27:47 lr: 0.000012 loss_cls: 4.0843 (3.8732) grad_norm: 4.0580 (4.2441) time: 0.7712 data: 0.0003 max mem: 8421 +[2024-12-05 11:54:57 root] (utils.py 283): INFO Epoch: [14] [ 360/2502] eta: 0:27:42 lr: 0.000012 loss_cls: 3.9862 (3.8663) grad_norm: 4.2949 (4.2632) time: 0.7943 data: 0.0003 max mem: 8421 +[2024-12-05 11:55:06 root] (utils.py 283): INFO Epoch: [14] [ 370/2502] eta: 0:27:39 lr: 0.000012 loss_cls: 3.9040 (3.8663) grad_norm: 4.0290 (4.2539) time: 0.8346 data: 0.0003 max mem: 8421 +[2024-12-05 11:55:14 root] (utils.py 283): INFO Epoch: [14] [ 380/2502] eta: 0:27:31 lr: 0.000012 loss_cls: 3.9013 (3.8638) grad_norm: 4.0097 (4.2577) time: 0.8188 data: 0.0003 max mem: 8421 +[2024-12-05 11:55:21 root] (utils.py 283): INFO Epoch: [14] [ 390/2502] eta: 0:27:23 lr: 0.000012 loss_cls: 3.8296 (3.8631) grad_norm: 4.1859 (4.2600) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 11:55:29 root] (utils.py 283): INFO Epoch: [14] [ 400/2502] eta: 0:27:15 lr: 0.000012 loss_cls: 3.8296 (3.8620) grad_norm: 4.0404 (4.2533) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 11:55:39 root] (utils.py 283): INFO Epoch: [14] [ 410/2502] eta: 0:27:17 lr: 0.000012 loss_cls: 3.7554 (3.8599) grad_norm: 4.0404 (4.2554) time: 0.8774 data: 0.0003 max mem: 8421 +[2024-12-05 11:55:47 root] (utils.py 283): INFO Epoch: [14] [ 420/2502] eta: 0:27:10 lr: 0.000012 loss_cls: 4.0217 (3.8676) grad_norm: 4.2761 (4.2639) time: 0.8865 data: 0.0003 max mem: 8421 +[2024-12-05 11:55:55 root] (utils.py 283): INFO Epoch: [14] [ 430/2502] eta: 0:27:02 lr: 0.000012 loss_cls: 4.1050 (3.8762) grad_norm: 4.2761 (4.2698) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-05 11:56:02 root] (utils.py 283): INFO Epoch: [14] [ 440/2502] eta: 0:26:54 lr: 0.000012 loss_cls: 4.0067 (3.8662) grad_norm: 4.1109 (4.2654) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 11:56:10 root] (utils.py 283): INFO Epoch: [14] [ 450/2502] eta: 0:26:45 lr: 0.000012 loss_cls: 3.3951 (3.8598) grad_norm: 4.1457 (4.2691) time: 0.7729 data: 0.0003 max mem: 8421 +[2024-12-05 11:56:18 root] (utils.py 283): INFO Epoch: [14] [ 460/2502] eta: 0:26:37 lr: 0.000012 loss_cls: 3.9713 (3.8625) grad_norm: 4.1975 (4.2734) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 11:56:25 root] (utils.py 283): INFO Epoch: [14] [ 470/2502] eta: 0:26:28 lr: 0.000012 loss_cls: 4.2847 (3.8630) grad_norm: 4.0854 (4.2704) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-05 11:56:33 root] (utils.py 283): INFO Epoch: [14] [ 480/2502] eta: 0:26:20 lr: 0.000012 loss_cls: 3.5454 (3.8588) grad_norm: 3.9718 (4.2707) time: 0.7633 data: 0.0003 max mem: 8421 +[2024-12-05 11:56:41 root] (utils.py 283): INFO Epoch: [14] [ 490/2502] eta: 0:26:11 lr: 0.000012 loss_cls: 3.4836 (3.8545) grad_norm: 4.0594 (4.2695) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 11:56:48 root] (utils.py 283): INFO Epoch: [14] [ 500/2502] eta: 0:26:03 lr: 0.000012 loss_cls: 3.7853 (3.8509) grad_norm: 4.2118 (4.2813) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-05 11:56:56 root] (utils.py 283): INFO Epoch: [14] [ 510/2502] eta: 0:25:54 lr: 0.000012 loss_cls: 3.7522 (3.8441) grad_norm: 4.3277 (4.2820) time: 0.7620 data: 0.0003 max mem: 8421 +[2024-12-05 11:57:03 root] (utils.py 283): INFO Epoch: [14] [ 520/2502] eta: 0:25:46 lr: 0.000012 loss_cls: 3.5025 (3.8416) grad_norm: 4.3930 (4.2827) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 11:57:11 root] (utils.py 283): INFO Epoch: [14] [ 530/2502] eta: 0:25:37 lr: 0.000012 loss_cls: 3.8216 (3.8432) grad_norm: 4.3361 (4.2821) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-05 11:57:19 root] (utils.py 283): INFO Epoch: [14] [ 540/2502] eta: 0:25:29 lr: 0.000012 loss_cls: 3.9902 (3.8398) grad_norm: 4.2583 (4.2818) time: 0.7612 data: 0.0002 max mem: 8421 +[2024-12-05 11:57:26 root] (utils.py 283): INFO Epoch: [14] [ 550/2502] eta: 0:25:21 lr: 0.000012 loss_cls: 3.9647 (3.8409) grad_norm: 4.3543 (4.2877) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 11:57:34 root] (utils.py 283): INFO Epoch: [14] [ 560/2502] eta: 0:25:12 lr: 0.000012 loss_cls: 3.9647 (3.8422) grad_norm: 4.2951 (4.2887) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-05 11:57:42 root] (utils.py 283): INFO Epoch: [14] [ 570/2502] eta: 0:25:04 lr: 0.000012 loss_cls: 4.0279 (3.8460) grad_norm: 4.2143 (4.2942) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 11:57:49 root] (utils.py 283): INFO Epoch: [14] [ 580/2502] eta: 0:24:56 lr: 0.000012 loss_cls: 4.0279 (3.8453) grad_norm: 4.2731 (4.2965) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 11:57:57 root] (utils.py 283): INFO Epoch: [14] [ 590/2502] eta: 0:24:47 lr: 0.000012 loss_cls: 4.0634 (3.8471) grad_norm: 4.3004 (4.2961) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-05 11:58:05 root] (utils.py 283): INFO Epoch: [14] [ 600/2502] eta: 0:24:39 lr: 0.000012 loss_cls: 4.1126 (3.8522) grad_norm: 4.2153 (4.2974) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-05 11:58:12 root] (utils.py 283): INFO Epoch: [14] [ 610/2502] eta: 0:24:31 lr: 0.000012 loss_cls: 4.0970 (3.8514) grad_norm: 4.1357 (4.2949) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 11:58:20 root] (utils.py 283): INFO Epoch: [14] [ 620/2502] eta: 0:24:23 lr: 0.000012 loss_cls: 3.9078 (3.8499) grad_norm: 4.1033 (4.3002) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 11:58:28 root] (utils.py 283): INFO Epoch: [14] [ 630/2502] eta: 0:24:15 lr: 0.000012 loss_cls: 3.9078 (3.8478) grad_norm: 4.0696 (4.2965) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 11:58:35 root] (utils.py 283): INFO Epoch: [14] [ 640/2502] eta: 0:24:07 lr: 0.000012 loss_cls: 3.9277 (3.8473) grad_norm: 4.0921 (4.2981) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 11:58:43 root] (utils.py 283): INFO Epoch: [14] [ 650/2502] eta: 0:23:58 lr: 0.000012 loss_cls: 3.9443 (3.8462) grad_norm: 4.1743 (4.2979) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-05 11:58:50 root] (utils.py 283): INFO Epoch: [14] [ 660/2502] eta: 0:23:50 lr: 0.000012 loss_cls: 4.0863 (3.8450) grad_norm: 4.1743 (4.2973) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-05 11:58:58 root] (utils.py 283): INFO Epoch: [14] [ 670/2502] eta: 0:23:42 lr: 0.000012 loss_cls: 3.6645 (3.8419) grad_norm: 4.0966 (4.2954) time: 0.7656 data: 0.0002 max mem: 8421 +[2024-12-05 11:59:06 root] (utils.py 283): INFO Epoch: [14] [ 680/2502] eta: 0:23:34 lr: 0.000012 loss_cls: 3.5268 (3.8390) grad_norm: 4.1955 (4.3024) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 11:59:14 root] (utils.py 283): INFO Epoch: [14] [ 690/2502] eta: 0:23:26 lr: 0.000012 loss_cls: 3.9828 (3.8418) grad_norm: 4.3585 (4.3100) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 11:59:21 root] (utils.py 283): INFO Epoch: [14] [ 700/2502] eta: 0:23:19 lr: 0.000012 loss_cls: 4.1341 (3.8462) grad_norm: 4.3402 (4.3094) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-05 11:59:29 root] (utils.py 283): INFO Epoch: [14] [ 710/2502] eta: 0:23:11 lr: 0.000012 loss_cls: 4.0916 (3.8461) grad_norm: 4.3221 (4.3097) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-05 11:59:37 root] (utils.py 283): INFO Epoch: [14] [ 720/2502] eta: 0:23:05 lr: 0.000012 loss_cls: 4.1254 (3.8491) grad_norm: 4.3221 (4.3116) time: 0.8030 data: 0.0003 max mem: 8421 +[2024-12-05 11:59:47 root] (utils.py 283): INFO Epoch: [14] [ 730/2502] eta: 0:23:01 lr: 0.000012 loss_cls: 4.0350 (3.8466) grad_norm: 4.1854 (4.3112) time: 0.8896 data: 0.0003 max mem: 8421 +[2024-12-05 11:59:55 root] (utils.py 283): INFO Epoch: [14] [ 740/2502] eta: 0:22:53 lr: 0.000012 loss_cls: 4.1321 (3.8500) grad_norm: 4.1391 (4.3094) time: 0.8581 data: 0.0003 max mem: 8421 +[2024-12-05 12:00:02 root] (utils.py 283): INFO Epoch: [14] [ 750/2502] eta: 0:22:45 lr: 0.000012 loss_cls: 3.9769 (3.8453) grad_norm: 4.1615 (4.3103) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 12:00:10 root] (utils.py 283): INFO Epoch: [14] [ 760/2502] eta: 0:22:37 lr: 0.000012 loss_cls: 3.5786 (3.8437) grad_norm: 4.1774 (4.3085) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 12:00:18 root] (utils.py 283): INFO Epoch: [14] [ 770/2502] eta: 0:22:29 lr: 0.000012 loss_cls: 3.7449 (3.8431) grad_norm: 4.0880 (4.3056) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 12:00:25 root] (utils.py 283): INFO Epoch: [14] [ 780/2502] eta: 0:22:21 lr: 0.000012 loss_cls: 3.7449 (3.8434) grad_norm: 4.0308 (4.3066) time: 0.7720 data: 0.0003 max mem: 8421 +[2024-12-05 12:00:33 root] (utils.py 283): INFO Epoch: [14] [ 790/2502] eta: 0:22:13 lr: 0.000012 loss_cls: 3.9018 (3.8462) grad_norm: 4.0878 (4.3056) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-05 12:00:41 root] (utils.py 283): INFO Epoch: [14] [ 800/2502] eta: 0:22:06 lr: 0.000012 loss_cls: 3.9018 (3.8456) grad_norm: 4.1984 (4.3025) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-05 12:00:49 root] (utils.py 283): INFO Epoch: [14] [ 810/2502] eta: 0:21:58 lr: 0.000012 loss_cls: 3.7861 (3.8460) grad_norm: 4.1114 (4.3022) time: 0.7992 data: 0.0003 max mem: 8421 +[2024-12-05 12:00:59 root] (utils.py 283): INFO Epoch: [14] [ 820/2502] eta: 0:21:55 lr: 0.000012 loss_cls: 3.8022 (3.8468) grad_norm: 4.1345 (4.3021) time: 0.8887 data: 0.0003 max mem: 8421 +[2024-12-05 12:01:07 root] (utils.py 283): INFO Epoch: [14] [ 830/2502] eta: 0:21:47 lr: 0.000012 loss_cls: 4.0014 (3.8496) grad_norm: 4.2358 (4.3031) time: 0.8738 data: 0.0003 max mem: 8421 +[2024-12-05 12:01:14 root] (utils.py 283): INFO Epoch: [14] [ 840/2502] eta: 0:21:38 lr: 0.000012 loss_cls: 4.1170 (3.8510) grad_norm: 4.2661 (4.3021) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 12:01:22 root] (utils.py 283): INFO Epoch: [14] [ 850/2502] eta: 0:21:30 lr: 0.000012 loss_cls: 4.1467 (3.8539) grad_norm: 4.1858 (4.3036) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-05 12:01:30 root] (utils.py 283): INFO Epoch: [14] [ 860/2502] eta: 0:21:22 lr: 0.000012 loss_cls: 3.8528 (3.8502) grad_norm: 4.1590 (4.3018) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-05 12:01:38 root] (utils.py 283): INFO Epoch: [14] [ 870/2502] eta: 0:21:15 lr: 0.000012 loss_cls: 3.3787 (3.8449) grad_norm: 4.1057 (4.3007) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 12:01:46 root] (utils.py 283): INFO Epoch: [14] [ 880/2502] eta: 0:21:07 lr: 0.000012 loss_cls: 3.4525 (3.8442) grad_norm: 4.2615 (4.3055) time: 0.7964 data: 0.0003 max mem: 8421 +[2024-12-05 12:01:54 root] (utils.py 283): INFO Epoch: [14] [ 890/2502] eta: 0:21:00 lr: 0.000012 loss_cls: 3.7584 (3.8405) grad_norm: 4.2432 (4.3033) time: 0.8112 data: 0.0003 max mem: 8421 +[2024-12-05 12:02:02 root] (utils.py 283): INFO Epoch: [14] [ 900/2502] eta: 0:20:52 lr: 0.000012 loss_cls: 3.7805 (3.8411) grad_norm: 4.0635 (4.3052) time: 0.7937 data: 0.0003 max mem: 8421 +[2024-12-05 12:02:11 root] (utils.py 283): INFO Epoch: [14] [ 910/2502] eta: 0:20:47 lr: 0.000012 loss_cls: 3.9461 (3.8424) grad_norm: 4.1863 (4.3049) time: 0.8499 data: 0.0003 max mem: 8421 +[2024-12-05 12:02:19 root] (utils.py 283): INFO Epoch: [14] [ 920/2502] eta: 0:20:39 lr: 0.000012 loss_cls: 4.1841 (3.8431) grad_norm: 4.2082 (4.3039) time: 0.8534 data: 0.0003 max mem: 8421 +[2024-12-05 12:02:26 root] (utils.py 283): INFO Epoch: [14] [ 930/2502] eta: 0:20:31 lr: 0.000012 loss_cls: 3.7000 (3.8407) grad_norm: 4.2082 (4.3043) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 12:02:34 root] (utils.py 283): INFO Epoch: [14] [ 940/2502] eta: 0:20:23 lr: 0.000012 loss_cls: 3.7000 (3.8425) grad_norm: 4.1662 (4.3038) time: 0.7759 data: 0.0009 max mem: 8421 +[2024-12-05 12:02:42 root] (utils.py 283): INFO Epoch: [14] [ 950/2502] eta: 0:20:16 lr: 0.000012 loss_cls: 3.9770 (3.8422) grad_norm: 4.0507 (4.3030) time: 0.8067 data: 0.0009 max mem: 8421 +[2024-12-05 12:02:50 root] (utils.py 283): INFO Epoch: [14] [ 960/2502] eta: 0:20:08 lr: 0.000012 loss_cls: 3.7099 (3.8405) grad_norm: 4.2542 (4.3042) time: 0.8160 data: 0.0003 max mem: 8421 +[2024-12-05 12:02:58 root] (utils.py 283): INFO Epoch: [14] [ 970/2502] eta: 0:20:00 lr: 0.000012 loss_cls: 3.6987 (3.8380) grad_norm: 4.2211 (4.3033) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-05 12:03:06 root] (utils.py 283): INFO Epoch: [14] [ 980/2502] eta: 0:19:52 lr: 0.000012 loss_cls: 3.6926 (3.8358) grad_norm: 4.1198 (4.3072) time: 0.7715 data: 0.0003 max mem: 8421 +[2024-12-05 12:03:14 root] (utils.py 283): INFO Epoch: [14] [ 990/2502] eta: 0:19:45 lr: 0.000012 loss_cls: 3.8680 (3.8366) grad_norm: 4.1198 (4.3074) time: 0.7805 data: 0.0002 max mem: 8421 +[2024-12-05 12:03:21 root] (utils.py 283): INFO Epoch: [14] [1000/2502] eta: 0:19:36 lr: 0.000012 loss_cls: 3.8901 (3.8359) grad_norm: 4.1670 (4.3048) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 12:03:29 root] (utils.py 283): INFO Epoch: [14] [1010/2502] eta: 0:19:28 lr: 0.000012 loss_cls: 3.8944 (3.8366) grad_norm: 4.2130 (4.3094) time: 0.7692 data: 0.0003 max mem: 8421 +[2024-12-05 12:03:37 root] (utils.py 283): INFO Epoch: [14] [1020/2502] eta: 0:19:20 lr: 0.000012 loss_cls: 3.8944 (3.8365) grad_norm: 4.4119 (4.3106) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-05 12:03:44 root] (utils.py 283): INFO Epoch: [14] [1030/2502] eta: 0:19:12 lr: 0.000012 loss_cls: 3.8463 (3.8352) grad_norm: 4.2656 (4.3088) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-05 12:03:52 root] (utils.py 283): INFO Epoch: [14] [1040/2502] eta: 0:19:04 lr: 0.000012 loss_cls: 3.8020 (3.8348) grad_norm: 4.1094 (4.3116) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:00 root] (utils.py 283): INFO Epoch: [14] [1050/2502] eta: 0:18:56 lr: 0.000012 loss_cls: 3.7556 (3.8325) grad_norm: 4.1225 (4.3128) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:07 root] (utils.py 283): INFO Epoch: [14] [1060/2502] eta: 0:18:48 lr: 0.000012 loss_cls: 3.9412 (3.8325) grad_norm: 4.1523 (4.3120) time: 0.7662 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:15 root] (utils.py 283): INFO Epoch: [14] [1070/2502] eta: 0:18:40 lr: 0.000012 loss_cls: 4.0613 (3.8355) grad_norm: 4.2489 (4.3133) time: 0.7611 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:23 root] (utils.py 283): INFO Epoch: [14] [1080/2502] eta: 0:18:32 lr: 0.000012 loss_cls: 4.0051 (3.8350) grad_norm: 4.2489 (4.3128) time: 0.7599 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:30 root] (utils.py 283): INFO Epoch: [14] [1090/2502] eta: 0:18:24 lr: 0.000012 loss_cls: 3.9233 (3.8358) grad_norm: 4.2397 (4.3134) time: 0.7612 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:38 root] (utils.py 283): INFO Epoch: [14] [1100/2502] eta: 0:18:16 lr: 0.000012 loss_cls: 3.8058 (3.8336) grad_norm: 4.3479 (4.3140) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:46 root] (utils.py 283): INFO Epoch: [14] [1110/2502] eta: 0:18:08 lr: 0.000012 loss_cls: 3.7722 (3.8337) grad_norm: 4.3479 (4.3166) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 12:04:53 root] (utils.py 283): INFO Epoch: [14] [1120/2502] eta: 0:18:00 lr: 0.000012 loss_cls: 3.9843 (3.8344) grad_norm: 4.3846 (4.3177) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 12:05:01 root] (utils.py 283): INFO Epoch: [14] [1130/2502] eta: 0:17:52 lr: 0.000012 loss_cls: 3.7475 (3.8330) grad_norm: 4.3883 (4.3183) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 12:05:09 root] (utils.py 283): INFO Epoch: [14] [1140/2502] eta: 0:17:44 lr: 0.000012 loss_cls: 3.5304 (3.8314) grad_norm: 4.1447 (4.3163) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-05 12:05:16 root] (utils.py 283): INFO Epoch: [14] [1150/2502] eta: 0:17:36 lr: 0.000012 loss_cls: 4.1840 (3.8328) grad_norm: 4.2170 (4.3184) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-05 12:05:24 root] (utils.py 283): INFO Epoch: [14] [1160/2502] eta: 0:17:28 lr: 0.000012 loss_cls: 4.0959 (3.8338) grad_norm: 4.2678 (4.3171) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-05 12:05:32 root] (utils.py 283): INFO Epoch: [14] [1170/2502] eta: 0:17:20 lr: 0.000012 loss_cls: 3.9411 (3.8332) grad_norm: 4.1376 (4.3153) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-05 12:05:39 root] (utils.py 283): INFO Epoch: [14] [1180/2502] eta: 0:17:12 lr: 0.000012 loss_cls: 3.9475 (3.8339) grad_norm: 4.1334 (4.3147) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 12:05:47 root] (utils.py 283): INFO Epoch: [14] [1190/2502] eta: 0:17:04 lr: 0.000012 loss_cls: 3.9097 (3.8327) grad_norm: 4.1334 (4.3144) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-05 12:05:54 root] (utils.py 283): INFO Epoch: [14] [1200/2502] eta: 0:16:56 lr: 0.000012 loss_cls: 3.7034 (3.8342) grad_norm: 4.2425 (4.3145) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-05 12:06:02 root] (utils.py 283): INFO Epoch: [14] [1210/2502] eta: 0:16:48 lr: 0.000012 loss_cls: 4.1464 (3.8338) grad_norm: 4.1517 (4.3135) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 12:06:10 root] (utils.py 283): INFO Epoch: [14] [1220/2502] eta: 0:16:40 lr: 0.000012 loss_cls: 4.0255 (3.8335) grad_norm: 4.0701 (4.3119) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 12:06:18 root] (utils.py 283): INFO Epoch: [14] [1230/2502] eta: 0:16:32 lr: 0.000012 loss_cls: 4.0379 (3.8353) grad_norm: 4.1283 (4.3114) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 12:06:25 root] (utils.py 283): INFO Epoch: [14] [1240/2502] eta: 0:16:24 lr: 0.000012 loss_cls: 4.2761 (3.8380) grad_norm: 4.1936 (4.3110) time: 0.7668 data: 0.0003 max mem: 8421 +[2024-12-05 12:06:33 root] (utils.py 283): INFO Epoch: [14] [1250/2502] eta: 0:16:16 lr: 0.000012 loss_cls: 4.1773 (3.8386) grad_norm: 4.1425 (4.3091) time: 0.7679 data: 0.0003 max mem: 8421 +[2024-12-05 12:06:41 root] (utils.py 283): INFO Epoch: [14] [1260/2502] eta: 0:16:08 lr: 0.000012 loss_cls: 4.0359 (3.8392) grad_norm: 4.1929 (4.3095) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-05 12:06:48 root] (utils.py 283): INFO Epoch: [14] [1270/2502] eta: 0:16:00 lr: 0.000012 loss_cls: 4.0084 (3.8404) grad_norm: 4.1929 (4.3090) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 12:06:56 root] (utils.py 283): INFO Epoch: [14] [1280/2502] eta: 0:15:52 lr: 0.000012 loss_cls: 4.0084 (3.8407) grad_norm: 4.1849 (4.3094) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-05 12:07:04 root] (utils.py 283): INFO Epoch: [14] [1290/2502] eta: 0:15:44 lr: 0.000012 loss_cls: 3.8947 (3.8398) grad_norm: 4.4413 (4.3103) time: 0.7740 data: 0.0003 max mem: 8421 +[2024-12-05 12:07:11 root] (utils.py 283): INFO Epoch: [14] [1300/2502] eta: 0:15:37 lr: 0.000012 loss_cls: 4.0099 (3.8409) grad_norm: 4.4707 (4.3108) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 12:07:19 root] (utils.py 283): INFO Epoch: [14] [1310/2502] eta: 0:15:29 lr: 0.000012 loss_cls: 4.0250 (3.8410) grad_norm: 4.3642 (4.3107) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-05 12:07:27 root] (utils.py 283): INFO Epoch: [14] [1320/2502] eta: 0:15:21 lr: 0.000012 loss_cls: 4.1079 (3.8427) grad_norm: 4.2651 (4.3114) time: 0.7603 data: 0.0002 max mem: 8421 +[2024-12-05 12:07:34 root] (utils.py 283): INFO Epoch: [14] [1330/2502] eta: 0:15:13 lr: 0.000012 loss_cls: 3.8037 (3.8422) grad_norm: 4.2531 (4.3109) time: 0.7600 data: 0.0003 max mem: 8421 +[2024-12-05 12:07:42 root] (utils.py 283): INFO Epoch: [14] [1340/2502] eta: 0:15:05 lr: 0.000012 loss_cls: 3.8562 (3.8431) grad_norm: 4.2441 (4.3105) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-05 12:07:50 root] (utils.py 283): INFO Epoch: [14] [1350/2502] eta: 0:14:57 lr: 0.000012 loss_cls: 4.0040 (3.8435) grad_norm: 4.2441 (4.3122) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 12:07:57 root] (utils.py 283): INFO Epoch: [14] [1360/2502] eta: 0:14:49 lr: 0.000012 loss_cls: 4.1463 (3.8444) grad_norm: 4.2011 (4.3122) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 12:08:05 root] (utils.py 283): INFO Epoch: [14] [1370/2502] eta: 0:14:41 lr: 0.000012 loss_cls: 4.0457 (3.8452) grad_norm: 4.0626 (4.3117) time: 0.7729 data: 0.0003 max mem: 8421 +[2024-12-05 12:08:13 root] (utils.py 283): INFO Epoch: [14] [1380/2502] eta: 0:14:34 lr: 0.000012 loss_cls: 3.9707 (3.8465) grad_norm: 3.8928 (4.3101) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-05 12:08:21 root] (utils.py 283): INFO Epoch: [14] [1390/2502] eta: 0:14:26 lr: 0.000012 loss_cls: 4.0318 (3.8455) grad_norm: 4.0778 (4.3098) time: 0.8104 data: 0.0003 max mem: 8421 +[2024-12-05 12:08:31 root] (utils.py 283): INFO Epoch: [14] [1400/2502] eta: 0:14:20 lr: 0.000012 loss_cls: 4.0318 (3.8472) grad_norm: 4.1920 (4.3084) time: 0.9050 data: 0.0003 max mem: 8421 +[2024-12-05 12:08:39 root] (utils.py 283): INFO Epoch: [14] [1410/2502] eta: 0:14:12 lr: 0.000012 loss_cls: 4.2046 (3.8478) grad_norm: 4.1308 (4.3075) time: 0.8738 data: 0.0003 max mem: 8421 +[2024-12-05 12:08:46 root] (utils.py 283): INFO Epoch: [14] [1420/2502] eta: 0:14:04 lr: 0.000012 loss_cls: 4.0374 (3.8484) grad_norm: 4.0461 (4.3057) time: 0.7704 data: 0.0003 max mem: 8421 +[2024-12-05 12:08:54 root] (utils.py 283): INFO Epoch: [14] [1430/2502] eta: 0:13:56 lr: 0.000012 loss_cls: 4.0374 (3.8491) grad_norm: 4.0401 (4.3047) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-05 12:09:02 root] (utils.py 283): INFO Epoch: [14] [1440/2502] eta: 0:13:48 lr: 0.000012 loss_cls: 4.0710 (3.8495) grad_norm: 4.0283 (4.3031) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 12:09:09 root] (utils.py 283): INFO Epoch: [14] [1450/2502] eta: 0:13:40 lr: 0.000012 loss_cls: 4.0654 (3.8495) grad_norm: 4.1994 (4.3038) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-05 12:09:17 root] (utils.py 283): INFO Epoch: [14] [1460/2502] eta: 0:13:33 lr: 0.000012 loss_cls: 3.7038 (3.8478) grad_norm: 4.3202 (4.3041) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 12:09:25 root] (utils.py 283): INFO Epoch: [14] [1470/2502] eta: 0:13:25 lr: 0.000012 loss_cls: 3.7038 (3.8473) grad_norm: 4.3380 (4.3060) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-05 12:09:32 root] (utils.py 283): INFO Epoch: [14] [1480/2502] eta: 0:13:17 lr: 0.000012 loss_cls: 3.8163 (3.8477) grad_norm: 4.2903 (4.3054) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-05 12:09:40 root] (utils.py 283): INFO Epoch: [14] [1490/2502] eta: 0:13:09 lr: 0.000012 loss_cls: 3.9797 (3.8475) grad_norm: 4.1271 (4.3058) time: 0.7633 data: 0.0003 max mem: 8421 +[2024-12-05 12:09:48 root] (utils.py 283): INFO Epoch: [14] [1500/2502] eta: 0:13:01 lr: 0.000012 loss_cls: 3.9797 (3.8473) grad_norm: 4.2099 (4.3067) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-05 12:09:55 root] (utils.py 283): INFO Epoch: [14] [1510/2502] eta: 0:12:53 lr: 0.000012 loss_cls: 4.0225 (3.8487) grad_norm: 4.2099 (4.3062) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:03 root] (utils.py 283): INFO Epoch: [14] [1520/2502] eta: 0:12:45 lr: 0.000012 loss_cls: 4.1492 (3.8516) grad_norm: 4.2134 (4.3060) time: 0.7671 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:11 root] (utils.py 283): INFO Epoch: [14] [1530/2502] eta: 0:12:37 lr: 0.000012 loss_cls: 4.2532 (3.8529) grad_norm: 4.2742 (4.3067) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:18 root] (utils.py 283): INFO Epoch: [14] [1540/2502] eta: 0:12:29 lr: 0.000012 loss_cls: 4.1694 (3.8536) grad_norm: 4.1894 (4.3077) time: 0.7678 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:26 root] (utils.py 283): INFO Epoch: [14] [1550/2502] eta: 0:12:22 lr: 0.000012 loss_cls: 4.2049 (3.8547) grad_norm: 4.1527 (4.3059) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:35 root] (utils.py 283): INFO Epoch: [14] [1560/2502] eta: 0:12:14 lr: 0.000012 loss_cls: 3.9714 (3.8547) grad_norm: 4.0471 (4.3050) time: 0.8104 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:44 root] (utils.py 283): INFO Epoch: [14] [1570/2502] eta: 0:12:07 lr: 0.000012 loss_cls: 3.9714 (3.8550) grad_norm: 4.1346 (4.3045) time: 0.8894 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:52 root] (utils.py 283): INFO Epoch: [14] [1580/2502] eta: 0:11:59 lr: 0.000012 loss_cls: 4.0766 (3.8547) grad_norm: 4.0960 (4.3034) time: 0.8581 data: 0.0003 max mem: 8421 +[2024-12-05 12:10:59 root] (utils.py 283): INFO Epoch: [14] [1590/2502] eta: 0:11:51 lr: 0.000012 loss_cls: 3.8678 (3.8544) grad_norm: 4.2277 (4.3067) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-05 12:11:07 root] (utils.py 283): INFO Epoch: [14] [1600/2502] eta: 0:11:44 lr: 0.000012 loss_cls: 3.8228 (3.8537) grad_norm: 4.2277 (4.3056) time: 0.7573 data: 0.0003 max mem: 8421 +[2024-12-05 12:11:15 root] (utils.py 283): INFO Epoch: [14] [1610/2502] eta: 0:11:36 lr: 0.000012 loss_cls: 3.7382 (3.8527) grad_norm: 4.1808 (4.3053) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 12:11:22 root] (utils.py 283): INFO Epoch: [14] [1620/2502] eta: 0:11:28 lr: 0.000012 loss_cls: 3.6290 (3.8505) grad_norm: 4.2170 (4.3045) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-05 12:11:30 root] (utils.py 283): INFO Epoch: [14] [1630/2502] eta: 0:11:20 lr: 0.000012 loss_cls: 3.7671 (3.8515) grad_norm: 4.2433 (4.3044) time: 0.7698 data: 0.0003 max mem: 8421 +[2024-12-05 12:11:38 root] (utils.py 283): INFO Epoch: [14] [1640/2502] eta: 0:11:12 lr: 0.000012 loss_cls: 4.2282 (3.8536) grad_norm: 4.1929 (4.3097) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 12:11:45 root] (utils.py 283): INFO Epoch: [14] [1650/2502] eta: 0:11:04 lr: 0.000012 loss_cls: 4.2585 (3.8550) grad_norm: 4.1388 (4.3094) time: 0.7594 data: 0.0003 max mem: 8421 +[2024-12-05 12:11:53 root] (utils.py 283): INFO Epoch: [14] [1660/2502] eta: 0:10:56 lr: 0.000012 loss_cls: 4.1470 (3.8564) grad_norm: 4.1388 (4.3088) time: 0.7594 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:00 root] (utils.py 283): INFO Epoch: [14] [1670/2502] eta: 0:10:48 lr: 0.000012 loss_cls: 3.9332 (3.8566) grad_norm: 4.1460 (4.3097) time: 0.7613 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:08 root] (utils.py 283): INFO Epoch: [14] [1680/2502] eta: 0:10:41 lr: 0.000012 loss_cls: 4.0162 (3.8573) grad_norm: 4.4494 (4.3113) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:16 root] (utils.py 283): INFO Epoch: [14] [1690/2502] eta: 0:10:33 lr: 0.000012 loss_cls: 4.0778 (3.8577) grad_norm: 4.1828 (4.3106) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:24 root] (utils.py 283): INFO Epoch: [14] [1700/2502] eta: 0:10:25 lr: 0.000012 loss_cls: 3.9209 (3.8581) grad_norm: 4.0122 (4.3103) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:31 root] (utils.py 283): INFO Epoch: [14] [1710/2502] eta: 0:10:17 lr: 0.000012 loss_cls: 3.8708 (3.8582) grad_norm: 4.0729 (4.3096) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:39 root] (utils.py 283): INFO Epoch: [14] [1720/2502] eta: 0:10:09 lr: 0.000012 loss_cls: 3.8330 (3.8582) grad_norm: 4.1160 (4.3094) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:47 root] (utils.py 283): INFO Epoch: [14] [1730/2502] eta: 0:10:01 lr: 0.000012 loss_cls: 3.8381 (3.8582) grad_norm: 4.2980 (4.3117) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 12:12:54 root] (utils.py 283): INFO Epoch: [14] [1740/2502] eta: 0:09:54 lr: 0.000012 loss_cls: 3.7962 (3.8565) grad_norm: 4.3925 (4.3114) time: 0.7743 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:02 root] (utils.py 283): INFO Epoch: [14] [1750/2502] eta: 0:09:46 lr: 0.000012 loss_cls: 3.7081 (3.8566) grad_norm: 4.1556 (4.3107) time: 0.7721 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:10 root] (utils.py 283): INFO Epoch: [14] [1760/2502] eta: 0:09:38 lr: 0.000012 loss_cls: 3.8217 (3.8573) grad_norm: 4.0312 (4.3097) time: 0.7723 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:17 root] (utils.py 283): INFO Epoch: [14] [1770/2502] eta: 0:09:30 lr: 0.000012 loss_cls: 4.0608 (3.8583) grad_norm: 4.0585 (4.3085) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:25 root] (utils.py 283): INFO Epoch: [14] [1780/2502] eta: 0:09:22 lr: 0.000012 loss_cls: 3.9574 (3.8581) grad_norm: 4.0585 (4.3080) time: 0.7602 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:33 root] (utils.py 283): INFO Epoch: [14] [1790/2502] eta: 0:09:14 lr: 0.000012 loss_cls: 3.8745 (3.8583) grad_norm: 4.2210 (4.3085) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:40 root] (utils.py 283): INFO Epoch: [14] [1800/2502] eta: 0:09:06 lr: 0.000012 loss_cls: 4.0222 (3.8588) grad_norm: 4.2393 (4.3104) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:48 root] (utils.py 283): INFO Epoch: [14] [1810/2502] eta: 0:08:59 lr: 0.000012 loss_cls: 4.1144 (3.8597) grad_norm: 4.1264 (4.3103) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-05 12:13:56 root] (utils.py 283): INFO Epoch: [14] [1820/2502] eta: 0:08:51 lr: 0.000012 loss_cls: 4.0395 (3.8578) grad_norm: 4.2013 (4.3117) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-05 12:14:03 root] (utils.py 283): INFO Epoch: [14] [1830/2502] eta: 0:08:43 lr: 0.000012 loss_cls: 3.7543 (3.8579) grad_norm: 4.1786 (4.3119) time: 0.7692 data: 0.0003 max mem: 8421 +[2024-12-05 12:14:11 root] (utils.py 283): INFO Epoch: [14] [1840/2502] eta: 0:08:35 lr: 0.000012 loss_cls: 4.0734 (3.8591) grad_norm: 4.1711 (4.3119) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 12:14:19 root] (utils.py 283): INFO Epoch: [14] [1850/2502] eta: 0:08:27 lr: 0.000012 loss_cls: 4.0734 (3.8595) grad_norm: 4.2271 (4.3119) time: 0.7818 data: 0.0002 max mem: 8421 +[2024-12-05 12:14:27 root] (utils.py 283): INFO Epoch: [14] [1860/2502] eta: 0:08:20 lr: 0.000012 loss_cls: 4.0482 (3.8605) grad_norm: 4.2099 (4.3103) time: 0.7991 data: 0.0002 max mem: 8421 +[2024-12-05 12:14:35 root] (utils.py 283): INFO Epoch: [14] [1870/2502] eta: 0:08:12 lr: 0.000012 loss_cls: 3.9884 (3.8595) grad_norm: 4.1332 (4.3100) time: 0.7959 data: 0.0002 max mem: 8421 +[2024-12-05 12:14:43 root] (utils.py 283): INFO Epoch: [14] [1880/2502] eta: 0:08:04 lr: 0.000012 loss_cls: 4.0127 (3.8607) grad_norm: 4.1900 (4.3097) time: 0.7777 data: 0.0002 max mem: 8421 +[2024-12-05 12:14:50 root] (utils.py 283): INFO Epoch: [14] [1890/2502] eta: 0:07:56 lr: 0.000012 loss_cls: 4.0131 (3.8605) grad_norm: 4.1900 (4.3120) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 12:14:58 root] (utils.py 283): INFO Epoch: [14] [1900/2502] eta: 0:07:48 lr: 0.000012 loss_cls: 3.9656 (3.8610) grad_norm: 4.2223 (4.3117) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-05 12:15:06 root] (utils.py 283): INFO Epoch: [14] [1910/2502] eta: 0:07:41 lr: 0.000012 loss_cls: 3.9338 (3.8605) grad_norm: 4.3017 (4.3132) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 12:15:13 root] (utils.py 283): INFO Epoch: [14] [1920/2502] eta: 0:07:33 lr: 0.000012 loss_cls: 3.9538 (3.8613) grad_norm: 4.3217 (4.3136) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 12:15:21 root] (utils.py 283): INFO Epoch: [14] [1930/2502] eta: 0:07:25 lr: 0.000012 loss_cls: 4.1389 (3.8617) grad_norm: 4.2665 (4.3139) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-05 12:15:29 root] (utils.py 283): INFO Epoch: [14] [1940/2502] eta: 0:07:17 lr: 0.000012 loss_cls: 4.1394 (3.8628) grad_norm: 4.2665 (4.3148) time: 0.7904 data: 0.0003 max mem: 8421 +[2024-12-05 12:15:37 root] (utils.py 283): INFO Epoch: [14] [1950/2502] eta: 0:07:10 lr: 0.000012 loss_cls: 4.0886 (3.8631) grad_norm: 4.2408 (4.3153) time: 0.7920 data: 0.0003 max mem: 8421 +[2024-12-05 12:15:45 root] (utils.py 283): INFO Epoch: [14] [1960/2502] eta: 0:07:02 lr: 0.000012 loss_cls: 3.8933 (3.8625) grad_norm: 4.2063 (4.3158) time: 0.7928 data: 0.0003 max mem: 8421 +[2024-12-05 12:15:53 root] (utils.py 283): INFO Epoch: [14] [1970/2502] eta: 0:06:54 lr: 0.000012 loss_cls: 3.6394 (3.8615) grad_norm: 4.1585 (4.3151) time: 0.7921 data: 0.0003 max mem: 8421 +[2024-12-05 12:16:01 root] (utils.py 283): INFO Epoch: [14] [1980/2502] eta: 0:06:46 lr: 0.000012 loss_cls: 3.9417 (3.8629) grad_norm: 4.2285 (4.3152) time: 0.7862 data: 0.0002 max mem: 8421 +[2024-12-05 12:16:09 root] (utils.py 283): INFO Epoch: [14] [1990/2502] eta: 0:06:38 lr: 0.000012 loss_cls: 3.9232 (3.8621) grad_norm: 4.2746 (4.3153) time: 0.7736 data: 0.0003 max mem: 8421 +[2024-12-05 12:16:16 root] (utils.py 283): INFO Epoch: [14] [2000/2502] eta: 0:06:31 lr: 0.000012 loss_cls: 3.7464 (3.8613) grad_norm: 4.1572 (4.3149) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-05 12:16:24 root] (utils.py 283): INFO Epoch: [14] [2010/2502] eta: 0:06:23 lr: 0.000012 loss_cls: 3.9414 (3.8624) grad_norm: 4.2456 (4.3148) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-05 12:16:31 root] (utils.py 283): INFO Epoch: [14] [2020/2502] eta: 0:06:15 lr: 0.000012 loss_cls: 3.9413 (3.8616) grad_norm: 4.2432 (4.3139) time: 0.7653 data: 0.0003 max mem: 8421 +[2024-12-05 12:16:39 root] (utils.py 283): INFO Epoch: [14] [2030/2502] eta: 0:06:07 lr: 0.000012 loss_cls: 4.0615 (3.8630) grad_norm: 4.1179 (4.3134) time: 0.7623 data: 0.0003 max mem: 8421 +[2024-12-05 12:16:47 root] (utils.py 283): INFO Epoch: [14] [2040/2502] eta: 0:05:59 lr: 0.000012 loss_cls: 4.0615 (3.8618) grad_norm: 4.0788 (4.3121) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 12:16:55 root] (utils.py 283): INFO Epoch: [14] [2050/2502] eta: 0:05:52 lr: 0.000012 loss_cls: 3.7765 (3.8617) grad_norm: 4.0478 (4.3114) time: 0.7736 data: 0.0002 max mem: 8421 +[2024-12-05 12:17:02 root] (utils.py 283): INFO Epoch: [14] [2060/2502] eta: 0:05:44 lr: 0.000012 loss_cls: 3.8079 (3.8608) grad_norm: 4.0806 (4.3112) time: 0.7712 data: 0.0003 max mem: 8421 +[2024-12-05 12:17:10 root] (utils.py 283): INFO Epoch: [14] [2070/2502] eta: 0:05:36 lr: 0.000012 loss_cls: 4.1997 (3.8633) grad_norm: 4.0806 (4.3102) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-05 12:17:18 root] (utils.py 283): INFO Epoch: [14] [2080/2502] eta: 0:05:28 lr: 0.000012 loss_cls: 4.1997 (3.8639) grad_norm: 4.1249 (4.3096) time: 0.7737 data: 0.0002 max mem: 8421 +[2024-12-05 12:17:25 root] (utils.py 283): INFO Epoch: [14] [2090/2502] eta: 0:05:20 lr: 0.000012 loss_cls: 4.0684 (3.8649) grad_norm: 4.2690 (4.3103) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 12:17:33 root] (utils.py 283): INFO Epoch: [14] [2100/2502] eta: 0:05:12 lr: 0.000012 loss_cls: 4.1210 (3.8658) grad_norm: 4.3624 (4.3103) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 12:17:41 root] (utils.py 283): INFO Epoch: [14] [2110/2502] eta: 0:05:05 lr: 0.000012 loss_cls: 3.8990 (3.8659) grad_norm: 4.3089 (4.3108) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 12:17:48 root] (utils.py 283): INFO Epoch: [14] [2120/2502] eta: 0:04:57 lr: 0.000012 loss_cls: 3.8395 (3.8655) grad_norm: 4.2171 (4.3122) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-05 12:17:56 root] (utils.py 283): INFO Epoch: [14] [2130/2502] eta: 0:04:49 lr: 0.000012 loss_cls: 3.9861 (3.8661) grad_norm: 4.3794 (4.3123) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 12:18:04 root] (utils.py 283): INFO Epoch: [14] [2140/2502] eta: 0:04:41 lr: 0.000012 loss_cls: 4.0919 (3.8674) grad_norm: 4.1588 (4.3120) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 12:18:11 root] (utils.py 283): INFO Epoch: [14] [2150/2502] eta: 0:04:33 lr: 0.000012 loss_cls: 4.0919 (3.8681) grad_norm: 4.0653 (4.3132) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-05 12:18:19 root] (utils.py 283): INFO Epoch: [14] [2160/2502] eta: 0:04:26 lr: 0.000012 loss_cls: 3.8792 (3.8680) grad_norm: 4.2179 (4.3162) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-05 12:18:27 root] (utils.py 283): INFO Epoch: [14] [2170/2502] eta: 0:04:18 lr: 0.000012 loss_cls: 3.8723 (3.8677) grad_norm: 4.2577 (4.3162) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 12:18:35 root] (utils.py 283): INFO Epoch: [14] [2180/2502] eta: 0:04:10 lr: 0.000012 loss_cls: 4.1067 (3.8689) grad_norm: 4.2509 (4.3159) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-05 12:18:43 root] (utils.py 283): INFO Epoch: [14] [2190/2502] eta: 0:04:02 lr: 0.000012 loss_cls: 3.9432 (3.8679) grad_norm: 4.2423 (4.3158) time: 0.7921 data: 0.0002 max mem: 8421 +[2024-12-05 12:18:51 root] (utils.py 283): INFO Epoch: [14] [2200/2502] eta: 0:03:55 lr: 0.000012 loss_cls: 3.7095 (3.8673) grad_norm: 4.1004 (4.3146) time: 0.7943 data: 0.0002 max mem: 8421 +[2024-12-05 12:18:59 root] (utils.py 283): INFO Epoch: [14] [2210/2502] eta: 0:03:47 lr: 0.000012 loss_cls: 3.5069 (3.8649) grad_norm: 4.0902 (4.3133) time: 0.7934 data: 0.0003 max mem: 8421 +[2024-12-05 12:19:07 root] (utils.py 283): INFO Epoch: [14] [2220/2502] eta: 0:03:39 lr: 0.000012 loss_cls: 3.5753 (3.8651) grad_norm: 4.0843 (4.3142) time: 0.7949 data: 0.0003 max mem: 8421 +[2024-12-05 12:19:15 root] (utils.py 283): INFO Epoch: [14] [2230/2502] eta: 0:03:31 lr: 0.000012 loss_cls: 3.8375 (3.8638) grad_norm: 4.0907 (4.3133) time: 0.7962 data: 0.0003 max mem: 8421 +[2024-12-05 12:19:22 root] (utils.py 283): INFO Epoch: [14] [2240/2502] eta: 0:03:24 lr: 0.000012 loss_cls: 3.9579 (3.8650) grad_norm: 4.0399 (4.3121) time: 0.7891 data: 0.0003 max mem: 8421 +[2024-12-05 12:19:30 root] (utils.py 283): INFO Epoch: [14] [2250/2502] eta: 0:03:16 lr: 0.000012 loss_cls: 4.0241 (3.8654) grad_norm: 4.1033 (4.3129) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 12:19:38 root] (utils.py 283): INFO Epoch: [14] [2260/2502] eta: 0:03:08 lr: 0.000012 loss_cls: 3.9937 (3.8652) grad_norm: 4.1343 (4.3121) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-05 12:19:46 root] (utils.py 283): INFO Epoch: [14] [2270/2502] eta: 0:03:00 lr: 0.000012 loss_cls: 4.0595 (3.8658) grad_norm: 4.0896 (4.3114) time: 0.8008 data: 0.0003 max mem: 8421 +[2024-12-05 12:19:56 root] (utils.py 283): INFO Epoch: [14] [2280/2502] eta: 0:02:53 lr: 0.000012 loss_cls: 3.9844 (3.8650) grad_norm: 4.1538 (4.3135) time: 0.8994 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:04 root] (utils.py 283): INFO Epoch: [14] [2290/2502] eta: 0:02:45 lr: 0.000012 loss_cls: 3.7308 (3.8638) grad_norm: 4.1781 (4.3132) time: 0.8737 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:11 root] (utils.py 283): INFO Epoch: [14] [2300/2502] eta: 0:02:37 lr: 0.000012 loss_cls: 3.8151 (3.8646) grad_norm: 4.1169 (4.3142) time: 0.7732 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:19 root] (utils.py 283): INFO Epoch: [14] [2310/2502] eta: 0:02:29 lr: 0.000012 loss_cls: 4.1360 (3.8655) grad_norm: 4.3742 (4.3163) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:27 root] (utils.py 283): INFO Epoch: [14] [2320/2502] eta: 0:02:21 lr: 0.000012 loss_cls: 3.9828 (3.8647) grad_norm: 4.2622 (4.3159) time: 0.7705 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:35 root] (utils.py 283): INFO Epoch: [14] [2330/2502] eta: 0:02:14 lr: 0.000012 loss_cls: 3.8384 (3.8648) grad_norm: 4.2088 (4.3156) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:42 root] (utils.py 283): INFO Epoch: [14] [2340/2502] eta: 0:02:06 lr: 0.000012 loss_cls: 4.2695 (3.8646) grad_norm: 4.0917 (4.3158) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:50 root] (utils.py 283): INFO Epoch: [14] [2350/2502] eta: 0:01:58 lr: 0.000012 loss_cls: 3.7808 (3.8637) grad_norm: 4.1091 (4.3171) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-05 12:20:58 root] (utils.py 283): INFO Epoch: [14] [2360/2502] eta: 0:01:50 lr: 0.000012 loss_cls: 3.7808 (3.8642) grad_norm: 4.3144 (4.3178) time: 0.7701 data: 0.0002 max mem: 8421 +[2024-12-05 12:21:05 root] (utils.py 283): INFO Epoch: [14] [2370/2502] eta: 0:01:42 lr: 0.000012 loss_cls: 4.1739 (3.8651) grad_norm: 4.3059 (4.3172) time: 0.7738 data: 0.0003 max mem: 8421 +[2024-12-05 12:21:13 root] (utils.py 283): INFO Epoch: [14] [2380/2502] eta: 0:01:35 lr: 0.000012 loss_cls: 4.0525 (3.8653) grad_norm: 4.0498 (4.3167) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 12:21:21 root] (utils.py 283): INFO Epoch: [14] [2390/2502] eta: 0:01:27 lr: 0.000012 loss_cls: 3.9831 (3.8649) grad_norm: 4.1173 (4.3163) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 12:21:29 root] (utils.py 283): INFO Epoch: [14] [2400/2502] eta: 0:01:19 lr: 0.000012 loss_cls: 4.0407 (3.8648) grad_norm: 4.1880 (4.3157) time: 0.7736 data: 0.0003 max mem: 8421 +[2024-12-05 12:21:37 root] (utils.py 283): INFO Epoch: [14] [2410/2502] eta: 0:01:11 lr: 0.000012 loss_cls: 3.9249 (3.8644) grad_norm: 4.1564 (4.3151) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 12:21:45 root] (utils.py 283): INFO Epoch: [14] [2420/2502] eta: 0:01:03 lr: 0.000012 loss_cls: 3.9313 (3.8645) grad_norm: 4.1564 (4.3156) time: 0.8104 data: 0.0003 max mem: 8421 +[2024-12-05 12:21:55 root] (utils.py 283): INFO Epoch: [14] [2430/2502] eta: 0:00:56 lr: 0.000012 loss_cls: 3.9463 (3.8641) grad_norm: 4.2118 (4.3154) time: 0.8975 data: 0.0003 max mem: 8421 +[2024-12-05 12:22:02 root] (utils.py 283): INFO Epoch: [14] [2440/2502] eta: 0:00:48 lr: 0.000012 loss_cls: 3.9463 (3.8637) grad_norm: 4.0982 (4.3148) time: 0.8712 data: 0.0002 max mem: 8421 +[2024-12-05 12:22:10 root] (utils.py 283): INFO Epoch: [14] [2450/2502] eta: 0:00:40 lr: 0.000012 loss_cls: 3.8598 (3.8631) grad_norm: 4.0206 (4.3144) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 12:22:18 root] (utils.py 283): INFO Epoch: [14] [2460/2502] eta: 0:00:32 lr: 0.000012 loss_cls: 3.8598 (3.8638) grad_norm: 4.1886 (4.3145) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-05 12:22:25 root] (utils.py 283): INFO Epoch: [14] [2470/2502] eta: 0:00:24 lr: 0.000012 loss_cls: 3.9552 (3.8640) grad_norm: 4.2784 (4.3140) time: 0.7703 data: 0.0003 max mem: 8421 +[2024-12-05 12:22:33 root] (utils.py 283): INFO Epoch: [14] [2480/2502] eta: 0:00:17 lr: 0.000012 loss_cls: 4.0415 (3.8644) grad_norm: 4.1700 (4.3164) time: 0.7672 data: 0.0003 max mem: 8421 +[2024-12-05 12:22:41 root] (utils.py 283): INFO Epoch: [14] [2490/2502] eta: 0:00:09 lr: 0.000012 loss_cls: 3.8381 (3.8637) grad_norm: 4.2450 (4.3162) time: 0.7970 data: 0.0231 max mem: 8421 +[2024-12-05 12:22:49 root] (utils.py 283): INFO Epoch: [14] [2500/2502] eta: 0:00:01 lr: 0.000012 loss_cls: 3.6692 (3.8632) grad_norm: 4.2420 (4.3184) time: 0.7990 data: 0.0231 max mem: 8421 +[2024-12-05 12:22:50 root] (utils.py 283): INFO Epoch: [14] [2501/2502] eta: 0:00:00 lr: 0.000012 loss_cls: 3.6692 (3.8631) grad_norm: 4.2420 (4.3185) time: 0.7987 data: 0.0231 max mem: 8421 +[2024-12-05 12:22:50 root] (utils.py 297): INFO Epoch: [14] Total time: 0:32:32 (0.7804 s / it) +[2024-12-05 12:22:50 root] (engine.py 178): INFO Averaged stats:lr: 0.000012 loss_cls: 3.6692 (3.8714) grad_norm: 4.2420 (4.3185) +[2024-12-05 12:22:50 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7203 (0.7203) acc1: 85.1562 (85.1562) acc3: 96.0938 (96.0938) acc5: 97.6562 (97.6562) time: 0.1310 data: 0.0004 max mem: 8421 +[2024-12-05 12:22:51 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8145 (0.8819) acc1: 85.1562 (80.9659) acc3: 92.9688 (93.0398) acc5: 95.3125 (95.5256) time: 0.1313 data: 0.0003 max mem: 8421 +[2024-12-05 12:22:53 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9205 (0.9351) acc1: 77.3438 (79.2039) acc3: 92.9688 (92.4107) acc5: 95.3125 (95.1637) time: 0.1314 data: 0.0004 max mem: 8421 +[2024-12-05 12:22:54 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9489 (0.9382) acc1: 78.9062 (78.8810) acc3: 92.9688 (92.6663) acc5: 96.0938 (95.2621) time: 0.1323 data: 0.0005 max mem: 8421 +[2024-12-05 12:22:55 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8464 (0.9280) acc1: 80.4688 (79.5541) acc3: 94.5312 (92.7782) acc5: 96.0938 (95.2744) time: 0.1326 data: 0.0005 max mem: 8421 +[2024-12-05 12:22:57 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9946 (1.0220) acc1: 74.2188 (77.6042) acc3: 88.2812 (91.0999) acc5: 92.9688 (94.0104) time: 0.1320 data: 0.0005 max mem: 8421 +[2024-12-05 12:22:58 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3759 (1.0659) acc1: 71.8750 (76.6650) acc3: 85.9375 (90.2792) acc5: 89.8438 (93.3017) time: 0.1329 data: 0.0005 max mem: 8421 +[2024-12-05 12:22:59 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2976 (1.1109) acc1: 72.6562 (75.5392) acc3: 86.7188 (89.6347) acc5: 89.8438 (92.8147) time: 0.1332 data: 0.0005 max mem: 8421 +[2024-12-05 12:23:01 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3370 (1.1489) acc1: 68.7500 (74.7492) acc3: 84.3750 (88.9178) acc5: 89.0625 (92.2936) time: 0.1332 data: 0.0007 max mem: 8421 +[2024-12-05 12:23:02 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3767 (1.1790) acc1: 67.9688 (74.0041) acc3: 82.8125 (88.3585) acc5: 89.0625 (91.8784) time: 0.1499 data: 0.0179 max mem: 8421 +[2024-12-05 12:23:04 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2229 (1.1662) acc1: 73.4375 (74.2240) acc3: 88.2812 (88.5920) acc5: 90.6250 (92.0800) time: 0.1735 data: 0.0412 max mem: 8421 +[2024-12-05 12:23:04 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1409 s / it) +[2024-12-05 12:23:04 root] (engine.py 263): INFO * Acc@1 74.308 Acc@3 88.690 Acc@5 92.102 loss 1.164 flops 1.285 layer_flops 1.251 +[2024-12-05 12:23:04 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.3% +[2024-12-05 12:23:04 root] (main.py 550): INFO Max accuracy: 74.49% +[2024-12-05 12:23:05 root] (utils.py 283): INFO Epoch: [15] [ 0/2502] eta: 0:32:25 lr: 0.000011 loss_cls: 4.6322 (4.6322) grad_norm: 4.2979 (4.2979) time: 0.7776 data: 0.0004 max mem: 8421 +[2024-12-05 12:23:12 root] (utils.py 283): INFO Epoch: [15] [ 10/2502] eta: 0:32:17 lr: 0.000011 loss_cls: 4.2082 (4.0754) grad_norm: 4.1701 (4.1258) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 12:23:20 root] (utils.py 283): INFO Epoch: [15] [ 20/2502] eta: 0:32:02 lr: 0.000011 loss_cls: 4.2199 (4.1630) grad_norm: 4.2387 (4.1873) time: 0.7743 data: 0.0003 max mem: 8421 +[2024-12-05 12:23:28 root] (utils.py 283): INFO Epoch: [15] [ 30/2502] eta: 0:31:52 lr: 0.000011 loss_cls: 4.1985 (4.0939) grad_norm: 4.3669 (4.2911) time: 0.7717 data: 0.0003 max mem: 8421 +[2024-12-05 12:23:35 root] (utils.py 283): INFO Epoch: [15] [ 40/2502] eta: 0:31:43 lr: 0.000011 loss_cls: 4.1187 (4.0516) grad_norm: 4.2678 (4.2769) time: 0.7718 data: 0.0002 max mem: 8421 +[2024-12-05 12:23:43 root] (utils.py 283): INFO Epoch: [15] [ 50/2502] eta: 0:31:33 lr: 0.000011 loss_cls: 3.8728 (3.9614) grad_norm: 4.1077 (4.2599) time: 0.7695 data: 0.0003 max mem: 8421 +[2024-12-05 12:23:51 root] (utils.py 283): INFO Epoch: [15] [ 60/2502] eta: 0:31:36 lr: 0.000011 loss_cls: 3.6960 (3.9324) grad_norm: 4.1173 (4.2661) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-05 12:23:59 root] (utils.py 283): INFO Epoch: [15] [ 70/2502] eta: 0:31:37 lr: 0.000011 loss_cls: 3.8269 (3.9210) grad_norm: 4.2574 (4.2787) time: 0.8003 data: 0.0003 max mem: 8421 +[2024-12-05 12:24:09 root] (utils.py 283): INFO Epoch: [15] [ 80/2502] eta: 0:32:39 lr: 0.000011 loss_cls: 3.8269 (3.9161) grad_norm: 4.1632 (4.2642) time: 0.9083 data: 0.0003 max mem: 8421 +[2024-12-05 12:24:17 root] (utils.py 283): INFO Epoch: [15] [ 90/2502] eta: 0:32:22 lr: 0.000011 loss_cls: 3.9806 (3.9075) grad_norm: 4.0772 (4.2529) time: 0.8957 data: 0.0003 max mem: 8421 +[2024-12-05 12:24:25 root] (utils.py 283): INFO Epoch: [15] [ 100/2502] eta: 0:32:04 lr: 0.000011 loss_cls: 3.6818 (3.8730) grad_norm: 4.2056 (4.2729) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-05 12:24:33 root] (utils.py 283): INFO Epoch: [15] [ 110/2502] eta: 0:31:53 lr: 0.000011 loss_cls: 3.7444 (3.8856) grad_norm: 4.3345 (4.2738) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-05 12:24:40 root] (utils.py 283): INFO Epoch: [15] [ 120/2502] eta: 0:31:39 lr: 0.000011 loss_cls: 3.8751 (3.8803) grad_norm: 4.3310 (4.2787) time: 0.7778 data: 0.0002 max mem: 8421 +[2024-12-05 12:24:48 root] (utils.py 283): INFO Epoch: [15] [ 130/2502] eta: 0:31:26 lr: 0.000011 loss_cls: 3.8562 (3.8698) grad_norm: 4.3310 (4.2989) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 12:24:56 root] (utils.py 283): INFO Epoch: [15] [ 140/2502] eta: 0:31:13 lr: 0.000011 loss_cls: 3.9331 (3.8869) grad_norm: 4.2474 (4.2922) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-05 12:25:03 root] (utils.py 283): INFO Epoch: [15] [ 150/2502] eta: 0:31:01 lr: 0.000011 loss_cls: 4.0491 (3.8868) grad_norm: 4.1331 (4.3083) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-05 12:25:11 root] (utils.py 283): INFO Epoch: [15] [ 160/2502] eta: 0:30:54 lr: 0.000011 loss_cls: 3.9696 (3.8671) grad_norm: 4.0723 (4.3029) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-05 12:25:19 root] (utils.py 283): INFO Epoch: [15] [ 170/2502] eta: 0:30:46 lr: 0.000011 loss_cls: 4.0209 (3.8680) grad_norm: 4.0613 (4.2940) time: 0.7961 data: 0.0003 max mem: 8421 +[2024-12-05 12:25:27 root] (utils.py 283): INFO Epoch: [15] [ 180/2502] eta: 0:30:38 lr: 0.000011 loss_cls: 4.0531 (3.8732) grad_norm: 4.0308 (4.2792) time: 0.7910 data: 0.0003 max mem: 8421 +[2024-12-05 12:25:35 root] (utils.py 283): INFO Epoch: [15] [ 190/2502] eta: 0:30:27 lr: 0.000011 loss_cls: 3.9358 (3.8678) grad_norm: 4.0074 (4.2745) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 12:25:42 root] (utils.py 283): INFO Epoch: [15] [ 200/2502] eta: 0:30:16 lr: 0.000011 loss_cls: 4.0073 (3.8769) grad_norm: 4.0105 (4.2694) time: 0.7673 data: 0.0003 max mem: 8421 +[2024-12-05 12:25:50 root] (utils.py 283): INFO Epoch: [15] [ 210/2502] eta: 0:30:07 lr: 0.000011 loss_cls: 4.0073 (3.8746) grad_norm: 4.2144 (4.2662) time: 0.7709 data: 0.0003 max mem: 8421 +[2024-12-05 12:25:58 root] (utils.py 283): INFO Epoch: [15] [ 220/2502] eta: 0:29:59 lr: 0.000011 loss_cls: 3.7487 (3.8639) grad_norm: 4.2144 (4.2645) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 12:26:06 root] (utils.py 283): INFO Epoch: [15] [ 230/2502] eta: 0:29:51 lr: 0.000011 loss_cls: 3.7163 (3.8621) grad_norm: 4.1879 (4.2610) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-05 12:26:14 root] (utils.py 283): INFO Epoch: [15] [ 240/2502] eta: 0:29:46 lr: 0.000011 loss_cls: 3.8703 (3.8632) grad_norm: 4.2776 (4.2610) time: 0.8063 data: 0.0003 max mem: 8421 +[2024-12-05 12:26:22 root] (utils.py 283): INFO Epoch: [15] [ 250/2502] eta: 0:29:36 lr: 0.000011 loss_cls: 3.7918 (3.8635) grad_norm: 4.2452 (4.2687) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-05 12:26:29 root] (utils.py 283): INFO Epoch: [15] [ 260/2502] eta: 0:29:26 lr: 0.000011 loss_cls: 3.7956 (3.8615) grad_norm: 4.0903 (4.2602) time: 0.7626 data: 0.0002 max mem: 8421 +[2024-12-05 12:26:37 root] (utils.py 283): INFO Epoch: [15] [ 270/2502] eta: 0:29:16 lr: 0.000011 loss_cls: 3.6314 (3.8545) grad_norm: 4.0903 (4.2643) time: 0.7643 data: 0.0002 max mem: 8421 +[2024-12-05 12:26:45 root] (utils.py 283): INFO Epoch: [15] [ 280/2502] eta: 0:29:07 lr: 0.000011 loss_cls: 3.9157 (3.8640) grad_norm: 4.2785 (4.2653) time: 0.7719 data: 0.0003 max mem: 8421 +[2024-12-05 12:26:53 root] (utils.py 283): INFO Epoch: [15] [ 290/2502] eta: 0:28:59 lr: 0.000011 loss_cls: 4.0383 (3.8684) grad_norm: 4.2785 (4.2651) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 12:27:02 root] (utils.py 283): INFO Epoch: [15] [ 300/2502] eta: 0:29:02 lr: 0.000011 loss_cls: 4.2057 (3.8723) grad_norm: 4.1696 (4.2617) time: 0.8567 data: 0.0003 max mem: 8421 +[2024-12-05 12:27:10 root] (utils.py 283): INFO Epoch: [15] [ 310/2502] eta: 0:28:52 lr: 0.000011 loss_cls: 3.9953 (3.8677) grad_norm: 4.1361 (4.2583) time: 0.8474 data: 0.0002 max mem: 8421 +[2024-12-05 12:27:17 root] (utils.py 283): INFO Epoch: [15] [ 320/2502] eta: 0:28:42 lr: 0.000011 loss_cls: 3.9114 (3.8737) grad_norm: 4.1442 (4.2578) time: 0.7578 data: 0.0002 max mem: 8421 +[2024-12-05 12:27:25 root] (utils.py 283): INFO Epoch: [15] [ 330/2502] eta: 0:28:32 lr: 0.000011 loss_cls: 4.0828 (3.8778) grad_norm: 4.2513 (4.2612) time: 0.7575 data: 0.0002 max mem: 8421 +[2024-12-05 12:27:32 root] (utils.py 283): INFO Epoch: [15] [ 340/2502] eta: 0:28:22 lr: 0.000011 loss_cls: 4.0508 (3.8779) grad_norm: 4.1800 (4.2554) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-05 12:27:40 root] (utils.py 283): INFO Epoch: [15] [ 350/2502] eta: 0:28:13 lr: 0.000011 loss_cls: 4.2588 (3.8853) grad_norm: 3.9765 (4.2641) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-05 12:27:48 root] (utils.py 283): INFO Epoch: [15] [ 360/2502] eta: 0:28:04 lr: 0.000011 loss_cls: 4.3075 (3.8854) grad_norm: 4.1035 (4.2636) time: 0.7698 data: 0.0002 max mem: 8421 +[2024-12-05 12:27:56 root] (utils.py 283): INFO Epoch: [15] [ 370/2502] eta: 0:27:58 lr: 0.000011 loss_cls: 3.9381 (3.8863) grad_norm: 4.1704 (4.2652) time: 0.7955 data: 0.0003 max mem: 8421 +[2024-12-05 12:28:04 root] (utils.py 283): INFO Epoch: [15] [ 380/2502] eta: 0:27:50 lr: 0.000011 loss_cls: 3.8338 (3.8817) grad_norm: 4.2077 (4.2710) time: 0.8011 data: 0.0003 max mem: 8421 +[2024-12-05 12:28:12 root] (utils.py 283): INFO Epoch: [15] [ 390/2502] eta: 0:27:42 lr: 0.000011 loss_cls: 3.6351 (3.8751) grad_norm: 4.1770 (4.2677) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 12:28:19 root] (utils.py 283): INFO Epoch: [15] [ 400/2502] eta: 0:27:34 lr: 0.000011 loss_cls: 3.8080 (3.8749) grad_norm: 4.0048 (4.2728) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-05 12:28:28 root] (utils.py 283): INFO Epoch: [15] [ 410/2502] eta: 0:27:27 lr: 0.000011 loss_cls: 3.9763 (3.8762) grad_norm: 4.0525 (4.2699) time: 0.8001 data: 0.0003 max mem: 8421 +[2024-12-05 12:28:36 root] (utils.py 283): INFO Epoch: [15] [ 420/2502] eta: 0:27:20 lr: 0.000011 loss_cls: 3.7812 (3.8685) grad_norm: 4.2197 (4.2788) time: 0.8025 data: 0.0003 max mem: 8421 +[2024-12-05 12:28:43 root] (utils.py 283): INFO Epoch: [15] [ 430/2502] eta: 0:27:12 lr: 0.000011 loss_cls: 3.6639 (3.8681) grad_norm: 4.1410 (4.2821) time: 0.7956 data: 0.0003 max mem: 8421 +[2024-12-05 12:28:51 root] (utils.py 283): INFO Epoch: [15] [ 440/2502] eta: 0:27:04 lr: 0.000011 loss_cls: 3.9417 (3.8681) grad_norm: 4.1410 (4.2865) time: 0.7917 data: 0.0002 max mem: 8421 +[2024-12-05 12:28:59 root] (utils.py 283): INFO Epoch: [15] [ 450/2502] eta: 0:26:56 lr: 0.000011 loss_cls: 4.1493 (3.8732) grad_norm: 4.2295 (4.2853) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-05 12:29:07 root] (utils.py 283): INFO Epoch: [15] [ 460/2502] eta: 0:26:48 lr: 0.000011 loss_cls: 4.1570 (3.8698) grad_norm: 4.2058 (4.2844) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-05 12:29:15 root] (utils.py 283): INFO Epoch: [15] [ 470/2502] eta: 0:26:41 lr: 0.000011 loss_cls: 3.8193 (3.8705) grad_norm: 4.2058 (4.2831) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-05 12:29:23 root] (utils.py 283): INFO Epoch: [15] [ 480/2502] eta: 0:26:32 lr: 0.000011 loss_cls: 3.9056 (3.8731) grad_norm: 4.2260 (4.2839) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-05 12:29:30 root] (utils.py 283): INFO Epoch: [15] [ 490/2502] eta: 0:26:24 lr: 0.000011 loss_cls: 3.8204 (3.8677) grad_norm: 4.1543 (4.2818) time: 0.7720 data: 0.0002 max mem: 8421 +[2024-12-05 12:29:38 root] (utils.py 283): INFO Epoch: [15] [ 500/2502] eta: 0:26:15 lr: 0.000011 loss_cls: 3.7479 (3.8670) grad_norm: 4.0861 (4.2888) time: 0.7712 data: 0.0002 max mem: 8421 +[2024-12-05 12:29:46 root] (utils.py 283): INFO Epoch: [15] [ 510/2502] eta: 0:26:07 lr: 0.000011 loss_cls: 3.7479 (3.8633) grad_norm: 4.1348 (4.2908) time: 0.7786 data: 0.0002 max mem: 8421 +[2024-12-05 12:29:54 root] (utils.py 283): INFO Epoch: [15] [ 520/2502] eta: 0:25:59 lr: 0.000011 loss_cls: 3.8652 (3.8657) grad_norm: 4.1445 (4.2877) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:01 root] (utils.py 283): INFO Epoch: [15] [ 530/2502] eta: 0:25:50 lr: 0.000011 loss_cls: 4.0231 (3.8650) grad_norm: 4.0006 (4.2867) time: 0.7733 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:09 root] (utils.py 283): INFO Epoch: [15] [ 540/2502] eta: 0:25:43 lr: 0.000011 loss_cls: 4.0231 (3.8627) grad_norm: 3.9865 (4.2853) time: 0.7770 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:17 root] (utils.py 283): INFO Epoch: [15] [ 550/2502] eta: 0:25:34 lr: 0.000011 loss_cls: 3.8024 (3.8592) grad_norm: 4.3720 (4.2896) time: 0.7770 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:25 root] (utils.py 283): INFO Epoch: [15] [ 560/2502] eta: 0:25:26 lr: 0.000011 loss_cls: 4.0891 (3.8648) grad_norm: 4.3485 (4.2875) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:32 root] (utils.py 283): INFO Epoch: [15] [ 570/2502] eta: 0:25:17 lr: 0.000011 loss_cls: 4.2754 (3.8700) grad_norm: 4.1250 (4.2858) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:40 root] (utils.py 283): INFO Epoch: [15] [ 580/2502] eta: 0:25:09 lr: 0.000011 loss_cls: 3.8105 (3.8611) grad_norm: 4.1941 (4.2835) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:48 root] (utils.py 283): INFO Epoch: [15] [ 590/2502] eta: 0:25:00 lr: 0.000011 loss_cls: 3.8105 (3.8645) grad_norm: 4.1941 (4.2829) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 12:30:55 root] (utils.py 283): INFO Epoch: [15] [ 600/2502] eta: 0:24:52 lr: 0.000011 loss_cls: 4.0695 (3.8620) grad_norm: 4.0653 (4.2827) time: 0.7733 data: 0.0002 max mem: 8421 +[2024-12-05 12:31:03 root] (utils.py 283): INFO Epoch: [15] [ 610/2502] eta: 0:24:44 lr: 0.000011 loss_cls: 3.7057 (3.8567) grad_norm: 4.1508 (4.2819) time: 0.7751 data: 0.0002 max mem: 8421 +[2024-12-05 12:31:11 root] (utils.py 283): INFO Epoch: [15] [ 620/2502] eta: 0:24:36 lr: 0.000011 loss_cls: 3.7057 (3.8546) grad_norm: 4.1711 (4.2806) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-05 12:31:19 root] (utils.py 283): INFO Epoch: [15] [ 630/2502] eta: 0:24:28 lr: 0.000011 loss_cls: 3.9385 (3.8603) grad_norm: 4.1711 (4.2800) time: 0.7739 data: 0.0002 max mem: 8421 +[2024-12-05 12:31:26 root] (utils.py 283): INFO Epoch: [15] [ 640/2502] eta: 0:24:19 lr: 0.000011 loss_cls: 4.0872 (3.8565) grad_norm: 4.0522 (4.2768) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 12:31:34 root] (utils.py 283): INFO Epoch: [15] [ 650/2502] eta: 0:24:12 lr: 0.000011 loss_cls: 3.9809 (3.8591) grad_norm: 4.0228 (4.2732) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-05 12:31:42 root] (utils.py 283): INFO Epoch: [15] [ 660/2502] eta: 0:24:04 lr: 0.000011 loss_cls: 3.9100 (3.8602) grad_norm: 4.0344 (4.2731) time: 0.7970 data: 0.0003 max mem: 8421 +[2024-12-05 12:31:51 root] (utils.py 283): INFO Epoch: [15] [ 670/2502] eta: 0:23:59 lr: 0.000011 loss_cls: 3.8313 (3.8563) grad_norm: 4.1389 (4.2898) time: 0.8377 data: 0.0004 max mem: 8421 +[2024-12-05 12:32:00 root] (utils.py 283): INFO Epoch: [15] [ 680/2502] eta: 0:23:54 lr: 0.000011 loss_cls: 3.8895 (3.8571) grad_norm: 4.1494 (4.2891) time: 0.8825 data: 0.0005 max mem: 8421 +[2024-12-05 12:32:09 root] (utils.py 283): INFO Epoch: [15] [ 690/2502] eta: 0:23:49 lr: 0.000011 loss_cls: 3.9406 (3.8538) grad_norm: 4.1198 (4.2865) time: 0.9010 data: 0.0005 max mem: 8421 +[2024-12-05 12:32:18 root] (utils.py 283): INFO Epoch: [15] [ 700/2502] eta: 0:23:44 lr: 0.000011 loss_cls: 3.5954 (3.8541) grad_norm: 4.0784 (4.2855) time: 0.8979 data: 0.0005 max mem: 8421 +[2024-12-05 12:32:26 root] (utils.py 283): INFO Epoch: [15] [ 710/2502] eta: 0:23:37 lr: 0.000011 loss_cls: 4.0748 (3.8565) grad_norm: 4.2412 (4.2977) time: 0.8682 data: 0.0004 max mem: 8421 +[2024-12-05 12:32:36 root] (utils.py 283): INFO Epoch: [15] [ 720/2502] eta: 0:23:33 lr: 0.000011 loss_cls: 3.9365 (3.8516) grad_norm: 4.2412 (4.3099) time: 0.8799 data: 0.0003 max mem: 8421 +[2024-12-05 12:32:53 root] (utils.py 283): INFO Epoch: [15] [ 730/2502] eta: 0:23:47 lr: 0.000011 loss_cls: 3.8370 (3.8536) grad_norm: 4.1299 (4.3069) time: 1.3205 data: 0.0003 max mem: 8421 +[2024-12-05 12:33:04 root] (utils.py 283): INFO Epoch: [15] [ 740/2502] eta: 0:23:47 lr: 0.000011 loss_cls: 4.1548 (3.8576) grad_norm: 4.1540 (4.3134) time: 1.4350 data: 0.0002 max mem: 8421 +[2024-12-05 12:33:12 root] (utils.py 283): INFO Epoch: [15] [ 750/2502] eta: 0:23:38 lr: 0.000011 loss_cls: 4.0959 (3.8576) grad_norm: 4.3096 (4.3147) time: 0.9568 data: 0.0002 max mem: 8421 +[2024-12-05 12:33:20 root] (utils.py 283): INFO Epoch: [15] [ 760/2502] eta: 0:23:29 lr: 0.000011 loss_cls: 3.8552 (3.8567) grad_norm: 4.1991 (4.3131) time: 0.7756 data: 0.0002 max mem: 8421 +[2024-12-05 12:33:28 root] (utils.py 283): INFO Epoch: [15] [ 770/2502] eta: 0:23:20 lr: 0.000011 loss_cls: 3.7585 (3.8553) grad_norm: 4.0763 (4.3178) time: 0.7756 data: 0.0002 max mem: 8421 +[2024-12-05 12:33:35 root] (utils.py 283): INFO Epoch: [15] [ 780/2502] eta: 0:23:12 lr: 0.000011 loss_cls: 3.7907 (3.8544) grad_norm: 4.0915 (4.3200) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-05 12:33:43 root] (utils.py 283): INFO Epoch: [15] [ 790/2502] eta: 0:23:03 lr: 0.000011 loss_cls: 4.0524 (3.8568) grad_norm: 4.1949 (4.3198) time: 0.7710 data: 0.0002 max mem: 8421 +[2024-12-05 12:33:51 root] (utils.py 283): INFO Epoch: [15] [ 800/2502] eta: 0:22:54 lr: 0.000011 loss_cls: 3.9876 (3.8553) grad_norm: 4.1023 (4.3222) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 12:33:58 root] (utils.py 283): INFO Epoch: [15] [ 810/2502] eta: 0:22:45 lr: 0.000011 loss_cls: 3.7936 (3.8540) grad_norm: 4.2050 (4.3224) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 12:34:06 root] (utils.py 283): INFO Epoch: [15] [ 820/2502] eta: 0:22:36 lr: 0.000011 loss_cls: 3.7667 (3.8529) grad_norm: 4.3277 (4.3226) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 12:34:14 root] (utils.py 283): INFO Epoch: [15] [ 830/2502] eta: 0:22:27 lr: 0.000011 loss_cls: 3.7667 (3.8501) grad_norm: 4.1966 (4.3216) time: 0.7698 data: 0.0002 max mem: 8421 +[2024-12-05 12:34:21 root] (utils.py 283): INFO Epoch: [15] [ 840/2502] eta: 0:22:19 lr: 0.000011 loss_cls: 3.9093 (3.8525) grad_norm: 4.1465 (4.3218) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 12:34:29 root] (utils.py 283): INFO Epoch: [15] [ 850/2502] eta: 0:22:10 lr: 0.000011 loss_cls: 4.0689 (3.8518) grad_norm: 4.2585 (4.3196) time: 0.7897 data: 0.0010 max mem: 8421 +[2024-12-05 12:34:38 root] (utils.py 283): INFO Epoch: [15] [ 860/2502] eta: 0:22:02 lr: 0.000011 loss_cls: 3.8523 (3.8512) grad_norm: 4.2583 (4.3181) time: 0.8026 data: 0.0010 max mem: 8421 +[2024-12-05 12:34:46 root] (utils.py 283): INFO Epoch: [15] [ 870/2502] eta: 0:21:54 lr: 0.000011 loss_cls: 3.7300 (3.8491) grad_norm: 4.1888 (4.3158) time: 0.8107 data: 0.0003 max mem: 8421 +[2024-12-05 12:34:54 root] (utils.py 283): INFO Epoch: [15] [ 880/2502] eta: 0:21:48 lr: 0.000011 loss_cls: 3.8031 (3.8487) grad_norm: 3.9642 (4.3129) time: 0.8477 data: 0.0004 max mem: 8421 +[2024-12-05 12:35:03 root] (utils.py 283): INFO Epoch: [15] [ 890/2502] eta: 0:21:41 lr: 0.000011 loss_cls: 3.8755 (3.8486) grad_norm: 4.0037 (4.3108) time: 0.8891 data: 0.0005 max mem: 8421 +[2024-12-05 12:35:12 root] (utils.py 283): INFO Epoch: [15] [ 900/2502] eta: 0:21:34 lr: 0.000011 loss_cls: 3.8755 (3.8482) grad_norm: 4.0595 (4.3093) time: 0.8800 data: 0.0004 max mem: 8421 +[2024-12-05 12:35:20 root] (utils.py 283): INFO Epoch: [15] [ 910/2502] eta: 0:21:25 lr: 0.000011 loss_cls: 3.9395 (3.8480) grad_norm: 4.1766 (4.3085) time: 0.8143 data: 0.0003 max mem: 8421 +[2024-12-05 12:35:27 root] (utils.py 283): INFO Epoch: [15] [ 920/2502] eta: 0:21:16 lr: 0.000011 loss_cls: 3.9303 (3.8461) grad_norm: 4.1352 (4.3061) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-05 12:35:35 root] (utils.py 283): INFO Epoch: [15] [ 930/2502] eta: 0:21:08 lr: 0.000011 loss_cls: 3.7221 (3.8459) grad_norm: 4.1306 (4.3055) time: 0.7586 data: 0.0002 max mem: 8421 +[2024-12-05 12:35:43 root] (utils.py 283): INFO Epoch: [15] [ 940/2502] eta: 0:20:59 lr: 0.000011 loss_cls: 3.8291 (3.8466) grad_norm: 4.1306 (4.3037) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-05 12:35:50 root] (utils.py 283): INFO Epoch: [15] [ 950/2502] eta: 0:20:50 lr: 0.000011 loss_cls: 4.0875 (3.8469) grad_norm: 4.0350 (4.3008) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 12:35:58 root] (utils.py 283): INFO Epoch: [15] [ 960/2502] eta: 0:20:42 lr: 0.000011 loss_cls: 3.9455 (3.8461) grad_norm: 4.0328 (4.2985) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:06 root] (utils.py 283): INFO Epoch: [15] [ 970/2502] eta: 0:20:33 lr: 0.000011 loss_cls: 3.8986 (3.8465) grad_norm: 4.0530 (4.3000) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:13 root] (utils.py 283): INFO Epoch: [15] [ 980/2502] eta: 0:20:25 lr: 0.000011 loss_cls: 3.9888 (3.8469) grad_norm: 4.1844 (4.3006) time: 0.7733 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:21 root] (utils.py 283): INFO Epoch: [15] [ 990/2502] eta: 0:20:16 lr: 0.000011 loss_cls: 4.0562 (3.8496) grad_norm: 4.1056 (4.2994) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:29 root] (utils.py 283): INFO Epoch: [15] [1000/2502] eta: 0:20:07 lr: 0.000011 loss_cls: 3.8855 (3.8487) grad_norm: 3.9390 (4.2964) time: 0.7695 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:37 root] (utils.py 283): INFO Epoch: [15] [1010/2502] eta: 0:19:59 lr: 0.000011 loss_cls: 3.7607 (3.8490) grad_norm: 3.9390 (4.2959) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:44 root] (utils.py 283): INFO Epoch: [15] [1020/2502] eta: 0:19:50 lr: 0.000011 loss_cls: 3.9486 (3.8505) grad_norm: 4.0871 (4.2938) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:52 root] (utils.py 283): INFO Epoch: [15] [1030/2502] eta: 0:19:42 lr: 0.000011 loss_cls: 3.9486 (3.8512) grad_norm: 4.0931 (4.2959) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 12:36:59 root] (utils.py 283): INFO Epoch: [15] [1040/2502] eta: 0:19:33 lr: 0.000011 loss_cls: 4.0376 (3.8535) grad_norm: 4.2688 (4.2991) time: 0.7624 data: 0.0002 max mem: 8421 +[2024-12-05 12:37:07 root] (utils.py 283): INFO Epoch: [15] [1050/2502] eta: 0:19:24 lr: 0.000011 loss_cls: 4.1154 (3.8525) grad_norm: 4.2526 (4.2993) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 12:37:15 root] (utils.py 283): INFO Epoch: [15] [1060/2502] eta: 0:19:16 lr: 0.000011 loss_cls: 4.0993 (3.8546) grad_norm: 4.2484 (4.2997) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-05 12:37:22 root] (utils.py 283): INFO Epoch: [15] [1070/2502] eta: 0:19:07 lr: 0.000011 loss_cls: 4.0188 (3.8544) grad_norm: 4.0659 (4.2984) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-05 12:37:30 root] (utils.py 283): INFO Epoch: [15] [1080/2502] eta: 0:18:59 lr: 0.000011 loss_cls: 3.9882 (3.8543) grad_norm: 4.1435 (4.2993) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-05 12:37:38 root] (utils.py 283): INFO Epoch: [15] [1090/2502] eta: 0:18:50 lr: 0.000011 loss_cls: 4.0051 (3.8561) grad_norm: 4.3462 (4.3076) time: 0.7700 data: 0.0002 max mem: 8421 +[2024-12-05 12:37:46 root] (utils.py 283): INFO Epoch: [15] [1100/2502] eta: 0:18:42 lr: 0.000011 loss_cls: 4.2282 (3.8593) grad_norm: 4.3440 (4.3072) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 12:37:53 root] (utils.py 283): INFO Epoch: [15] [1110/2502] eta: 0:18:34 lr: 0.000011 loss_cls: 4.1092 (3.8584) grad_norm: 4.0365 (4.3055) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-05 12:38:02 root] (utils.py 283): INFO Epoch: [15] [1120/2502] eta: 0:18:26 lr: 0.000011 loss_cls: 3.8885 (3.8577) grad_norm: 4.0365 (4.3046) time: 0.8013 data: 0.0003 max mem: 8421 +[2024-12-05 12:38:10 root] (utils.py 283): INFO Epoch: [15] [1130/2502] eta: 0:18:18 lr: 0.000011 loss_cls: 3.9787 (3.8576) grad_norm: 4.3188 (4.3058) time: 0.8152 data: 0.0002 max mem: 8421 +[2024-12-05 12:38:18 root] (utils.py 283): INFO Epoch: [15] [1140/2502] eta: 0:18:10 lr: 0.000011 loss_cls: 4.0389 (3.8597) grad_norm: 4.3871 (4.3089) time: 0.8023 data: 0.0002 max mem: 8421 +[2024-12-05 12:38:26 root] (utils.py 283): INFO Epoch: [15] [1150/2502] eta: 0:18:02 lr: 0.000011 loss_cls: 3.8757 (3.8583) grad_norm: 4.2347 (4.3101) time: 0.7888 data: 0.0003 max mem: 8421 +[2024-12-05 12:38:33 root] (utils.py 283): INFO Epoch: [15] [1160/2502] eta: 0:17:54 lr: 0.000011 loss_cls: 3.7870 (3.8594) grad_norm: 4.2301 (4.3123) time: 0.7721 data: 0.0002 max mem: 8421 +[2024-12-05 12:38:41 root] (utils.py 283): INFO Epoch: [15] [1170/2502] eta: 0:17:45 lr: 0.000011 loss_cls: 4.1245 (3.8607) grad_norm: 4.2219 (4.3120) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 12:38:48 root] (utils.py 283): INFO Epoch: [15] [1180/2502] eta: 0:17:37 lr: 0.000011 loss_cls: 4.0569 (3.8619) grad_norm: 4.2210 (4.3111) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-05 12:38:56 root] (utils.py 283): INFO Epoch: [15] [1190/2502] eta: 0:17:28 lr: 0.000011 loss_cls: 4.0569 (3.8626) grad_norm: 4.2750 (4.3122) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 12:39:04 root] (utils.py 283): INFO Epoch: [15] [1200/2502] eta: 0:17:20 lr: 0.000011 loss_cls: 4.1941 (3.8645) grad_norm: 4.2411 (4.3125) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-05 12:39:11 root] (utils.py 283): INFO Epoch: [15] [1210/2502] eta: 0:17:12 lr: 0.000011 loss_cls: 4.1941 (3.8665) grad_norm: 4.2879 (4.3131) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 12:39:19 root] (utils.py 283): INFO Epoch: [15] [1220/2502] eta: 0:17:03 lr: 0.000011 loss_cls: 3.8547 (3.8645) grad_norm: 4.3300 (4.3136) time: 0.7734 data: 0.0002 max mem: 8421 +[2024-12-05 12:39:27 root] (utils.py 283): INFO Epoch: [15] [1230/2502] eta: 0:16:55 lr: 0.000011 loss_cls: 3.8547 (3.8660) grad_norm: 4.1417 (4.3113) time: 0.7852 data: 0.0002 max mem: 8421 +[2024-12-05 12:39:35 root] (utils.py 283): INFO Epoch: [15] [1240/2502] eta: 0:16:48 lr: 0.000011 loss_cls: 3.9917 (3.8651) grad_norm: 4.0209 (4.3103) time: 0.8148 data: 0.0003 max mem: 8421 +[2024-12-05 12:39:51 root] (utils.py 283): INFO Epoch: [15] [1250/2502] eta: 0:16:47 lr: 0.000011 loss_cls: 3.8655 (3.8628) grad_norm: 4.1083 (4.3099) time: 1.1975 data: 0.0003 max mem: 8421 +[2024-12-05 12:40:04 root] (utils.py 283): INFO Epoch: [15] [1260/2502] eta: 0:16:44 lr: 0.000011 loss_cls: 3.8277 (3.8615) grad_norm: 4.1920 (4.3121) time: 1.4258 data: 0.0003 max mem: 8421 +[2024-12-05 12:40:12 root] (utils.py 283): INFO Epoch: [15] [1270/2502] eta: 0:16:36 lr: 0.000011 loss_cls: 3.9545 (3.8626) grad_norm: 4.1875 (4.3119) time: 1.0355 data: 0.0003 max mem: 8421 +[2024-12-05 12:40:20 root] (utils.py 283): INFO Epoch: [15] [1280/2502] eta: 0:16:27 lr: 0.000011 loss_cls: 4.0306 (3.8622) grad_norm: 4.1387 (4.3108) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 12:40:28 root] (utils.py 283): INFO Epoch: [15] [1290/2502] eta: 0:16:19 lr: 0.000011 loss_cls: 3.9152 (3.8623) grad_norm: 4.2125 (4.3101) time: 0.7935 data: 0.0002 max mem: 8421 +[2024-12-05 12:40:36 root] (utils.py 283): INFO Epoch: [15] [1300/2502] eta: 0:16:11 lr: 0.000011 loss_cls: 3.9152 (3.8617) grad_norm: 4.0863 (4.3078) time: 0.7980 data: 0.0002 max mem: 8421 +[2024-12-05 12:40:44 root] (utils.py 283): INFO Epoch: [15] [1310/2502] eta: 0:16:03 lr: 0.000011 loss_cls: 3.9680 (3.8627) grad_norm: 4.0180 (4.3090) time: 0.7978 data: 0.0002 max mem: 8421 +[2024-12-05 12:40:51 root] (utils.py 283): INFO Epoch: [15] [1320/2502] eta: 0:15:54 lr: 0.000011 loss_cls: 3.9680 (3.8608) grad_norm: 4.2902 (4.3109) time: 0.7801 data: 0.0002 max mem: 8421 +[2024-12-05 12:40:59 root] (utils.py 283): INFO Epoch: [15] [1330/2502] eta: 0:15:46 lr: 0.000011 loss_cls: 3.8449 (3.8608) grad_norm: 4.2902 (4.3108) time: 0.7767 data: 0.0002 max mem: 8421 +[2024-12-05 12:41:07 root] (utils.py 283): INFO Epoch: [15] [1340/2502] eta: 0:15:38 lr: 0.000011 loss_cls: 4.1661 (3.8628) grad_norm: 4.2413 (4.3108) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-05 12:41:15 root] (utils.py 283): INFO Epoch: [15] [1350/2502] eta: 0:15:30 lr: 0.000011 loss_cls: 4.2255 (3.8653) grad_norm: 4.0597 (4.3102) time: 0.7773 data: 0.0002 max mem: 8421 +[2024-12-05 12:41:22 root] (utils.py 283): INFO Epoch: [15] [1360/2502] eta: 0:15:21 lr: 0.000011 loss_cls: 4.0582 (3.8645) grad_norm: 4.1193 (4.3126) time: 0.7812 data: 0.0002 max mem: 8421 +[2024-12-05 12:41:30 root] (utils.py 283): INFO Epoch: [15] [1370/2502] eta: 0:15:13 lr: 0.000011 loss_cls: 3.6394 (3.8619) grad_norm: 4.2417 (4.3140) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-05 12:41:38 root] (utils.py 283): INFO Epoch: [15] [1380/2502] eta: 0:15:05 lr: 0.000011 loss_cls: 3.6337 (3.8609) grad_norm: 4.2253 (4.3131) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 12:41:46 root] (utils.py 283): INFO Epoch: [15] [1390/2502] eta: 0:14:57 lr: 0.000011 loss_cls: 3.8505 (3.8592) grad_norm: 4.1304 (4.3122) time: 0.7982 data: 0.0002 max mem: 8421 +[2024-12-05 12:41:54 root] (utils.py 283): INFO Epoch: [15] [1400/2502] eta: 0:14:49 lr: 0.000011 loss_cls: 3.7143 (3.8584) grad_norm: 4.3095 (4.3131) time: 0.8185 data: 0.0002 max mem: 8421 +[2024-12-05 12:42:03 root] (utils.py 283): INFO Epoch: [15] [1410/2502] eta: 0:14:41 lr: 0.000011 loss_cls: 3.7143 (3.8576) grad_norm: 4.3285 (4.3118) time: 0.8166 data: 0.0002 max mem: 8421 +[2024-12-05 12:42:10 root] (utils.py 283): INFO Epoch: [15] [1420/2502] eta: 0:14:33 lr: 0.000011 loss_cls: 3.5987 (3.8561) grad_norm: 4.0921 (4.3120) time: 0.8047 data: 0.0002 max mem: 8421 +[2024-12-05 12:42:18 root] (utils.py 283): INFO Epoch: [15] [1430/2502] eta: 0:14:24 lr: 0.000011 loss_cls: 3.5987 (3.8538) grad_norm: 4.1762 (4.3144) time: 0.7855 data: 0.0002 max mem: 8421 +[2024-12-05 12:42:26 root] (utils.py 283): INFO Epoch: [15] [1440/2502] eta: 0:14:16 lr: 0.000011 loss_cls: 3.9285 (3.8558) grad_norm: 4.2433 (4.3143) time: 0.7756 data: 0.0003 max mem: 8421 +[2024-12-05 12:42:34 root] (utils.py 283): INFO Epoch: [15] [1450/2502] eta: 0:14:08 lr: 0.000011 loss_cls: 4.0807 (3.8562) grad_norm: 4.2305 (4.3139) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 12:42:42 root] (utils.py 283): INFO Epoch: [15] [1460/2502] eta: 0:13:59 lr: 0.000011 loss_cls: 4.1022 (3.8571) grad_norm: 4.1177 (4.3130) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-05 12:42:50 root] (utils.py 283): INFO Epoch: [15] [1470/2502] eta: 0:13:51 lr: 0.000011 loss_cls: 3.9852 (3.8562) grad_norm: 4.1259 (4.3132) time: 0.7983 data: 0.0003 max mem: 8421 +[2024-12-05 12:42:58 root] (utils.py 283): INFO Epoch: [15] [1480/2502] eta: 0:13:43 lr: 0.000011 loss_cls: 3.9027 (3.8561) grad_norm: 4.2886 (4.3138) time: 0.8073 data: 0.0002 max mem: 8421 +[2024-12-05 12:43:06 root] (utils.py 283): INFO Epoch: [15] [1490/2502] eta: 0:13:35 lr: 0.000011 loss_cls: 3.9838 (3.8562) grad_norm: 4.3884 (4.3158) time: 0.8038 data: 0.0002 max mem: 8421 +[2024-12-05 12:43:14 root] (utils.py 283): INFO Epoch: [15] [1500/2502] eta: 0:13:27 lr: 0.000011 loss_cls: 3.8514 (3.8555) grad_norm: 4.2030 (4.3143) time: 0.7990 data: 0.0002 max mem: 8421 +[2024-12-05 12:43:22 root] (utils.py 283): INFO Epoch: [15] [1510/2502] eta: 0:13:19 lr: 0.000011 loss_cls: 4.0315 (3.8573) grad_norm: 4.1313 (4.3138) time: 0.7877 data: 0.0002 max mem: 8421 +[2024-12-05 12:43:29 root] (utils.py 283): INFO Epoch: [15] [1520/2502] eta: 0:13:11 lr: 0.000011 loss_cls: 4.1090 (3.8565) grad_norm: 4.1349 (4.3127) time: 0.7731 data: 0.0002 max mem: 8421 +[2024-12-05 12:43:37 root] (utils.py 283): INFO Epoch: [15] [1530/2502] eta: 0:13:02 lr: 0.000011 loss_cls: 3.9387 (3.8566) grad_norm: 4.1660 (4.3126) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 12:43:45 root] (utils.py 283): INFO Epoch: [15] [1540/2502] eta: 0:12:54 lr: 0.000011 loss_cls: 3.9656 (3.8573) grad_norm: 4.2558 (4.3150) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-05 12:43:53 root] (utils.py 283): INFO Epoch: [15] [1550/2502] eta: 0:12:46 lr: 0.000011 loss_cls: 3.9774 (3.8560) grad_norm: 4.3824 (4.3158) time: 0.8060 data: 0.0003 max mem: 8421 +[2024-12-05 12:44:02 root] (utils.py 283): INFO Epoch: [15] [1560/2502] eta: 0:12:38 lr: 0.000011 loss_cls: 3.8908 (3.8573) grad_norm: 4.2611 (4.3159) time: 0.8319 data: 0.0003 max mem: 8421 +[2024-12-05 12:44:10 root] (utils.py 283): INFO Epoch: [15] [1570/2502] eta: 0:12:30 lr: 0.000011 loss_cls: 4.0529 (3.8570) grad_norm: 4.2711 (4.3161) time: 0.8293 data: 0.0004 max mem: 8421 +[2024-12-05 12:44:18 root] (utils.py 283): INFO Epoch: [15] [1580/2502] eta: 0:12:22 lr: 0.000011 loss_cls: 3.9597 (3.8572) grad_norm: 4.0901 (4.3143) time: 0.8080 data: 0.0003 max mem: 8421 +[2024-12-05 12:44:26 root] (utils.py 283): INFO Epoch: [15] [1590/2502] eta: 0:12:14 lr: 0.000011 loss_cls: 3.9549 (3.8576) grad_norm: 4.2262 (4.3155) time: 0.7924 data: 0.0003 max mem: 8421 +[2024-12-05 12:44:34 root] (utils.py 283): INFO Epoch: [15] [1600/2502] eta: 0:12:06 lr: 0.000011 loss_cls: 3.8116 (3.8569) grad_norm: 4.3427 (4.3161) time: 0.8181 data: 0.0003 max mem: 8421 +[2024-12-05 12:44:42 root] (utils.py 283): INFO Epoch: [15] [1610/2502] eta: 0:11:58 lr: 0.000011 loss_cls: 3.6306 (3.8545) grad_norm: 4.1460 (4.3154) time: 0.8329 data: 0.0004 max mem: 8421 +[2024-12-05 12:44:51 root] (utils.py 283): INFO Epoch: [15] [1620/2502] eta: 0:11:50 lr: 0.000011 loss_cls: 4.0070 (3.8561) grad_norm: 4.2103 (4.3155) time: 0.8248 data: 0.0004 max mem: 8421 +[2024-12-05 12:44:59 root] (utils.py 283): INFO Epoch: [15] [1630/2502] eta: 0:11:42 lr: 0.000011 loss_cls: 4.1756 (3.8569) grad_norm: 4.1830 (4.3143) time: 0.8217 data: 0.0004 max mem: 8421 +[2024-12-05 12:45:07 root] (utils.py 283): INFO Epoch: [15] [1640/2502] eta: 0:11:34 lr: 0.000011 loss_cls: 4.0294 (3.8579) grad_norm: 4.1741 (4.3140) time: 0.7979 data: 0.0003 max mem: 8421 +[2024-12-05 12:45:14 root] (utils.py 283): INFO Epoch: [15] [1650/2502] eta: 0:11:26 lr: 0.000011 loss_cls: 3.9338 (3.8573) grad_norm: 4.3056 (4.3172) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 12:45:22 root] (utils.py 283): INFO Epoch: [15] [1660/2502] eta: 0:11:18 lr: 0.000011 loss_cls: 3.7530 (3.8568) grad_norm: 4.3056 (4.3186) time: 0.7848 data: 0.0002 max mem: 8421 +[2024-12-05 12:45:30 root] (utils.py 283): INFO Epoch: [15] [1670/2502] eta: 0:11:10 lr: 0.000011 loss_cls: 3.7724 (3.8564) grad_norm: 4.1627 (4.3188) time: 0.7822 data: 0.0002 max mem: 8421 +[2024-12-05 12:45:38 root] (utils.py 283): INFO Epoch: [15] [1680/2502] eta: 0:11:02 lr: 0.000011 loss_cls: 3.9776 (3.8559) grad_norm: 4.2858 (4.3200) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-05 12:45:46 root] (utils.py 283): INFO Epoch: [15] [1690/2502] eta: 0:10:53 lr: 0.000011 loss_cls: 4.0158 (3.8557) grad_norm: 4.1980 (4.3195) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-05 12:45:54 root] (utils.py 283): INFO Epoch: [15] [1700/2502] eta: 0:10:45 lr: 0.000011 loss_cls: 4.0329 (3.8564) grad_norm: 4.0807 (4.3200) time: 0.7983 data: 0.0003 max mem: 8421 +[2024-12-05 12:46:02 root] (utils.py 283): INFO Epoch: [15] [1710/2502] eta: 0:10:37 lr: 0.000011 loss_cls: 4.0807 (3.8562) grad_norm: 4.2768 (4.3210) time: 0.8181 data: 0.0002 max mem: 8421 +[2024-12-05 12:46:10 root] (utils.py 283): INFO Epoch: [15] [1720/2502] eta: 0:10:29 lr: 0.000011 loss_cls: 3.9536 (3.8556) grad_norm: 4.1728 (4.3200) time: 0.8190 data: 0.0002 max mem: 8421 +[2024-12-05 12:46:18 root] (utils.py 283): INFO Epoch: [15] [1730/2502] eta: 0:10:21 lr: 0.000011 loss_cls: 4.0079 (3.8560) grad_norm: 4.0950 (4.3215) time: 0.7943 data: 0.0002 max mem: 8421 +[2024-12-05 12:46:26 root] (utils.py 283): INFO Epoch: [15] [1740/2502] eta: 0:10:13 lr: 0.000011 loss_cls: 4.0226 (3.8556) grad_norm: 4.0932 (4.3215) time: 0.7749 data: 0.0002 max mem: 8421 +[2024-12-05 12:46:34 root] (utils.py 283): INFO Epoch: [15] [1750/2502] eta: 0:10:05 lr: 0.000011 loss_cls: 3.8156 (3.8555) grad_norm: 4.1834 (4.3222) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-05 12:46:42 root] (utils.py 283): INFO Epoch: [15] [1760/2502] eta: 0:09:57 lr: 0.000011 loss_cls: 4.0820 (3.8576) grad_norm: 4.2947 (4.3223) time: 0.7948 data: 0.0002 max mem: 8421 +[2024-12-05 12:46:50 root] (utils.py 283): INFO Epoch: [15] [1770/2502] eta: 0:09:49 lr: 0.000011 loss_cls: 4.1127 (3.8596) grad_norm: 4.1836 (4.3212) time: 0.8128 data: 0.0003 max mem: 8421 +[2024-12-05 12:47:07 root] (utils.py 283): INFO Epoch: [15] [1780/2502] eta: 0:09:45 lr: 0.000011 loss_cls: 4.0997 (3.8596) grad_norm: 4.1528 (4.3217) time: 1.2781 data: 0.0003 max mem: 8421 +[2024-12-05 12:47:19 root] (utils.py 283): INFO Epoch: [15] [1790/2502] eta: 0:09:38 lr: 0.000011 loss_cls: 3.9800 (3.8600) grad_norm: 4.2733 (4.3220) time: 1.4543 data: 0.0003 max mem: 8421 +[2024-12-05 12:47:27 root] (utils.py 283): INFO Epoch: [15] [1800/2502] eta: 0:09:30 lr: 0.000011 loss_cls: 3.9800 (3.8599) grad_norm: 4.2435 (4.3219) time: 0.9748 data: 0.0003 max mem: 8421 +[2024-12-05 12:47:34 root] (utils.py 283): INFO Epoch: [15] [1810/2502] eta: 0:09:21 lr: 0.000011 loss_cls: 3.9656 (3.8604) grad_norm: 4.1420 (4.3230) time: 0.7730 data: 0.0002 max mem: 8421 +[2024-12-05 12:47:42 root] (utils.py 283): INFO Epoch: [15] [1820/2502] eta: 0:09:13 lr: 0.000011 loss_cls: 4.0489 (3.8623) grad_norm: 4.1934 (4.3244) time: 0.7830 data: 0.0002 max mem: 8421 +[2024-12-05 12:47:50 root] (utils.py 283): INFO Epoch: [15] [1830/2502] eta: 0:09:05 lr: 0.000011 loss_cls: 4.1581 (3.8636) grad_norm: 4.1934 (4.3234) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 12:47:58 root] (utils.py 283): INFO Epoch: [15] [1840/2502] eta: 0:08:57 lr: 0.000011 loss_cls: 3.8917 (3.8619) grad_norm: 4.2316 (4.3232) time: 0.7710 data: 0.0002 max mem: 8421 +[2024-12-05 12:48:05 root] (utils.py 283): INFO Epoch: [15] [1850/2502] eta: 0:08:48 lr: 0.000011 loss_cls: 3.6386 (3.8619) grad_norm: 4.2491 (4.3243) time: 0.7701 data: 0.0002 max mem: 8421 +[2024-12-05 12:48:13 root] (utils.py 283): INFO Epoch: [15] [1860/2502] eta: 0:08:40 lr: 0.000011 loss_cls: 3.6386 (3.8611) grad_norm: 4.0916 (4.3233) time: 0.7749 data: 0.0002 max mem: 8421 +[2024-12-05 12:48:21 root] (utils.py 283): INFO Epoch: [15] [1870/2502] eta: 0:08:32 lr: 0.000011 loss_cls: 3.8817 (3.8611) grad_norm: 4.0584 (4.3221) time: 0.7840 data: 0.0002 max mem: 8421 +[2024-12-05 12:48:29 root] (utils.py 283): INFO Epoch: [15] [1880/2502] eta: 0:08:24 lr: 0.000011 loss_cls: 4.0620 (3.8614) grad_norm: 4.2233 (4.3239) time: 0.7951 data: 0.0003 max mem: 8421 +[2024-12-05 12:48:37 root] (utils.py 283): INFO Epoch: [15] [1890/2502] eta: 0:08:16 lr: 0.000011 loss_cls: 4.0183 (3.8619) grad_norm: 4.2600 (4.3233) time: 0.8016 data: 0.0003 max mem: 8421 +[2024-12-05 12:48:45 root] (utils.py 283): INFO Epoch: [15] [1900/2502] eta: 0:08:08 lr: 0.000011 loss_cls: 3.9281 (3.8617) grad_norm: 4.1284 (4.3231) time: 0.8031 data: 0.0003 max mem: 8421 +[2024-12-05 12:48:54 root] (utils.py 283): INFO Epoch: [15] [1910/2502] eta: 0:08:00 lr: 0.000011 loss_cls: 3.9281 (3.8614) grad_norm: 4.1443 (4.3220) time: 0.8287 data: 0.0003 max mem: 8421 +[2024-12-05 12:49:02 root] (utils.py 283): INFO Epoch: [15] [1920/2502] eta: 0:07:51 lr: 0.000011 loss_cls: 4.1955 (3.8620) grad_norm: 4.1966 (4.3225) time: 0.8354 data: 0.0003 max mem: 8421 +[2024-12-05 12:49:10 root] (utils.py 283): INFO Epoch: [15] [1930/2502] eta: 0:07:43 lr: 0.000011 loss_cls: 3.7306 (3.8604) grad_norm: 4.1966 (4.3221) time: 0.8351 data: 0.0003 max mem: 8421 +[2024-12-05 12:49:21 root] (utils.py 283): INFO Epoch: [15] [1940/2502] eta: 0:07:36 lr: 0.000011 loss_cls: 3.7306 (3.8610) grad_norm: 4.2796 (4.3248) time: 0.9453 data: 0.0003 max mem: 8421 +[2024-12-05 12:49:38 root] (utils.py 283): INFO Epoch: [15] [1950/2502] eta: 0:07:30 lr: 0.000011 loss_cls: 4.0254 (3.8609) grad_norm: 4.1768 (4.3241) time: 1.3828 data: 0.0003 max mem: 8421 +[2024-12-05 12:49:48 root] (utils.py 283): INFO Epoch: [15] [1960/2502] eta: 0:07:23 lr: 0.000011 loss_cls: 4.0609 (3.8626) grad_norm: 4.0090 (4.3247) time: 1.3430 data: 0.0003 max mem: 8421 +[2024-12-05 12:49:56 root] (utils.py 283): INFO Epoch: [15] [1970/2502] eta: 0:07:14 lr: 0.000011 loss_cls: 4.0566 (3.8624) grad_norm: 4.1302 (4.3245) time: 0.8818 data: 0.0003 max mem: 8421 +[2024-12-05 12:50:04 root] (utils.py 283): INFO Epoch: [15] [1980/2502] eta: 0:07:06 lr: 0.000011 loss_cls: 4.0566 (3.8639) grad_norm: 4.1782 (4.3244) time: 0.8077 data: 0.0003 max mem: 8421 +[2024-12-05 12:50:12 root] (utils.py 283): INFO Epoch: [15] [1990/2502] eta: 0:06:58 lr: 0.000011 loss_cls: 4.0255 (3.8649) grad_norm: 4.2662 (4.3252) time: 0.8199 data: 0.0004 max mem: 8421 +[2024-12-05 12:50:20 root] (utils.py 283): INFO Epoch: [15] [2000/2502] eta: 0:06:50 lr: 0.000011 loss_cls: 3.6608 (3.8634) grad_norm: 4.1256 (4.3245) time: 0.8205 data: 0.0004 max mem: 8421 +[2024-12-05 12:50:28 root] (utils.py 283): INFO Epoch: [15] [2010/2502] eta: 0:06:42 lr: 0.000011 loss_cls: 3.5617 (3.8625) grad_norm: 4.1256 (4.3238) time: 0.8094 data: 0.0004 max mem: 8421 +[2024-12-05 12:50:36 root] (utils.py 283): INFO Epoch: [15] [2020/2502] eta: 0:06:34 lr: 0.000011 loss_cls: 3.9862 (3.8630) grad_norm: 4.1235 (4.3228) time: 0.8043 data: 0.0003 max mem: 8421 +[2024-12-05 12:50:45 root] (utils.py 283): INFO Epoch: [15] [2030/2502] eta: 0:06:25 lr: 0.000011 loss_cls: 3.9862 (3.8624) grad_norm: 4.0166 (4.3217) time: 0.8190 data: 0.0003 max mem: 8421 +[2024-12-05 12:50:53 root] (utils.py 283): INFO Epoch: [15] [2040/2502] eta: 0:06:17 lr: 0.000011 loss_cls: 3.9432 (3.8626) grad_norm: 4.0658 (4.3205) time: 0.8352 data: 0.0003 max mem: 8421 +[2024-12-05 12:51:01 root] (utils.py 283): INFO Epoch: [15] [2050/2502] eta: 0:06:09 lr: 0.000011 loss_cls: 3.8768 (3.8629) grad_norm: 4.1314 (4.3198) time: 0.8278 data: 0.0003 max mem: 8421 +[2024-12-05 12:51:09 root] (utils.py 283): INFO Epoch: [15] [2060/2502] eta: 0:06:01 lr: 0.000011 loss_cls: 4.0710 (3.8634) grad_norm: 4.0594 (4.3180) time: 0.8013 data: 0.0003 max mem: 8421 +[2024-12-05 12:51:17 root] (utils.py 283): INFO Epoch: [15] [2070/2502] eta: 0:05:53 lr: 0.000011 loss_cls: 3.9416 (3.8624) grad_norm: 4.0416 (4.3172) time: 0.7860 data: 0.0003 max mem: 8421 +[2024-12-05 12:51:25 root] (utils.py 283): INFO Epoch: [15] [2080/2502] eta: 0:05:44 lr: 0.000011 loss_cls: 3.6528 (3.8615) grad_norm: 4.1861 (4.3172) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-05 12:51:32 root] (utils.py 283): INFO Epoch: [15] [2090/2502] eta: 0:05:36 lr: 0.000011 loss_cls: 3.7461 (3.8609) grad_norm: 4.0416 (4.3164) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-05 12:51:40 root] (utils.py 283): INFO Epoch: [15] [2100/2502] eta: 0:05:28 lr: 0.000011 loss_cls: 3.7503 (3.8604) grad_norm: 4.0681 (4.3157) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-05 12:51:48 root] (utils.py 283): INFO Epoch: [15] [2110/2502] eta: 0:05:20 lr: 0.000011 loss_cls: 3.7503 (3.8596) grad_norm: 4.1145 (4.3156) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-05 12:51:55 root] (utils.py 283): INFO Epoch: [15] [2120/2502] eta: 0:05:11 lr: 0.000011 loss_cls: 3.8957 (3.8607) grad_norm: 4.2504 (4.3164) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:03 root] (utils.py 283): INFO Epoch: [15] [2130/2502] eta: 0:05:03 lr: 0.000011 loss_cls: 4.0830 (3.8608) grad_norm: 4.1907 (4.3161) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:11 root] (utils.py 283): INFO Epoch: [15] [2140/2502] eta: 0:04:55 lr: 0.000011 loss_cls: 3.8982 (3.8608) grad_norm: 4.0716 (4.3151) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:18 root] (utils.py 283): INFO Epoch: [15] [2150/2502] eta: 0:04:47 lr: 0.000011 loss_cls: 4.0346 (3.8611) grad_norm: 4.1372 (4.3144) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:26 root] (utils.py 283): INFO Epoch: [15] [2160/2502] eta: 0:04:38 lr: 0.000011 loss_cls: 4.0034 (3.8618) grad_norm: 4.1780 (4.3148) time: 0.7673 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:34 root] (utils.py 283): INFO Epoch: [15] [2170/2502] eta: 0:04:30 lr: 0.000011 loss_cls: 4.1434 (3.8626) grad_norm: 4.1780 (4.3157) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:41 root] (utils.py 283): INFO Epoch: [15] [2180/2502] eta: 0:04:22 lr: 0.000011 loss_cls: 4.1524 (3.8628) grad_norm: 4.1726 (4.3154) time: 0.7685 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:49 root] (utils.py 283): INFO Epoch: [15] [2190/2502] eta: 0:04:14 lr: 0.000011 loss_cls: 4.0106 (3.8624) grad_norm: 4.1059 (4.3141) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-05 12:52:57 root] (utils.py 283): INFO Epoch: [15] [2200/2502] eta: 0:04:05 lr: 0.000011 loss_cls: 4.0106 (3.8630) grad_norm: 4.1191 (4.3143) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 12:53:04 root] (utils.py 283): INFO Epoch: [15] [2210/2502] eta: 0:03:57 lr: 0.000011 loss_cls: 3.8727 (3.8624) grad_norm: 4.2763 (4.3141) time: 0.7766 data: 0.0002 max mem: 8421 +[2024-12-05 12:53:12 root] (utils.py 283): INFO Epoch: [15] [2220/2502] eta: 0:03:49 lr: 0.000011 loss_cls: 3.6780 (3.8616) grad_norm: 4.2530 (4.3139) time: 0.7710 data: 0.0002 max mem: 8421 +[2024-12-05 12:53:20 root] (utils.py 283): INFO Epoch: [15] [2230/2502] eta: 0:03:41 lr: 0.000011 loss_cls: 3.7335 (3.8619) grad_norm: 4.1941 (4.3143) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 12:53:28 root] (utils.py 283): INFO Epoch: [15] [2240/2502] eta: 0:03:33 lr: 0.000011 loss_cls: 3.7335 (3.8607) grad_norm: 4.1324 (4.3142) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-05 12:53:35 root] (utils.py 283): INFO Epoch: [15] [2250/2502] eta: 0:03:25 lr: 0.000011 loss_cls: 3.8065 (3.8611) grad_norm: 4.0485 (4.3127) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-05 12:53:43 root] (utils.py 283): INFO Epoch: [15] [2260/2502] eta: 0:03:16 lr: 0.000011 loss_cls: 3.8065 (3.8601) grad_norm: 3.9707 (4.3116) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-05 12:53:51 root] (utils.py 283): INFO Epoch: [15] [2270/2502] eta: 0:03:08 lr: 0.000011 loss_cls: 3.6883 (3.8602) grad_norm: 3.9280 (4.3107) time: 0.7639 data: 0.0002 max mem: 8421 +[2024-12-05 12:53:58 root] (utils.py 283): INFO Epoch: [15] [2280/2502] eta: 0:03:00 lr: 0.000011 loss_cls: 3.8698 (3.8609) grad_norm: 3.9280 (4.3095) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-05 12:54:06 root] (utils.py 283): INFO Epoch: [15] [2290/2502] eta: 0:02:52 lr: 0.000011 loss_cls: 4.0208 (3.8617) grad_norm: 4.1019 (4.3095) time: 0.7720 data: 0.0002 max mem: 8421 +[2024-12-05 12:54:14 root] (utils.py 283): INFO Epoch: [15] [2300/2502] eta: 0:02:44 lr: 0.000011 loss_cls: 4.0936 (3.8618) grad_norm: 4.2977 (4.3102) time: 0.7842 data: 0.0002 max mem: 8421 +[2024-12-05 12:54:22 root] (utils.py 283): INFO Epoch: [15] [2310/2502] eta: 0:02:35 lr: 0.000011 loss_cls: 4.1002 (3.8623) grad_norm: 4.3096 (4.3097) time: 0.7752 data: 0.0002 max mem: 8421 +[2024-12-05 12:54:29 root] (utils.py 283): INFO Epoch: [15] [2320/2502] eta: 0:02:27 lr: 0.000011 loss_cls: 4.1340 (3.8622) grad_norm: 4.2097 (4.3106) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-05 12:54:37 root] (utils.py 283): INFO Epoch: [15] [2330/2502] eta: 0:02:19 lr: 0.000011 loss_cls: 4.0077 (3.8623) grad_norm: 4.3049 (4.3119) time: 0.7719 data: 0.0003 max mem: 8421 +[2024-12-05 12:54:45 root] (utils.py 283): INFO Epoch: [15] [2340/2502] eta: 0:02:11 lr: 0.000011 loss_cls: 3.9036 (3.8622) grad_norm: 4.4282 (4.3132) time: 0.7803 data: 0.0002 max mem: 8421 +[2024-12-05 12:54:53 root] (utils.py 283): INFO Epoch: [15] [2350/2502] eta: 0:02:03 lr: 0.000011 loss_cls: 3.8506 (3.8617) grad_norm: 4.3122 (4.3131) time: 0.7853 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:00 root] (utils.py 283): INFO Epoch: [15] [2360/2502] eta: 0:01:55 lr: 0.000011 loss_cls: 3.8506 (3.8614) grad_norm: 4.2542 (4.3149) time: 0.7768 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:08 root] (utils.py 283): INFO Epoch: [15] [2370/2502] eta: 0:01:47 lr: 0.000011 loss_cls: 3.7384 (3.8608) grad_norm: 4.2542 (4.3152) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:16 root] (utils.py 283): INFO Epoch: [15] [2380/2502] eta: 0:01:38 lr: 0.000011 loss_cls: 3.7384 (3.8605) grad_norm: 4.1283 (4.3153) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:24 root] (utils.py 283): INFO Epoch: [15] [2390/2502] eta: 0:01:30 lr: 0.000011 loss_cls: 4.0025 (3.8605) grad_norm: 4.0390 (4.3143) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:32 root] (utils.py 283): INFO Epoch: [15] [2400/2502] eta: 0:01:22 lr: 0.000011 loss_cls: 4.0025 (3.8595) grad_norm: 4.0706 (4.3141) time: 0.7882 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:40 root] (utils.py 283): INFO Epoch: [15] [2410/2502] eta: 0:01:14 lr: 0.000011 loss_cls: 3.8492 (3.8592) grad_norm: 4.1007 (4.3136) time: 0.7901 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:47 root] (utils.py 283): INFO Epoch: [15] [2420/2502] eta: 0:01:06 lr: 0.000011 loss_cls: 4.0381 (3.8599) grad_norm: 4.0833 (4.3134) time: 0.7893 data: 0.0002 max mem: 8421 +[2024-12-05 12:55:55 root] (utils.py 283): INFO Epoch: [15] [2430/2502] eta: 0:00:58 lr: 0.000011 loss_cls: 4.1018 (3.8606) grad_norm: 4.2011 (4.3131) time: 0.7886 data: 0.0002 max mem: 8421 +[2024-12-05 12:56:03 root] (utils.py 283): INFO Epoch: [15] [2440/2502] eta: 0:00:50 lr: 0.000011 loss_cls: 4.1383 (3.8609) grad_norm: 4.1454 (4.3126) time: 0.7899 data: 0.0002 max mem: 8421 +[2024-12-05 12:56:11 root] (utils.py 283): INFO Epoch: [15] [2450/2502] eta: 0:00:42 lr: 0.000011 loss_cls: 4.2854 (3.8624) grad_norm: 4.2320 (4.3128) time: 0.7895 data: 0.0002 max mem: 8421 +[2024-12-05 12:56:19 root] (utils.py 283): INFO Epoch: [15] [2460/2502] eta: 0:00:34 lr: 0.000011 loss_cls: 4.1689 (3.8617) grad_norm: 4.3914 (4.3133) time: 0.7901 data: 0.0002 max mem: 8421 +[2024-12-05 12:56:27 root] (utils.py 283): INFO Epoch: [15] [2470/2502] eta: 0:00:25 lr: 0.000011 loss_cls: 4.1423 (3.8622) grad_norm: 4.3157 (4.3146) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-05 12:56:35 root] (utils.py 283): INFO Epoch: [15] [2480/2502] eta: 0:00:17 lr: 0.000011 loss_cls: 4.1423 (3.8624) grad_norm: 4.1035 (4.3147) time: 0.7880 data: 0.0003 max mem: 8421 +[2024-12-05 12:56:43 root] (utils.py 283): INFO Epoch: [15] [2490/2502] eta: 0:00:09 lr: 0.000011 loss_cls: 3.7603 (3.8624) grad_norm: 4.3072 (4.3147) time: 0.8171 data: 0.0226 max mem: 8421 +[2024-12-05 12:56:51 root] (utils.py 283): INFO Epoch: [15] [2500/2502] eta: 0:00:01 lr: 0.000011 loss_cls: 3.9196 (3.8623) grad_norm: 4.3358 (4.3154) time: 0.8182 data: 0.0226 max mem: 8421 +[2024-12-05 12:56:52 root] (utils.py 283): INFO Epoch: [15] [2501/2502] eta: 0:00:00 lr: 0.000011 loss_cls: 3.9051 (3.8622) grad_norm: 4.3358 (4.3156) time: 0.8106 data: 0.0226 max mem: 8421 +[2024-12-05 12:56:52 root] (utils.py 297): INFO Epoch: [15] Total time: 0:33:48 (0.8106 s / it) +[2024-12-05 12:56:52 root] (engine.py 178): INFO Averaged stats:lr: 0.000011 loss_cls: 3.9051 (3.8624) grad_norm: 4.3358 (4.3156) +[2024-12-05 12:56:52 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7433 (0.7433) acc1: 85.1562 (85.1562) acc3: 96.0938 (96.0938) acc5: 96.8750 (96.8750) time: 0.1309 data: 0.0003 max mem: 8421 +[2024-12-05 12:56:54 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8233 (0.8740) acc1: 84.3750 (82.1023) acc3: 93.7500 (92.9688) acc5: 95.3125 (95.7386) time: 0.1315 data: 0.0004 max mem: 8421 +[2024-12-05 12:56:55 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8892 (0.9214) acc1: 79.6875 (80.5804) acc3: 92.1875 (92.4479) acc5: 95.3125 (95.2009) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 12:56:56 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9594 (0.9283) acc1: 80.4688 (80.0907) acc3: 91.4062 (92.6411) acc5: 95.3125 (95.2621) time: 0.1324 data: 0.0005 max mem: 8421 +[2024-12-05 12:56:58 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8443 (0.9214) acc1: 80.4688 (80.2020) acc3: 93.7500 (92.7591) acc5: 95.3125 (95.3125) time: 0.1329 data: 0.0005 max mem: 8421 +[2024-12-05 12:56:59 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0300 (1.0117) acc1: 74.2188 (78.1097) acc3: 87.5000 (91.0386) acc5: 92.1875 (94.2402) time: 0.1327 data: 0.0005 max mem: 8421 +[2024-12-05 12:57:00 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3304 (1.0555) acc1: 71.0938 (77.2669) acc3: 85.1562 (90.3304) acc5: 89.8438 (93.5323) time: 0.1328 data: 0.0005 max mem: 8421 +[2024-12-05 12:57:02 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2707 (1.0971) acc1: 72.6562 (76.1994) acc3: 85.9375 (89.7997) acc5: 89.8438 (93.1228) time: 0.1331 data: 0.0005 max mem: 8421 +[2024-12-05 12:57:03 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2982 (1.1348) acc1: 70.3125 (75.4051) acc3: 85.1562 (89.0721) acc5: 89.0625 (92.5154) time: 0.1354 data: 0.0007 max mem: 8421 +[2024-12-05 12:57:04 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3524 (1.1642) acc1: 69.5312 (74.6223) acc3: 82.8125 (88.5903) acc5: 88.2812 (92.0931) time: 0.1349 data: 0.0007 max mem: 8421 +[2024-12-05 12:57:05 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1995 (1.1532) acc1: 71.8750 (74.7920) acc3: 88.2812 (88.7840) acc5: 91.4062 (92.2320) time: 0.1330 data: 0.0006 max mem: 8421 +[2024-12-05 12:57:05 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1333 s / it) +[2024-12-05 12:57:06 root] (engine.py 263): INFO * Acc@1 74.562 Acc@3 88.772 Acc@5 92.206 loss 1.155 flops 1.285 layer_flops 1.251 +[2024-12-05 12:57:06 root] (main.py 546): INFO Accuracy of the network on the 50000 test images: 74.6% +[2024-12-05 12:57:06 root] (main.py 550): INFO Max accuracy: 74.56% +[2024-12-05 12:57:07 root] (utils.py 283): INFO Epoch: [16] [ 0/2502] eta: 0:32:48 lr: 0.000011 loss_cls: 4.0389 (4.0389) grad_norm: 4.7461 (4.7461) time: 0.7869 data: 0.0004 max mem: 8421 +[2024-12-05 13:08:02 root] (main.py 225): INFO Namespace(batch_size=128, epochs=30, model='RMeeTo_tiny', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', output_dir='check/tiny/30', device='cuda', seed=0, autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='5', metric='X', distance='cosine', if_order=True, if_random=False, if_merge_odd=False, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-05 13:08:07 root] (main.py 288): INFO Creating model: RMeeTo_tiny +[2024-12-05 13:08:12 root] (main.py 368): INFO number of params: 7148008 +[2024-12-05 13:08:15 root] (main.py 484): INFO Start training for 14 epochs +[2024-12-05 13:08:23 root] (utils.py 283): INFO Epoch: [16] [ 0/2502] eta: 5:30:14 lr: 0.000011 loss_cls: 4.3593 (4.3593) grad_norm: 4.2082 (4.2082) time: 7.9194 data: 0.0018 max mem: 8394 +[2024-12-05 13:08:39 root] (utils.py 283): INFO Epoch: [16] [ 10/2502] eta: 1:30:15 lr: 0.000011 loss_cls: 4.3564 (4.2388) grad_norm: 4.2457 (4.3274) time: 2.1731 data: 0.0005 max mem: 8421 +[2024-12-05 13:11:23 root] (main.py 225): INFO Namespace(batch_size=128, epochs=30, model='RMeeTo_tiny', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', output_dir='check/tiny/30', device='cuda', seed=0, autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='5', metric='X', distance='cosine', if_order=True, if_random=False, if_merge_odd=False, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-05 13:11:28 root] (main.py 288): INFO Creating model: RMeeTo_tiny +[2024-12-05 13:11:33 root] (main.py 368): INFO number of params: 7148008 +[2024-12-05 13:11:35 root] (main.py 484): INFO Start training for 14 epochs +[2024-12-05 13:11:41 root] (utils.py 283): INFO Epoch: [16] [ 0/2502] eta: 3:44:43 lr: 0.000011 loss_cls: 4.3593 (4.3593) grad_norm: 4.2082 (4.2082) time: 5.3889 data: 0.0015 max mem: 8394 +[2024-12-05 13:11:49 root] (utils.py 283): INFO Epoch: [16] [ 10/2502] eta: 0:50:49 lr: 0.000011 loss_cls: 4.3564 (4.2388) grad_norm: 4.2455 (4.3273) time: 1.2238 data: 0.0004 max mem: 8421 +[2024-12-05 13:11:57 root] (utils.py 283): INFO Epoch: [16] [ 20/2502] eta: 0:42:27 lr: 0.000011 loss_cls: 4.1705 (4.0920) grad_norm: 4.2455 (4.2656) time: 0.8082 data: 0.0002 max mem: 8421 +[2024-12-05 13:12:12 root] (utils.py 283): INFO Epoch: [16] [ 30/2502] eta: 0:49:14 lr: 0.000011 loss_cls: 4.0813 (4.1139) grad_norm: 4.0508 (4.1957) time: 1.1793 data: 0.0003 max mem: 8421 +[2024-12-05 13:12:26 root] (utils.py 283): INFO Epoch: [16] [ 40/2502] eta: 0:50:30 lr: 0.000011 loss_cls: 4.0024 (4.0487) grad_norm: 4.0229 (4.2504) time: 1.4457 data: 0.0003 max mem: 8421 +[2024-12-05 13:12:34 root] (utils.py 283): INFO Epoch: [16] [ 50/2502] eta: 0:46:42 lr: 0.000011 loss_cls: 3.9472 (4.0232) grad_norm: 4.0298 (4.2161) time: 1.0622 data: 0.0003 max mem: 8421 +[2024-12-05 13:12:41 root] (utils.py 283): INFO Epoch: [16] [ 60/2502] eta: 0:44:03 lr: 0.000011 loss_cls: 3.9472 (3.9910) grad_norm: 4.1094 (4.3555) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 13:12:49 root] (utils.py 283): INFO Epoch: [16] [ 70/2502] eta: 0:42:08 lr: 0.000011 loss_cls: 3.9274 (3.9513) grad_norm: 4.2260 (4.3823) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-05 13:12:57 root] (utils.py 283): INFO Epoch: [16] [ 80/2502] eta: 0:40:47 lr: 0.000011 loss_cls: 3.9489 (3.9301) grad_norm: 4.1717 (4.3555) time: 0.7903 data: 0.0003 max mem: 8421 +[2024-12-05 13:13:11 root] (utils.py 283): INFO Epoch: [16] [ 90/2502] eta: 0:42:12 lr: 0.000011 loss_cls: 3.9489 (3.9175) grad_norm: 4.1691 (4.3399) time: 1.0854 data: 0.0003 max mem: 8421 +[2024-12-05 13:13:26 root] (utils.py 283): INFO Epoch: [16] [ 100/2502] eta: 0:43:51 lr: 0.000011 loss_cls: 4.0892 (3.9277) grad_norm: 4.1781 (4.3377) time: 1.4411 data: 0.0003 max mem: 8421 +[2024-12-05 13:13:34 root] (utils.py 283): INFO Epoch: [16] [ 110/2502] eta: 0:42:30 lr: 0.000011 loss_cls: 4.1672 (3.9208) grad_norm: 4.2320 (4.3202) time: 1.1420 data: 0.0003 max mem: 8421 +[2024-12-05 13:13:41 root] (utils.py 283): INFO Epoch: [16] [ 120/2502] eta: 0:41:23 lr: 0.000011 loss_cls: 3.9329 (3.9197) grad_norm: 4.2841 (4.3581) time: 0.7745 data: 0.0003 max mem: 8421 +[2024-12-05 13:13:49 root] (utils.py 283): INFO Epoch: [16] [ 130/2502] eta: 0:40:23 lr: 0.000011 loss_cls: 3.9335 (3.9121) grad_norm: 4.3710 (4.3482) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-05 13:13:57 root] (utils.py 283): INFO Epoch: [16] [ 140/2502] eta: 0:39:32 lr: 0.000011 loss_cls: 3.9335 (3.8990) grad_norm: 4.2717 (4.3522) time: 0.7746 data: 0.0003 max mem: 8421 +[2024-12-05 13:14:05 root] (utils.py 283): INFO Epoch: [16] [ 150/2502] eta: 0:38:48 lr: 0.000011 loss_cls: 3.8433 (3.8925) grad_norm: 4.2640 (4.3475) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 13:14:13 root] (utils.py 283): INFO Epoch: [16] [ 160/2502] eta: 0:38:08 lr: 0.000011 loss_cls: 3.8470 (3.8909) grad_norm: 4.1785 (4.3337) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-05 13:14:21 root] (utils.py 283): INFO Epoch: [16] [ 170/2502] eta: 0:37:33 lr: 0.000011 loss_cls: 3.8759 (3.8876) grad_norm: 4.0118 (4.3246) time: 0.7892 data: 0.0003 max mem: 8421 +[2024-12-05 13:14:28 root] (utils.py 283): INFO Epoch: [16] [ 180/2502] eta: 0:37:00 lr: 0.000011 loss_cls: 3.8584 (3.8703) grad_norm: 4.0633 (4.3141) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-05 13:14:36 root] (utils.py 283): INFO Epoch: [16] [ 190/2502] eta: 0:36:30 lr: 0.000011 loss_cls: 3.6891 (3.8643) grad_norm: 4.0602 (4.2995) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 13:14:44 root] (utils.py 283): INFO Epoch: [16] [ 200/2502] eta: 0:36:03 lr: 0.000011 loss_cls: 3.5987 (3.8501) grad_norm: 4.0308 (4.2870) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-05 13:14:56 root] (utils.py 283): INFO Epoch: [16] [ 210/2502] eta: 0:36:23 lr: 0.000011 loss_cls: 3.7471 (3.8443) grad_norm: 4.0565 (4.2755) time: 1.0028 data: 0.0003 max mem: 8421 +[2024-12-05 13:15:13 root] (utils.py 283): INFO Epoch: [16] [ 220/2502] eta: 0:37:24 lr: 0.000011 loss_cls: 3.8277 (3.8370) grad_norm: 4.0449 (4.2646) time: 1.4254 data: 0.0003 max mem: 8421 +[2024-12-05 13:15:21 root] (utils.py 283): INFO Epoch: [16] [ 230/2502] eta: 0:37:03 lr: 0.000011 loss_cls: 3.8289 (3.8342) grad_norm: 4.0449 (4.2676) time: 1.2553 data: 0.0002 max mem: 8421 +[2024-12-05 13:15:29 root] (utils.py 283): INFO Epoch: [16] [ 240/2502] eta: 0:36:34 lr: 0.000011 loss_cls: 3.8657 (3.8306) grad_norm: 4.1313 (4.2661) time: 0.8202 data: 0.0002 max mem: 8421 +[2024-12-05 13:15:37 root] (utils.py 283): INFO Epoch: [16] [ 250/2502] eta: 0:36:06 lr: 0.000011 loss_cls: 3.7360 (3.8175) grad_norm: 4.1435 (4.2650) time: 0.7711 data: 0.0002 max mem: 8421 +[2024-12-05 13:15:45 root] (utils.py 283): INFO Epoch: [16] [ 260/2502] eta: 0:35:40 lr: 0.000011 loss_cls: 3.6345 (3.8068) grad_norm: 4.1435 (4.2611) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 13:15:52 root] (utils.py 283): INFO Epoch: [16] [ 270/2502] eta: 0:35:17 lr: 0.000011 loss_cls: 3.7310 (3.8008) grad_norm: 4.2808 (4.2649) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:00 root] (utils.py 283): INFO Epoch: [16] [ 280/2502] eta: 0:34:54 lr: 0.000011 loss_cls: 3.9026 (3.8070) grad_norm: 3.9310 (4.2511) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:08 root] (utils.py 283): INFO Epoch: [16] [ 290/2502] eta: 0:34:35 lr: 0.000011 loss_cls: 3.8926 (3.7976) grad_norm: 3.8940 (4.2472) time: 0.7967 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:17 root] (utils.py 283): INFO Epoch: [16] [ 300/2502] eta: 0:34:18 lr: 0.000011 loss_cls: 3.3868 (3.7892) grad_norm: 3.9731 (4.2401) time: 0.8204 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:25 root] (utils.py 283): INFO Epoch: [16] [ 310/2502] eta: 0:34:03 lr: 0.000011 loss_cls: 3.6135 (3.7872) grad_norm: 4.0382 (4.2494) time: 0.8458 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:33 root] (utils.py 283): INFO Epoch: [16] [ 320/2502] eta: 0:33:45 lr: 0.000011 loss_cls: 4.0424 (3.8005) grad_norm: 4.1990 (4.3795) time: 0.8287 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:41 root] (utils.py 283): INFO Epoch: [16] [ 330/2502] eta: 0:33:26 lr: 0.000011 loss_cls: 4.1587 (3.7982) grad_norm: 4.1808 (4.3759) time: 0.7912 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:49 root] (utils.py 283): INFO Epoch: [16] [ 340/2502] eta: 0:33:09 lr: 0.000011 loss_cls: 3.9582 (3.8032) grad_norm: 4.1552 (4.3691) time: 0.7951 data: 0.0003 max mem: 8421 +[2024-12-05 13:16:57 root] (utils.py 283): INFO Epoch: [16] [ 350/2502] eta: 0:32:52 lr: 0.000011 loss_cls: 3.9555 (3.8033) grad_norm: 4.2238 (4.3703) time: 0.7978 data: 0.0003 max mem: 8421 +[2024-12-05 13:17:05 root] (utils.py 283): INFO Epoch: [16] [ 360/2502] eta: 0:32:35 lr: 0.000011 loss_cls: 3.8978 (3.8008) grad_norm: 4.2238 (4.3679) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-05 13:17:13 root] (utils.py 283): INFO Epoch: [16] [ 370/2502] eta: 0:32:19 lr: 0.000011 loss_cls: 3.7505 (3.7986) grad_norm: 4.0087 (4.3603) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-05 13:17:21 root] (utils.py 283): INFO Epoch: [16] [ 380/2502] eta: 0:32:05 lr: 0.000011 loss_cls: 3.9487 (3.7963) grad_norm: 4.0038 (4.3542) time: 0.8136 data: 0.0003 max mem: 8421 +[2024-12-05 13:17:29 root] (utils.py 283): INFO Epoch: [16] [ 390/2502] eta: 0:31:51 lr: 0.000011 loss_cls: 3.9487 (3.7942) grad_norm: 4.0161 (4.3486) time: 0.8195 data: 0.0003 max mem: 8421 +[2024-12-05 13:17:37 root] (utils.py 283): INFO Epoch: [16] [ 400/2502] eta: 0:31:35 lr: 0.000011 loss_cls: 3.9102 (3.7987) grad_norm: 4.0399 (4.3406) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-05 13:17:45 root] (utils.py 283): INFO Epoch: [16] [ 410/2502] eta: 0:31:20 lr: 0.000011 loss_cls: 3.9620 (3.8053) grad_norm: 4.1474 (4.3413) time: 0.7733 data: 0.0002 max mem: 8421 +[2024-12-05 13:17:52 root] (utils.py 283): INFO Epoch: [16] [ 420/2502] eta: 0:31:04 lr: 0.000011 loss_cls: 4.0408 (3.8070) grad_norm: 4.3428 (4.3416) time: 0.7723 data: 0.0003 max mem: 8421 +[2024-12-05 13:18:00 root] (utils.py 283): INFO Epoch: [16] [ 430/2502] eta: 0:30:50 lr: 0.000011 loss_cls: 4.1507 (3.8127) grad_norm: 4.2870 (4.3407) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-05 13:18:08 root] (utils.py 283): INFO Epoch: [16] [ 440/2502] eta: 0:30:36 lr: 0.000011 loss_cls: 4.1668 (3.8185) grad_norm: 4.1203 (4.3381) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-05 13:18:16 root] (utils.py 283): INFO Epoch: [16] [ 450/2502] eta: 0:30:22 lr: 0.000011 loss_cls: 3.8540 (3.8174) grad_norm: 4.1203 (4.3333) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 13:18:24 root] (utils.py 283): INFO Epoch: [16] [ 460/2502] eta: 0:30:08 lr: 0.000011 loss_cls: 3.7651 (3.8123) grad_norm: 4.1835 (4.3339) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 13:18:31 root] (utils.py 283): INFO Epoch: [16] [ 470/2502] eta: 0:29:55 lr: 0.000011 loss_cls: 3.5208 (3.8027) grad_norm: 4.1893 (4.3303) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-05 13:18:39 root] (utils.py 283): INFO Epoch: [16] [ 480/2502] eta: 0:29:41 lr: 0.000011 loss_cls: 3.5688 (3.8002) grad_norm: 4.0412 (4.3243) time: 0.7729 data: 0.0002 max mem: 8421 +[2024-12-05 13:18:47 root] (utils.py 283): INFO Epoch: [16] [ 490/2502] eta: 0:29:28 lr: 0.000011 loss_cls: 3.7781 (3.8057) grad_norm: 4.0412 (4.3204) time: 0.7751 data: 0.0003 max mem: 8421 +[2024-12-05 13:18:55 root] (utils.py 283): INFO Epoch: [16] [ 500/2502] eta: 0:29:15 lr: 0.000011 loss_cls: 3.8949 (3.8027) grad_norm: 4.0644 (4.3209) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-05 13:19:02 root] (utils.py 283): INFO Epoch: [16] [ 510/2502] eta: 0:29:02 lr: 0.000011 loss_cls: 3.8442 (3.8070) grad_norm: 4.2437 (4.3190) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 13:19:10 root] (utils.py 283): INFO Epoch: [16] [ 520/2502] eta: 0:28:50 lr: 0.000011 loss_cls: 3.9847 (3.8087) grad_norm: 4.2061 (4.3211) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 13:19:18 root] (utils.py 283): INFO Epoch: [16] [ 530/2502] eta: 0:28:38 lr: 0.000011 loss_cls: 3.9036 (3.8032) grad_norm: 4.0308 (4.3163) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 13:19:26 root] (utils.py 283): INFO Epoch: [16] [ 540/2502] eta: 0:28:26 lr: 0.000011 loss_cls: 3.9539 (3.8015) grad_norm: 3.9319 (4.3133) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 13:19:34 root] (utils.py 283): INFO Epoch: [16] [ 550/2502] eta: 0:28:14 lr: 0.000011 loss_cls: 3.9539 (3.8007) grad_norm: 4.0583 (4.3119) time: 0.7895 data: 0.0002 max mem: 8421 +[2024-12-05 13:19:42 root] (utils.py 283): INFO Epoch: [16] [ 560/2502] eta: 0:28:02 lr: 0.000011 loss_cls: 4.1314 (3.8055) grad_norm: 4.2885 (4.3116) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 13:19:49 root] (utils.py 283): INFO Epoch: [16] [ 570/2502] eta: 0:27:51 lr: 0.000011 loss_cls: 4.1311 (3.8085) grad_norm: 4.2390 (4.3088) time: 0.7738 data: 0.0003 max mem: 8421 +[2024-12-05 13:19:57 root] (utils.py 283): INFO Epoch: [16] [ 580/2502] eta: 0:27:39 lr: 0.000011 loss_cls: 4.0118 (3.8104) grad_norm: 4.1035 (4.3076) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:05 root] (utils.py 283): INFO Epoch: [16] [ 590/2502] eta: 0:27:27 lr: 0.000011 loss_cls: 4.0051 (3.8111) grad_norm: 3.9897 (4.3029) time: 0.7713 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:12 root] (utils.py 283): INFO Epoch: [16] [ 600/2502] eta: 0:27:16 lr: 0.000011 loss_cls: 3.9905 (3.8099) grad_norm: 4.0320 (4.3000) time: 0.7710 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:20 root] (utils.py 283): INFO Epoch: [16] [ 610/2502] eta: 0:27:04 lr: 0.000011 loss_cls: 3.7524 (3.8095) grad_norm: 4.1794 (4.3009) time: 0.7728 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:28 root] (utils.py 283): INFO Epoch: [16] [ 620/2502] eta: 0:26:53 lr: 0.000011 loss_cls: 3.7524 (3.8115) grad_norm: 4.1625 (4.2996) time: 0.7716 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:36 root] (utils.py 283): INFO Epoch: [16] [ 630/2502] eta: 0:26:42 lr: 0.000011 loss_cls: 3.9844 (3.8137) grad_norm: 4.1208 (4.3026) time: 0.7693 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:43 root] (utils.py 283): INFO Epoch: [16] [ 640/2502] eta: 0:26:31 lr: 0.000011 loss_cls: 3.8650 (3.8127) grad_norm: 4.2889 (4.3043) time: 0.7703 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:51 root] (utils.py 283): INFO Epoch: [16] [ 650/2502] eta: 0:26:20 lr: 0.000011 loss_cls: 3.8176 (3.8127) grad_norm: 4.2626 (4.3071) time: 0.7700 data: 0.0003 max mem: 8421 +[2024-12-05 13:20:59 root] (utils.py 283): INFO Epoch: [16] [ 660/2502] eta: 0:26:09 lr: 0.000011 loss_cls: 3.5839 (3.8060) grad_norm: 4.2489 (4.3063) time: 0.7669 data: 0.0003 max mem: 8421 +[2024-12-05 13:21:06 root] (utils.py 283): INFO Epoch: [16] [ 670/2502] eta: 0:25:58 lr: 0.000011 loss_cls: 3.7503 (3.8099) grad_norm: 4.2317 (4.3062) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-05 13:21:14 root] (utils.py 283): INFO Epoch: [16] [ 680/2502] eta: 0:25:48 lr: 0.000011 loss_cls: 4.0931 (3.8111) grad_norm: 4.1939 (4.3058) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-05 13:21:22 root] (utils.py 283): INFO Epoch: [16] [ 690/2502] eta: 0:25:38 lr: 0.000011 loss_cls: 3.5645 (3.8070) grad_norm: 4.1616 (4.3036) time: 0.7924 data: 0.0002 max mem: 8421 +[2024-12-05 13:21:30 root] (utils.py 283): INFO Epoch: [16] [ 700/2502] eta: 0:25:28 lr: 0.000011 loss_cls: 3.4599 (3.8050) grad_norm: 4.0364 (4.3018) time: 0.7916 data: 0.0003 max mem: 8421 +[2024-12-05 13:21:38 root] (utils.py 283): INFO Epoch: [16] [ 710/2502] eta: 0:25:17 lr: 0.000011 loss_cls: 3.7521 (3.8041) grad_norm: 4.0693 (4.2997) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-05 13:21:45 root] (utils.py 283): INFO Epoch: [16] [ 720/2502] eta: 0:25:07 lr: 0.000011 loss_cls: 3.7521 (3.8012) grad_norm: 4.0719 (4.2986) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-05 13:21:53 root] (utils.py 283): INFO Epoch: [16] [ 730/2502] eta: 0:24:57 lr: 0.000011 loss_cls: 3.8743 (3.8015) grad_norm: 4.1632 (4.2969) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 13:22:01 root] (utils.py 283): INFO Epoch: [16] [ 740/2502] eta: 0:24:47 lr: 0.000011 loss_cls: 3.9885 (3.8065) grad_norm: 4.1657 (4.2977) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 13:22:09 root] (utils.py 283): INFO Epoch: [16] [ 750/2502] eta: 0:24:37 lr: 0.000011 loss_cls: 4.0268 (3.8048) grad_norm: 4.0086 (4.2946) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-05 13:22:17 root] (utils.py 283): INFO Epoch: [16] [ 760/2502] eta: 0:24:28 lr: 0.000011 loss_cls: 3.9858 (3.8051) grad_norm: 4.1252 (4.2960) time: 0.7994 data: 0.0003 max mem: 8421 +[2024-12-05 13:22:25 root] (utils.py 283): INFO Epoch: [16] [ 770/2502] eta: 0:24:18 lr: 0.000011 loss_cls: 3.9858 (3.8059) grad_norm: 4.1819 (4.3020) time: 0.8051 data: 0.0003 max mem: 8421 +[2024-12-05 13:22:33 root] (utils.py 283): INFO Epoch: [16] [ 780/2502] eta: 0:24:09 lr: 0.000011 loss_cls: 3.8537 (3.8072) grad_norm: 4.0962 (4.3011) time: 0.7990 data: 0.0002 max mem: 8421 +[2024-12-05 13:22:41 root] (utils.py 283): INFO Epoch: [16] [ 790/2502] eta: 0:23:59 lr: 0.000011 loss_cls: 3.7509 (3.8051) grad_norm: 3.9761 (4.2989) time: 0.7883 data: 0.0002 max mem: 8421 +[2024-12-05 13:22:48 root] (utils.py 283): INFO Epoch: [16] [ 800/2502] eta: 0:23:49 lr: 0.000011 loss_cls: 3.7412 (3.8051) grad_norm: 4.0573 (4.2996) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-05 13:22:56 root] (utils.py 283): INFO Epoch: [16] [ 810/2502] eta: 0:23:39 lr: 0.000011 loss_cls: 3.5596 (3.7978) grad_norm: 4.0342 (4.2964) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:04 root] (utils.py 283): INFO Epoch: [16] [ 820/2502] eta: 0:23:29 lr: 0.000011 loss_cls: 3.5596 (3.7990) grad_norm: 4.0337 (4.2952) time: 0.7684 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:11 root] (utils.py 283): INFO Epoch: [16] [ 830/2502] eta: 0:23:19 lr: 0.000011 loss_cls: 3.9195 (3.7948) grad_norm: 4.1154 (4.2924) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:19 root] (utils.py 283): INFO Epoch: [16] [ 840/2502] eta: 0:23:10 lr: 0.000011 loss_cls: 3.5906 (3.7944) grad_norm: 4.2075 (4.2984) time: 0.7707 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:27 root] (utils.py 283): INFO Epoch: [16] [ 850/2502] eta: 0:23:00 lr: 0.000011 loss_cls: 3.9574 (3.7921) grad_norm: 4.3008 (4.2998) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:34 root] (utils.py 283): INFO Epoch: [16] [ 860/2502] eta: 0:22:51 lr: 0.000011 loss_cls: 3.7957 (3.7927) grad_norm: 4.0683 (4.2996) time: 0.7733 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:42 root] (utils.py 283): INFO Epoch: [16] [ 870/2502] eta: 0:22:41 lr: 0.000011 loss_cls: 3.7956 (3.7928) grad_norm: 4.1274 (4.3016) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:50 root] (utils.py 283): INFO Epoch: [16] [ 880/2502] eta: 0:22:32 lr: 0.000011 loss_cls: 3.7956 (3.7907) grad_norm: 4.1823 (4.3004) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 13:23:58 root] (utils.py 283): INFO Epoch: [16] [ 890/2502] eta: 0:22:23 lr: 0.000011 loss_cls: 3.7517 (3.7887) grad_norm: 4.1756 (4.3022) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-05 13:24:06 root] (utils.py 283): INFO Epoch: [16] [ 900/2502] eta: 0:22:13 lr: 0.000011 loss_cls: 3.8769 (3.7891) grad_norm: 4.2675 (4.3029) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-05 13:24:13 root] (utils.py 283): INFO Epoch: [16] [ 910/2502] eta: 0:22:04 lr: 0.000011 loss_cls: 3.9736 (3.7894) grad_norm: 4.0269 (4.3001) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 13:24:21 root] (utils.py 283): INFO Epoch: [16] [ 920/2502] eta: 0:21:55 lr: 0.000011 loss_cls: 4.1234 (3.7913) grad_norm: 3.9863 (4.2975) time: 0.7719 data: 0.0003 max mem: 8421 +[2024-12-05 13:24:29 root] (utils.py 283): INFO Epoch: [16] [ 930/2502] eta: 0:21:45 lr: 0.000011 loss_cls: 4.1187 (3.7941) grad_norm: 4.0070 (4.2941) time: 0.7726 data: 0.0003 max mem: 8421 +[2024-12-05 13:24:36 root] (utils.py 283): INFO Epoch: [16] [ 940/2502] eta: 0:21:36 lr: 0.000011 loss_cls: 4.0238 (3.7931) grad_norm: 4.0865 (4.2952) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 13:24:44 root] (utils.py 283): INFO Epoch: [16] [ 950/2502] eta: 0:21:27 lr: 0.000011 loss_cls: 3.2649 (3.7878) grad_norm: 4.1141 (4.2943) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-05 13:24:52 root] (utils.py 283): INFO Epoch: [16] [ 960/2502] eta: 0:21:18 lr: 0.000011 loss_cls: 3.7440 (3.7888) grad_norm: 4.2108 (4.2966) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-05 13:25:00 root] (utils.py 283): INFO Epoch: [16] [ 970/2502] eta: 0:21:09 lr: 0.000011 loss_cls: 3.9358 (3.7895) grad_norm: 4.2108 (4.2958) time: 0.7956 data: 0.0002 max mem: 8421 +[2024-12-05 13:25:08 root] (utils.py 283): INFO Epoch: [16] [ 980/2502] eta: 0:21:01 lr: 0.000011 loss_cls: 3.9345 (3.7887) grad_norm: 4.1831 (4.2962) time: 0.8121 data: 0.0003 max mem: 8421 +[2024-12-05 13:25:17 root] (utils.py 283): INFO Epoch: [16] [ 990/2502] eta: 0:20:53 lr: 0.000011 loss_cls: 4.0068 (3.7886) grad_norm: 4.3857 (4.2982) time: 0.8326 data: 0.0003 max mem: 8421 +[2024-12-05 13:25:25 root] (utils.py 283): INFO Epoch: [16] [1000/2502] eta: 0:20:44 lr: 0.000011 loss_cls: 3.9583 (3.7880) grad_norm: 4.1908 (4.2959) time: 0.8353 data: 0.0002 max mem: 8421 +[2024-12-05 13:25:33 root] (utils.py 283): INFO Epoch: [16] [1010/2502] eta: 0:20:36 lr: 0.000011 loss_cls: 3.9209 (3.7895) grad_norm: 4.0314 (4.2953) time: 0.8103 data: 0.0003 max mem: 8421 +[2024-12-05 13:25:41 root] (utils.py 283): INFO Epoch: [16] [1020/2502] eta: 0:20:27 lr: 0.000011 loss_cls: 3.9750 (3.7890) grad_norm: 4.2008 (4.2946) time: 0.7861 data: 0.0002 max mem: 8421 +[2024-12-05 13:25:49 root] (utils.py 283): INFO Epoch: [16] [1030/2502] eta: 0:20:18 lr: 0.000011 loss_cls: 3.6628 (3.7858) grad_norm: 4.2151 (4.2956) time: 0.7740 data: 0.0002 max mem: 8421 +[2024-12-05 13:25:56 root] (utils.py 283): INFO Epoch: [16] [1040/2502] eta: 0:20:09 lr: 0.000011 loss_cls: 3.9062 (3.7877) grad_norm: 4.2407 (4.2948) time: 0.7742 data: 0.0002 max mem: 8421 +[2024-12-05 13:26:04 root] (utils.py 283): INFO Epoch: [16] [1050/2502] eta: 0:20:00 lr: 0.000011 loss_cls: 3.8915 (3.7870) grad_norm: 4.1862 (4.2975) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-05 13:26:12 root] (utils.py 283): INFO Epoch: [16] [1060/2502] eta: 0:19:51 lr: 0.000011 loss_cls: 3.8548 (3.7885) grad_norm: 4.4135 (4.2997) time: 0.7840 data: 0.0002 max mem: 8421 +[2024-12-05 13:26:20 root] (utils.py 283): INFO Epoch: [16] [1070/2502] eta: 0:19:42 lr: 0.000011 loss_cls: 3.8755 (3.7884) grad_norm: 4.3043 (4.2994) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 13:26:28 root] (utils.py 283): INFO Epoch: [16] [1080/2502] eta: 0:19:34 lr: 0.000011 loss_cls: 3.8592 (3.7881) grad_norm: 4.3043 (4.3003) time: 0.8029 data: 0.0002 max mem: 8421 +[2024-12-05 13:26:36 root] (utils.py 283): INFO Epoch: [16] [1090/2502] eta: 0:19:25 lr: 0.000011 loss_cls: 3.7608 (3.7868) grad_norm: 4.1515 (4.2996) time: 0.8139 data: 0.0002 max mem: 8421 +[2024-12-05 13:26:44 root] (utils.py 283): INFO Epoch: [16] [1100/2502] eta: 0:19:17 lr: 0.000011 loss_cls: 3.7608 (3.7865) grad_norm: 4.2063 (4.3094) time: 0.8126 data: 0.0003 max mem: 8421 +[2024-12-05 13:26:52 root] (utils.py 283): INFO Epoch: [16] [1110/2502] eta: 0:19:08 lr: 0.000011 loss_cls: 3.7984 (3.7865) grad_norm: 4.3021 (4.3101) time: 0.8083 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:00 root] (utils.py 283): INFO Epoch: [16] [1120/2502] eta: 0:19:00 lr: 0.000011 loss_cls: 3.9536 (3.7881) grad_norm: 4.3343 (4.3132) time: 0.7928 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:08 root] (utils.py 283): INFO Epoch: [16] [1130/2502] eta: 0:18:51 lr: 0.000011 loss_cls: 4.0125 (3.7898) grad_norm: 4.3343 (4.3132) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:16 root] (utils.py 283): INFO Epoch: [16] [1140/2502] eta: 0:18:42 lr: 0.000011 loss_cls: 3.9238 (3.7886) grad_norm: 4.1136 (4.3151) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:23 root] (utils.py 283): INFO Epoch: [16] [1150/2502] eta: 0:18:33 lr: 0.000011 loss_cls: 3.7045 (3.7876) grad_norm: 4.1155 (4.3167) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:31 root] (utils.py 283): INFO Epoch: [16] [1160/2502] eta: 0:18:24 lr: 0.000011 loss_cls: 3.8415 (3.7878) grad_norm: 4.1232 (4.3181) time: 0.7730 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:39 root] (utils.py 283): INFO Epoch: [16] [1170/2502] eta: 0:18:15 lr: 0.000011 loss_cls: 3.8060 (3.7869) grad_norm: 4.1454 (4.3173) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:46 root] (utils.py 283): INFO Epoch: [16] [1180/2502] eta: 0:18:06 lr: 0.000011 loss_cls: 4.0522 (3.7897) grad_norm: 4.1454 (4.3182) time: 0.7705 data: 0.0002 max mem: 8421 +[2024-12-05 13:27:54 root] (utils.py 283): INFO Epoch: [16] [1190/2502] eta: 0:17:58 lr: 0.000011 loss_cls: 4.1071 (3.7918) grad_norm: 4.1231 (4.3161) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 13:28:03 root] (utils.py 283): INFO Epoch: [16] [1200/2502] eta: 0:17:50 lr: 0.000011 loss_cls: 4.0628 (3.7929) grad_norm: 4.0322 (4.3145) time: 0.8041 data: 0.0003 max mem: 8421 +[2024-12-05 13:28:10 root] (utils.py 283): INFO Epoch: [16] [1210/2502] eta: 0:17:41 lr: 0.000011 loss_cls: 3.7898 (3.7921) grad_norm: 4.1171 (4.3128) time: 0.7944 data: 0.0003 max mem: 8421 +[2024-12-05 13:28:18 root] (utils.py 283): INFO Epoch: [16] [1220/2502] eta: 0:17:33 lr: 0.000011 loss_cls: 3.7658 (3.7920) grad_norm: 4.3120 (4.3144) time: 0.7950 data: 0.0003 max mem: 8421 +[2024-12-05 13:28:26 root] (utils.py 283): INFO Epoch: [16] [1230/2502] eta: 0:17:24 lr: 0.000011 loss_cls: 3.8105 (3.7921) grad_norm: 4.3133 (4.3142) time: 0.7960 data: 0.0002 max mem: 8421 +[2024-12-05 13:28:34 root] (utils.py 283): INFO Epoch: [16] [1240/2502] eta: 0:17:15 lr: 0.000011 loss_cls: 3.9829 (3.7923) grad_norm: 4.1978 (4.3127) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 13:28:42 root] (utils.py 283): INFO Epoch: [16] [1250/2502] eta: 0:17:07 lr: 0.000011 loss_cls: 3.7275 (3.7908) grad_norm: 4.0293 (4.3098) time: 0.7706 data: 0.0002 max mem: 8421 +[2024-12-05 13:28:49 root] (utils.py 283): INFO Epoch: [16] [1260/2502] eta: 0:16:58 lr: 0.000011 loss_cls: 3.9437 (3.7918) grad_norm: 4.0698 (4.3109) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-05 13:28:57 root] (utils.py 283): INFO Epoch: [16] [1270/2502] eta: 0:16:49 lr: 0.000011 loss_cls: 3.9450 (3.7930) grad_norm: 4.2230 (4.3119) time: 0.7695 data: 0.0003 max mem: 8421 +[2024-12-05 13:29:05 root] (utils.py 283): INFO Epoch: [16] [1280/2502] eta: 0:16:41 lr: 0.000011 loss_cls: 4.0171 (3.7949) grad_norm: 4.1657 (4.3118) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-05 13:29:12 root] (utils.py 283): INFO Epoch: [16] [1290/2502] eta: 0:16:32 lr: 0.000011 loss_cls: 4.0725 (3.7965) grad_norm: 4.2822 (4.3160) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-05 13:29:20 root] (utils.py 283): INFO Epoch: [16] [1300/2502] eta: 0:16:23 lr: 0.000011 loss_cls: 4.0378 (3.7971) grad_norm: 4.3240 (4.3166) time: 0.7698 data: 0.0002 max mem: 8421 +[2024-12-05 13:29:28 root] (utils.py 283): INFO Epoch: [16] [1310/2502] eta: 0:16:15 lr: 0.000011 loss_cls: 3.7516 (3.7956) grad_norm: 4.2737 (4.3178) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-05 13:29:36 root] (utils.py 283): INFO Epoch: [16] [1320/2502] eta: 0:16:06 lr: 0.000011 loss_cls: 3.6089 (3.7936) grad_norm: 4.4357 (4.3219) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-05 13:29:43 root] (utils.py 283): INFO Epoch: [16] [1330/2502] eta: 0:15:57 lr: 0.000011 loss_cls: 3.8869 (3.7946) grad_norm: 4.3541 (4.3224) time: 0.7680 data: 0.0002 max mem: 8421 +[2024-12-05 13:29:51 root] (utils.py 283): INFO Epoch: [16] [1340/2502] eta: 0:15:49 lr: 0.000011 loss_cls: 4.1088 (3.7954) grad_norm: 4.1639 (4.3204) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 13:29:59 root] (utils.py 283): INFO Epoch: [16] [1350/2502] eta: 0:15:40 lr: 0.000011 loss_cls: 3.8286 (3.7955) grad_norm: 4.0147 (4.3193) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-05 13:30:06 root] (utils.py 283): INFO Epoch: [16] [1360/2502] eta: 0:15:32 lr: 0.000011 loss_cls: 3.8767 (3.7972) grad_norm: 4.2041 (4.3235) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-05 13:30:14 root] (utils.py 283): INFO Epoch: [16] [1370/2502] eta: 0:15:23 lr: 0.000011 loss_cls: 4.0312 (3.7983) grad_norm: 4.2659 (4.3237) time: 0.7703 data: 0.0003 max mem: 8421 +[2024-12-05 13:30:22 root] (utils.py 283): INFO Epoch: [16] [1380/2502] eta: 0:15:15 lr: 0.000011 loss_cls: 4.0226 (3.7994) grad_norm: 4.1902 (4.3224) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-05 13:30:29 root] (utils.py 283): INFO Epoch: [16] [1390/2502] eta: 0:15:06 lr: 0.000011 loss_cls: 4.0226 (3.8002) grad_norm: 4.2658 (4.3237) time: 0.7706 data: 0.0002 max mem: 8421 +[2024-12-05 13:30:37 root] (utils.py 283): INFO Epoch: [16] [1400/2502] eta: 0:14:58 lr: 0.000011 loss_cls: 4.0455 (3.8003) grad_norm: 4.2658 (4.3225) time: 0.7712 data: 0.0002 max mem: 8421 +[2024-12-05 13:30:45 root] (utils.py 283): INFO Epoch: [16] [1410/2502] eta: 0:14:49 lr: 0.000011 loss_cls: 4.0641 (3.8028) grad_norm: 4.1218 (4.3223) time: 0.7724 data: 0.0002 max mem: 8421 +[2024-12-05 13:30:53 root] (utils.py 283): INFO Epoch: [16] [1420/2502] eta: 0:14:41 lr: 0.000011 loss_cls: 3.9187 (3.8019) grad_norm: 4.1706 (4.3248) time: 0.7705 data: 0.0002 max mem: 8421 +[2024-12-05 13:31:00 root] (utils.py 283): INFO Epoch: [16] [1430/2502] eta: 0:14:32 lr: 0.000011 loss_cls: 3.8041 (3.8023) grad_norm: 4.0573 (4.3246) time: 0.7688 data: 0.0003 max mem: 8421 +[2024-12-05 13:31:08 root] (utils.py 283): INFO Epoch: [16] [1440/2502] eta: 0:14:24 lr: 0.000011 loss_cls: 3.9217 (3.8015) grad_norm: 4.0573 (4.3233) time: 0.7718 data: 0.0002 max mem: 8421 +[2024-12-05 13:31:16 root] (utils.py 283): INFO Epoch: [16] [1450/2502] eta: 0:14:15 lr: 0.000011 loss_cls: 4.0054 (3.8022) grad_norm: 4.1561 (4.3241) time: 0.7718 data: 0.0002 max mem: 8421 +[2024-12-05 13:31:23 root] (utils.py 283): INFO Epoch: [16] [1460/2502] eta: 0:14:07 lr: 0.000011 loss_cls: 4.1214 (3.8035) grad_norm: 4.2392 (4.3234) time: 0.7683 data: 0.0002 max mem: 8421 +[2024-12-05 13:31:31 root] (utils.py 283): INFO Epoch: [16] [1470/2502] eta: 0:13:58 lr: 0.000011 loss_cls: 4.0854 (3.8027) grad_norm: 4.1214 (4.3217) time: 0.7714 data: 0.0002 max mem: 8421 +[2024-12-05 13:31:39 root] (utils.py 283): INFO Epoch: [16] [1480/2502] eta: 0:13:50 lr: 0.000011 loss_cls: 3.5490 (3.8001) grad_norm: 4.0960 (4.3213) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-05 13:31:47 root] (utils.py 283): INFO Epoch: [16] [1490/2502] eta: 0:13:42 lr: 0.000011 loss_cls: 3.4236 (3.7964) grad_norm: 4.1853 (4.3211) time: 0.7715 data: 0.0002 max mem: 8421 +[2024-12-05 13:31:55 root] (utils.py 283): INFO Epoch: [16] [1500/2502] eta: 0:13:33 lr: 0.000011 loss_cls: 3.4304 (3.7961) grad_norm: 4.2123 (4.3215) time: 0.7820 data: 0.0002 max mem: 8421 +[2024-12-05 13:32:02 root] (utils.py 283): INFO Epoch: [16] [1510/2502] eta: 0:13:25 lr: 0.000011 loss_cls: 3.9018 (3.7965) grad_norm: 4.1613 (4.3225) time: 0.7938 data: 0.0002 max mem: 8421 +[2024-12-05 13:32:10 root] (utils.py 283): INFO Epoch: [16] [1520/2502] eta: 0:13:17 lr: 0.000011 loss_cls: 3.9390 (3.7976) grad_norm: 4.1258 (4.3216) time: 0.7895 data: 0.0002 max mem: 8421 +[2024-12-05 13:32:18 root] (utils.py 283): INFO Epoch: [16] [1530/2502] eta: 0:13:08 lr: 0.000011 loss_cls: 3.9390 (3.7972) grad_norm: 4.1730 (4.3227) time: 0.7856 data: 0.0002 max mem: 8421 +[2024-12-05 13:32:28 root] (utils.py 283): INFO Epoch: [16] [1540/2502] eta: 0:13:01 lr: 0.000011 loss_cls: 3.8950 (3.7971) grad_norm: 4.2583 (4.3230) time: 0.8870 data: 0.0003 max mem: 8421 +[2024-12-05 13:32:36 root] (utils.py 283): INFO Epoch: [16] [1550/2502] eta: 0:12:53 lr: 0.000011 loss_cls: 3.7917 (3.7959) grad_norm: 4.2617 (4.3224) time: 0.8771 data: 0.0003 max mem: 8421 +[2024-12-05 13:32:43 root] (utils.py 283): INFO Epoch: [16] [1560/2502] eta: 0:12:45 lr: 0.000011 loss_cls: 3.6437 (3.7950) grad_norm: 4.2289 (4.3251) time: 0.7718 data: 0.0002 max mem: 8421 +[2024-12-05 13:32:51 root] (utils.py 283): INFO Epoch: [16] [1570/2502] eta: 0:12:36 lr: 0.000011 loss_cls: 3.6437 (3.7926) grad_norm: 4.2474 (4.3255) time: 0.7743 data: 0.0002 max mem: 8421 +[2024-12-05 13:32:59 root] (utils.py 283): INFO Epoch: [16] [1580/2502] eta: 0:12:28 lr: 0.000011 loss_cls: 3.6752 (3.7924) grad_norm: 4.2369 (4.3252) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-05 13:33:07 root] (utils.py 283): INFO Epoch: [16] [1590/2502] eta: 0:12:20 lr: 0.000011 loss_cls: 3.9531 (3.7938) grad_norm: 4.2874 (4.3265) time: 0.7716 data: 0.0002 max mem: 8421 +[2024-12-05 13:33:14 root] (utils.py 283): INFO Epoch: [16] [1600/2502] eta: 0:12:11 lr: 0.000011 loss_cls: 3.9631 (3.7954) grad_norm: 4.3141 (4.3268) time: 0.7716 data: 0.0002 max mem: 8421 +[2024-12-05 13:33:22 root] (utils.py 283): INFO Epoch: [16] [1610/2502] eta: 0:12:03 lr: 0.000011 loss_cls: 3.9846 (3.7958) grad_norm: 4.2636 (4.3305) time: 0.7735 data: 0.0002 max mem: 8421 +[2024-12-05 13:33:30 root] (utils.py 283): INFO Epoch: [16] [1620/2502] eta: 0:11:55 lr: 0.000011 loss_cls: 3.8091 (3.7962) grad_norm: 4.2636 (4.3318) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 13:33:38 root] (utils.py 283): INFO Epoch: [16] [1630/2502] eta: 0:11:47 lr: 0.000011 loss_cls: 3.9604 (3.7968) grad_norm: 4.1991 (4.3360) time: 0.7885 data: 0.0002 max mem: 8421 +[2024-12-05 13:33:46 root] (utils.py 283): INFO Epoch: [16] [1640/2502] eta: 0:11:38 lr: 0.000011 loss_cls: 3.9891 (3.7964) grad_norm: 4.2304 (4.3380) time: 0.8093 data: 0.0002 max mem: 8421 +[2024-12-05 13:33:55 root] (utils.py 283): INFO Epoch: [16] [1650/2502] eta: 0:11:31 lr: 0.000011 loss_cls: 3.6612 (3.7935) grad_norm: 4.2304 (4.3366) time: 0.8350 data: 0.0003 max mem: 8421 +[2024-12-05 13:34:07 root] (utils.py 283): INFO Epoch: [16] [1660/2502] eta: 0:11:25 lr: 0.000011 loss_cls: 3.3221 (3.7910) grad_norm: 4.2304 (4.3362) time: 1.0572 data: 0.0011 max mem: 8421 +[2024-12-05 13:34:15 root] (utils.py 283): INFO Epoch: [16] [1670/2502] eta: 0:11:16 lr: 0.000011 loss_cls: 3.4718 (3.7910) grad_norm: 4.1940 (4.3356) time: 1.0271 data: 0.0011 max mem: 8421 +[2024-12-05 13:34:23 root] (utils.py 283): INFO Epoch: [16] [1680/2502] eta: 0:11:08 lr: 0.000011 loss_cls: 3.9612 (3.7904) grad_norm: 4.1666 (4.3345) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-05 13:34:31 root] (utils.py 283): INFO Epoch: [16] [1690/2502] eta: 0:11:00 lr: 0.000011 loss_cls: 3.9175 (3.7904) grad_norm: 4.1619 (4.3337) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-05 13:34:39 root] (utils.py 283): INFO Epoch: [16] [1700/2502] eta: 0:10:52 lr: 0.000011 loss_cls: 4.0942 (3.7929) grad_norm: 4.2357 (4.3366) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 13:34:47 root] (utils.py 283): INFO Epoch: [16] [1710/2502] eta: 0:10:43 lr: 0.000011 loss_cls: 4.0550 (3.7927) grad_norm: 4.2377 (4.3361) time: 0.7933 data: 0.0002 max mem: 8421 +[2024-12-05 13:34:54 root] (utils.py 283): INFO Epoch: [16] [1720/2502] eta: 0:10:35 lr: 0.000011 loss_cls: 3.5991 (3.7916) grad_norm: 4.1598 (4.3355) time: 0.7839 data: 0.0002 max mem: 8421 +[2024-12-05 13:35:02 root] (utils.py 283): INFO Epoch: [16] [1730/2502] eta: 0:10:27 lr: 0.000011 loss_cls: 4.0070 (3.7930) grad_norm: 4.0743 (4.3357) time: 0.7716 data: 0.0002 max mem: 8421 +[2024-12-05 13:35:10 root] (utils.py 283): INFO Epoch: [16] [1740/2502] eta: 0:10:18 lr: 0.000011 loss_cls: 3.9470 (3.7922) grad_norm: 4.2336 (4.3363) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-05 13:35:17 root] (utils.py 283): INFO Epoch: [16] [1750/2502] eta: 0:10:10 lr: 0.000011 loss_cls: 3.5551 (3.7914) grad_norm: 4.2390 (4.3368) time: 0.7737 data: 0.0003 max mem: 8421 +[2024-12-05 13:35:25 root] (utils.py 283): INFO Epoch: [16] [1760/2502] eta: 0:10:02 lr: 0.000011 loss_cls: 3.3788 (3.7909) grad_norm: 4.2390 (4.3361) time: 0.7736 data: 0.0003 max mem: 8421 +[2024-12-05 13:35:33 root] (utils.py 283): INFO Epoch: [16] [1770/2502] eta: 0:09:54 lr: 0.000011 loss_cls: 3.9590 (3.7916) grad_norm: 4.2500 (4.3394) time: 0.7700 data: 0.0003 max mem: 8421 +[2024-12-05 13:35:41 root] (utils.py 283): INFO Epoch: [16] [1780/2502] eta: 0:09:45 lr: 0.000011 loss_cls: 3.9617 (3.7912) grad_norm: 4.2500 (4.3393) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 13:35:49 root] (utils.py 283): INFO Epoch: [16] [1790/2502] eta: 0:09:37 lr: 0.000011 loss_cls: 3.7926 (3.7903) grad_norm: 4.3681 (4.3400) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-05 13:35:59 root] (utils.py 283): INFO Epoch: [16] [1800/2502] eta: 0:09:30 lr: 0.000011 loss_cls: 3.8105 (3.7915) grad_norm: 4.3718 (4.3400) time: 0.8991 data: 0.0003 max mem: 8421 +[2024-12-05 13:36:06 root] (utils.py 283): INFO Epoch: [16] [1810/2502] eta: 0:09:21 lr: 0.000011 loss_cls: 3.8112 (3.7908) grad_norm: 4.2310 (4.3396) time: 0.8856 data: 0.0003 max mem: 8421 +[2024-12-05 13:36:14 root] (utils.py 283): INFO Epoch: [16] [1820/2502] eta: 0:09:13 lr: 0.000011 loss_cls: 3.8042 (3.7911) grad_norm: 4.0623 (4.3394) time: 0.7679 data: 0.0002 max mem: 8421 +[2024-12-05 13:36:22 root] (utils.py 283): INFO Epoch: [16] [1830/2502] eta: 0:09:05 lr: 0.000011 loss_cls: 3.9694 (3.7932) grad_norm: 4.1041 (4.3411) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-05 13:36:29 root] (utils.py 283): INFO Epoch: [16] [1840/2502] eta: 0:08:57 lr: 0.000011 loss_cls: 3.9391 (3.7925) grad_norm: 4.1041 (4.3403) time: 0.7697 data: 0.0002 max mem: 8421 +[2024-12-05 13:36:37 root] (utils.py 283): INFO Epoch: [16] [1850/2502] eta: 0:08:48 lr: 0.000011 loss_cls: 3.7284 (3.7915) grad_norm: 4.1190 (4.3419) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 13:36:45 root] (utils.py 283): INFO Epoch: [16] [1860/2502] eta: 0:08:40 lr: 0.000011 loss_cls: 3.7181 (3.7909) grad_norm: 4.1340 (4.3434) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-05 13:36:57 root] (utils.py 283): INFO Epoch: [16] [1870/2502] eta: 0:08:33 lr: 0.000011 loss_cls: 3.7330 (3.7898) grad_norm: 4.3038 (4.3437) time: 0.9722 data: 0.0013 max mem: 8421 +[2024-12-05 13:37:15 root] (utils.py 283): INFO Epoch: [16] [1880/2502] eta: 0:08:28 lr: 0.000011 loss_cls: 4.0741 (3.7913) grad_norm: 4.3516 (4.3445) time: 1.4808 data: 0.0014 max mem: 8421 +[2024-12-05 13:37:33 root] (utils.py 283): INFO Epoch: [16] [1890/2502] eta: 0:08:23 lr: 0.000011 loss_cls: 4.0741 (3.7905) grad_norm: 4.3722 (4.3497) time: 1.8049 data: 0.0004 max mem: 8421 +[2024-12-05 13:37:53 root] (utils.py 283): INFO Epoch: [16] [1900/2502] eta: 0:08:19 lr: 0.000011 loss_cls: 3.6946 (3.7896) grad_norm: 4.2640 (4.3489) time: 1.9131 data: 0.0003 max mem: 8421 +[2024-12-05 13:38:11 root] (utils.py 283): INFO Epoch: [16] [1910/2502] eta: 0:08:14 lr: 0.000011 loss_cls: 3.7480 (3.7901) grad_norm: 4.1394 (4.3483) time: 1.9212 data: 0.0003 max mem: 8421 +[2024-12-05 13:38:29 root] (utils.py 283): INFO Epoch: [16] [1920/2502] eta: 0:08:08 lr: 0.000011 loss_cls: 3.5731 (3.7876) grad_norm: 4.1367 (4.3476) time: 1.8242 data: 0.0003 max mem: 8421 +[2024-12-05 13:38:50 root] (utils.py 283): INFO Epoch: [16] [1930/2502] eta: 0:08:04 lr: 0.000011 loss_cls: 3.5249 (3.7872) grad_norm: 4.3189 (4.3474) time: 1.9263 data: 0.0003 max mem: 8421 +[2024-12-05 13:39:08 root] (utils.py 283): INFO Epoch: [16] [1940/2502] eta: 0:07:58 lr: 0.000011 loss_cls: 3.7101 (3.7865) grad_norm: 4.3600 (4.3481) time: 1.9227 data: 0.0003 max mem: 8421 +[2024-12-05 13:39:28 root] (utils.py 283): INFO Epoch: [16] [1950/2502] eta: 0:07:53 lr: 0.000011 loss_cls: 3.7100 (3.7857) grad_norm: 4.2605 (4.3477) time: 1.9288 data: 0.0003 max mem: 8421 +[2024-12-05 13:39:46 root] (utils.py 283): INFO Epoch: [16] [1960/2502] eta: 0:07:47 lr: 0.000011 loss_cls: 3.8153 (3.7848) grad_norm: 4.0645 (4.3456) time: 1.9275 data: 0.0003 max mem: 8421 +[2024-12-05 13:40:05 root] (utils.py 283): INFO Epoch: [16] [1970/2502] eta: 0:07:41 lr: 0.000011 loss_cls: 3.7931 (3.7833) grad_norm: 3.9863 (4.3444) time: 1.8344 data: 0.0003 max mem: 8421 +[2024-12-05 13:40:23 root] (utils.py 283): INFO Epoch: [16] [1980/2502] eta: 0:07:35 lr: 0.000011 loss_cls: 3.6022 (3.7829) grad_norm: 4.2098 (4.3449) time: 1.8196 data: 0.0004 max mem: 8421 +[2024-12-05 13:40:41 root] (utils.py 283): INFO Epoch: [16] [1990/2502] eta: 0:07:28 lr: 0.000011 loss_cls: 3.9278 (3.7825) grad_norm: 4.4685 (4.3454) time: 1.7894 data: 0.0004 max mem: 8421 +[2024-12-05 13:41:02 root] (utils.py 283): INFO Epoch: [16] [2000/2502] eta: 0:07:23 lr: 0.000011 loss_cls: 3.7627 (3.7818) grad_norm: 4.1865 (4.3447) time: 1.9635 data: 0.0003 max mem: 8421 +[2024-12-05 13:41:28 root] (utils.py 283): INFO Epoch: [16] [2010/2502] eta: 0:07:18 lr: 0.000011 loss_cls: 3.9590 (3.7832) grad_norm: 4.1865 (4.3446) time: 2.3670 data: 0.0003 max mem: 8421 +[2024-12-05 13:41:54 root] (utils.py 283): INFO Epoch: [16] [2020/2502] eta: 0:07:13 lr: 0.000011 loss_cls: 3.9373 (3.7829) grad_norm: 4.2828 (4.3445) time: 2.5913 data: 0.0003 max mem: 8421 +[2024-12-05 13:42:20 root] (utils.py 283): INFO Epoch: [16] [2030/2502] eta: 0:07:08 lr: 0.000011 loss_cls: 3.9373 (3.7837) grad_norm: 4.0913 (4.3434) time: 2.5847 data: 0.0003 max mem: 8421 +[2024-12-05 13:42:46 root] (utils.py 283): INFO Epoch: [16] [2040/2502] eta: 0:07:03 lr: 0.000011 loss_cls: 4.0822 (3.7844) grad_norm: 4.0153 (4.3428) time: 2.5844 data: 0.0003 max mem: 8421 +[2024-12-05 13:43:12 root] (utils.py 283): INFO Epoch: [16] [2050/2502] eta: 0:06:57 lr: 0.000011 loss_cls: 3.8618 (3.7836) grad_norm: 4.1755 (4.3425) time: 2.5903 data: 0.0003 max mem: 8421 +[2024-12-05 13:43:38 root] (utils.py 283): INFO Epoch: [16] [2060/2502] eta: 0:06:52 lr: 0.000011 loss_cls: 3.5652 (3.7832) grad_norm: 4.2019 (4.3423) time: 2.5933 data: 0.0003 max mem: 8421 +[2024-12-05 13:44:03 root] (utils.py 283): INFO Epoch: [16] [2070/2502] eta: 0:06:46 lr: 0.000011 loss_cls: 3.9623 (3.7838) grad_norm: 4.2173 (4.3429) time: 2.5917 data: 0.0003 max mem: 8421 +[2024-12-05 13:44:29 root] (utils.py 283): INFO Epoch: [16] [2080/2502] eta: 0:06:40 lr: 0.000011 loss_cls: 3.9472 (3.7834) grad_norm: 4.2173 (4.3421) time: 2.5852 data: 0.0003 max mem: 8421 +[2024-12-05 13:44:55 root] (utils.py 283): INFO Epoch: [16] [2090/2502] eta: 0:06:33 lr: 0.000011 loss_cls: 3.8701 (3.7844) grad_norm: 4.1565 (4.3418) time: 2.5728 data: 0.0003 max mem: 8421 +[2024-12-05 13:45:21 root] (utils.py 283): INFO Epoch: [16] [2100/2502] eta: 0:06:27 lr: 0.000011 loss_cls: 4.0795 (3.7834) grad_norm: 4.1486 (4.3417) time: 2.5794 data: 0.0003 max mem: 8421 +[2024-12-05 13:45:47 root] (utils.py 283): INFO Epoch: [16] [2110/2502] eta: 0:06:20 lr: 0.000011 loss_cls: 3.9783 (3.7842) grad_norm: 4.2475 (4.3436) time: 2.5823 data: 0.0003 max mem: 8421 +[2024-12-05 13:46:12 root] (utils.py 283): INFO Epoch: [16] [2120/2502] eta: 0:06:13 lr: 0.000011 loss_cls: 4.0206 (3.7837) grad_norm: 4.1142 (4.3427) time: 2.5581 data: 0.0003 max mem: 8421 +[2024-12-05 13:46:30 root] (utils.py 283): INFO Epoch: [16] [2130/2502] eta: 0:06:05 lr: 0.000011 loss_cls: 3.9718 (3.7839) grad_norm: 4.0772 (4.3416) time: 2.1883 data: 0.0003 max mem: 8421 +[2024-12-05 13:46:48 root] (utils.py 283): INFO Epoch: [16] [2140/2502] eta: 0:05:57 lr: 0.000011 loss_cls: 3.9951 (3.7852) grad_norm: 4.0891 (4.3421) time: 1.7948 data: 0.0003 max mem: 8421 +[2024-12-05 13:47:06 root] (utils.py 283): INFO Epoch: [16] [2150/2502] eta: 0:05:48 lr: 0.000011 loss_cls: 4.0033 (3.7850) grad_norm: 4.2004 (4.3417) time: 1.7685 data: 0.0003 max mem: 8421 +[2024-12-05 13:47:23 root] (utils.py 283): INFO Epoch: [16] [2160/2502] eta: 0:05:39 lr: 0.000011 loss_cls: 4.0033 (3.7856) grad_norm: 4.0170 (4.3405) time: 1.7695 data: 0.0003 max mem: 8421 +[2024-12-05 13:47:41 root] (utils.py 283): INFO Epoch: [16] [2170/2502] eta: 0:05:31 lr: 0.000011 loss_cls: 4.0724 (3.7865) grad_norm: 4.2245 (4.3417) time: 1.7552 data: 0.0002 max mem: 8421 +[2024-12-05 13:47:58 root] (utils.py 283): INFO Epoch: [16] [2180/2502] eta: 0:05:22 lr: 0.000011 loss_cls: 3.8840 (3.7862) grad_norm: 4.2562 (4.3411) time: 1.7496 data: 0.0002 max mem: 8421 +[2024-12-05 13:48:16 root] (utils.py 283): INFO Epoch: [16] [2190/2502] eta: 0:05:13 lr: 0.000011 loss_cls: 4.0695 (3.7874) grad_norm: 4.0467 (4.3416) time: 1.7529 data: 0.0002 max mem: 8421 +[2024-12-05 13:48:33 root] (utils.py 283): INFO Epoch: [16] [2200/2502] eta: 0:05:04 lr: 0.000011 loss_cls: 3.8936 (3.7858) grad_norm: 4.0467 (4.3405) time: 1.7561 data: 0.0003 max mem: 8421 +[2024-12-05 13:48:52 root] (utils.py 283): INFO Epoch: [16] [2210/2502] eta: 0:04:55 lr: 0.000011 loss_cls: 3.5594 (3.7856) grad_norm: 4.1370 (4.3399) time: 1.7846 data: 0.0003 max mem: 8421 +[2024-12-05 13:49:09 root] (utils.py 283): INFO Epoch: [16] [2220/2502] eta: 0:04:46 lr: 0.000011 loss_cls: 3.8145 (3.7849) grad_norm: 4.2279 (4.3392) time: 1.7866 data: 0.0002 max mem: 8421 +[2024-12-05 13:49:27 root] (utils.py 283): INFO Epoch: [16] [2230/2502] eta: 0:04:36 lr: 0.000011 loss_cls: 3.7814 (3.7842) grad_norm: 4.2422 (4.3404) time: 1.7642 data: 0.0002 max mem: 8421 +[2024-12-05 13:49:45 root] (utils.py 283): INFO Epoch: [16] [2240/2502] eta: 0:04:27 lr: 0.000011 loss_cls: 3.6862 (3.7834) grad_norm: 4.3180 (4.3421) time: 1.7698 data: 0.0003 max mem: 8421 +[2024-12-05 13:50:03 root] (utils.py 283): INFO Epoch: [16] [2250/2502] eta: 0:04:18 lr: 0.000011 loss_cls: 3.5736 (3.7825) grad_norm: 4.1897 (4.3412) time: 1.8139 data: 0.0003 max mem: 8421 +[2024-12-05 13:50:21 root] (utils.py 283): INFO Epoch: [16] [2260/2502] eta: 0:04:08 lr: 0.000011 loss_cls: 3.4406 (3.7820) grad_norm: 4.1210 (4.3409) time: 1.8412 data: 0.0003 max mem: 8421 +[2024-12-05 13:50:47 root] (utils.py 283): INFO Epoch: [16] [2270/2502] eta: 0:04:00 lr: 0.000011 loss_cls: 3.8358 (3.7826) grad_norm: 4.3164 (4.3422) time: 2.2094 data: 0.0002 max mem: 8421 +[2024-12-05 13:51:09 root] (utils.py 283): INFO Epoch: [16] [2280/2502] eta: 0:03:50 lr: 0.000011 loss_cls: 3.9870 (3.7834) grad_norm: 4.3584 (4.3418) time: 2.3608 data: 0.0002 max mem: 8421 +[2024-12-05 13:51:27 root] (utils.py 283): INFO Epoch: [16] [2290/2502] eta: 0:03:41 lr: 0.000011 loss_cls: 4.0222 (3.7836) grad_norm: 4.2182 (4.3414) time: 1.9747 data: 0.0002 max mem: 8421 +[2024-12-05 13:51:50 root] (utils.py 283): INFO Epoch: [16] [2300/2502] eta: 0:03:31 lr: 0.000011 loss_cls: 3.6669 (3.7830) grad_norm: 4.1149 (4.3407) time: 2.0580 data: 0.0003 max mem: 8421 +[2024-12-05 13:52:14 root] (utils.py 283): INFO Epoch: [16] [2310/2502] eta: 0:03:22 lr: 0.000011 loss_cls: 3.6483 (3.7820) grad_norm: 4.1054 (4.3408) time: 2.3706 data: 0.0003 max mem: 8421 +[2024-12-05 13:52:32 root] (utils.py 283): INFO Epoch: [16] [2320/2502] eta: 0:03:12 lr: 0.000011 loss_cls: 3.7633 (3.7826) grad_norm: 4.2663 (4.3430) time: 2.1180 data: 0.0003 max mem: 8421 +[2024-12-05 13:52:50 root] (utils.py 283): INFO Epoch: [16] [2330/2502] eta: 0:03:02 lr: 0.000011 loss_cls: 3.9928 (3.7839) grad_norm: 4.5399 (4.3436) time: 1.8149 data: 0.0003 max mem: 8421 +[2024-12-05 13:53:17 root] (utils.py 283): INFO Epoch: [16] [2340/2502] eta: 0:02:53 lr: 0.000011 loss_cls: 3.9928 (3.7839) grad_norm: 4.4992 (4.3456) time: 2.2247 data: 0.0003 max mem: 8421 +[2024-12-05 13:53:38 root] (utils.py 283): INFO Epoch: [16] [2350/2502] eta: 0:02:43 lr: 0.000011 loss_cls: 3.8571 (3.7841) grad_norm: 4.2079 (4.3448) time: 2.3757 data: 0.0003 max mem: 8421 +[2024-12-05 13:53:56 root] (utils.py 283): INFO Epoch: [16] [2360/2502] eta: 0:02:32 lr: 0.000011 loss_cls: 3.6987 (3.7835) grad_norm: 4.0862 (4.3438) time: 1.9659 data: 0.0003 max mem: 8421 +[2024-12-05 13:54:14 root] (utils.py 283): INFO Epoch: [16] [2370/2502] eta: 0:02:22 lr: 0.000011 loss_cls: 3.6819 (3.7832) grad_norm: 4.1700 (4.3436) time: 1.7845 data: 0.0003 max mem: 8421 +[2024-12-05 13:54:32 root] (utils.py 283): INFO Epoch: [16] [2380/2502] eta: 0:02:11 lr: 0.000011 loss_cls: 3.9077 (3.7839) grad_norm: 4.1759 (4.3431) time: 1.7817 data: 0.0003 max mem: 8421 +[2024-12-05 13:54:50 root] (utils.py 283): INFO Epoch: [16] [2390/2502] eta: 0:02:01 lr: 0.000011 loss_cls: 3.9149 (3.7831) grad_norm: 4.1801 (4.3445) time: 1.7976 data: 0.0003 max mem: 8421 +[2024-12-05 13:55:07 root] (utils.py 283): INFO Epoch: [16] [2400/2502] eta: 0:01:50 lr: 0.000011 loss_cls: 3.6546 (3.7829) grad_norm: 4.2964 (4.3439) time: 1.7919 data: 0.0002 max mem: 8421 +[2024-12-05 13:55:26 root] (utils.py 283): INFO Epoch: [16] [2410/2502] eta: 0:01:40 lr: 0.000011 loss_cls: 4.0736 (3.7837) grad_norm: 4.2573 (4.3444) time: 1.7958 data: 0.0002 max mem: 8421 +[2024-12-05 13:55:44 root] (utils.py 283): INFO Epoch: [16] [2420/2502] eta: 0:01:29 lr: 0.000011 loss_cls: 3.9155 (3.7830) grad_norm: 4.2573 (4.3444) time: 1.8324 data: 0.0003 max mem: 8421 +[2024-12-05 13:56:03 root] (utils.py 283): INFO Epoch: [16] [2430/2502] eta: 0:01:18 lr: 0.000011 loss_cls: 3.4809 (3.7829) grad_norm: 4.2491 (4.3446) time: 1.8557 data: 0.0003 max mem: 8421 +[2024-12-05 13:56:21 root] (utils.py 283): INFO Epoch: [16] [2440/2502] eta: 0:01:08 lr: 0.000011 loss_cls: 3.9294 (3.7832) grad_norm: 4.2458 (4.3451) time: 1.8339 data: 0.0003 max mem: 8421 +[2024-12-05 13:56:39 root] (utils.py 283): INFO Epoch: [16] [2450/2502] eta: 0:00:57 lr: 0.000011 loss_cls: 3.9294 (3.7840) grad_norm: 4.2047 (4.3457) time: 1.8007 data: 0.0003 max mem: 8421 +[2024-12-05 13:56:57 root] (utils.py 283): INFO Epoch: [16] [2460/2502] eta: 0:00:46 lr: 0.000011 loss_cls: 4.0328 (3.7843) grad_norm: 4.0879 (4.3456) time: 1.8031 data: 0.0002 max mem: 8421 +[2024-12-05 13:57:15 root] (utils.py 283): INFO Epoch: [16] [2470/2502] eta: 0:00:35 lr: 0.000011 loss_cls: 4.0143 (3.7836) grad_norm: 4.2805 (4.3461) time: 1.8230 data: 0.0002 max mem: 8421 +[2024-12-05 13:57:34 root] (utils.py 283): INFO Epoch: [16] [2480/2502] eta: 0:00:24 lr: 0.000011 loss_cls: 3.4135 (3.7828) grad_norm: 4.2805 (4.3457) time: 1.8620 data: 0.0002 max mem: 8421 +[2024-12-05 13:57:54 root] (utils.py 283): INFO Epoch: [16] [2490/2502] eta: 0:00:13 lr: 0.000011 loss_cls: 3.7578 (3.7837) grad_norm: 4.1323 (4.3460) time: 1.9547 data: 0.0227 max mem: 8421 +[2024-12-05 13:58:14 root] (utils.py 283): INFO Epoch: [16] [2500/2502] eta: 0:00:02 lr: 0.000011 loss_cls: 3.8057 (3.7833) grad_norm: 4.2601 (4.3456) time: 2.0123 data: 0.0227 max mem: 8421 +[2024-12-05 13:58:16 root] (utils.py 283): INFO Epoch: [16] [2501/2502] eta: 0:00:01 lr: 0.000011 loss_cls: 3.8057 (3.7828) grad_norm: 4.2601 (4.3454) time: 2.0177 data: 0.0227 max mem: 8421 +[2024-12-05 13:58:16 root] (utils.py 297): INFO Epoch: [16] Total time: 0:46:40 (1.1195 s / it) +[2024-12-05 13:58:16 root] (engine.py 178): INFO Averaged stats:lr: 0.000011 loss_cls: 3.8057 (3.7833) grad_norm: 4.2601 (4.3454) +[2024-12-05 21:43:16 root] (main.py 226): INFO Namespace(batch_size=128, epochs=30, model='RMeeTo_tiny', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_set='IMNET', inat_category='name', device='cuda', seed=0, autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='5', metric='X', distance='cosine', if_order=True, if_random=False, if_merge_odd=False, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-05 21:43:21 root] (main.py 292): INFO Creating model: RMeeTo_tiny +[2024-12-05 21:43:27 root] (main.py 372): INFO number of params: 7148008 +[2024-12-05 21:43:29 root] (main.py 488): INFO Start training for 14 epochs +[2024-12-05 21:43:35 root] (utils.py 283): INFO Epoch: [16] [ 0/2502] eta: 4:30:06 lr: 0.000011 loss_cls: 4.3593 (4.3593) grad_norm: 4.2082 (4.2082) time: 6.4773 data: 0.0022 max mem: 8394 +[2024-12-05 21:43:43 root] (utils.py 283): INFO Epoch: [16] [ 10/2502] eta: 0:54:13 lr: 0.000011 loss_cls: 4.3564 (4.2387) grad_norm: 4.2458 (4.3271) time: 1.3055 data: 0.0005 max mem: 8421 +[2024-12-05 21:43:51 root] (utils.py 283): INFO Epoch: [16] [ 20/2502] eta: 0:43:41 lr: 0.000011 loss_cls: 4.1705 (4.0930) grad_norm: 4.1684 (4.2577) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-05 21:43:59 root] (utils.py 283): INFO Epoch: [16] [ 30/2502] eta: 0:39:55 lr: 0.000011 loss_cls: 4.0791 (4.1135) grad_norm: 4.0477 (4.1974) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 21:44:07 root] (utils.py 283): INFO Epoch: [16] [ 40/2502] eta: 0:38:04 lr: 0.000011 loss_cls: 4.0101 (4.0478) grad_norm: 4.0306 (4.2758) time: 0.7928 data: 0.0003 max mem: 8421 +[2024-12-05 21:44:14 root] (utils.py 283): INFO Epoch: [16] [ 50/2502] eta: 0:36:41 lr: 0.000011 loss_cls: 3.9430 (4.0241) grad_norm: 4.0970 (4.3500) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-05 21:44:22 root] (utils.py 283): INFO Epoch: [16] [ 60/2502] eta: 0:35:45 lr: 0.000011 loss_cls: 3.9430 (3.9923) grad_norm: 4.1015 (4.3376) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 21:44:30 root] (utils.py 283): INFO Epoch: [16] [ 70/2502] eta: 0:35:02 lr: 0.000011 loss_cls: 3.9356 (3.9521) grad_norm: 4.1963 (4.3761) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-05 21:44:38 root] (utils.py 283): INFO Epoch: [16] [ 80/2502] eta: 0:34:28 lr: 0.000011 loss_cls: 3.9270 (3.9305) grad_norm: 4.1803 (4.3871) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 21:44:46 root] (utils.py 283): INFO Epoch: [16] [ 90/2502] eta: 0:34:00 lr: 0.000011 loss_cls: 3.9389 (3.9172) grad_norm: 4.2005 (4.3807) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 21:44:54 root] (utils.py 283): INFO Epoch: [16] [ 100/2502] eta: 0:33:40 lr: 0.000011 loss_cls: 4.1365 (3.9268) grad_norm: 4.2664 (4.3650) time: 0.7890 data: 0.0002 max mem: 8421 +[2024-12-05 21:45:01 root] (utils.py 283): INFO Epoch: [16] [ 110/2502] eta: 0:33:18 lr: 0.000011 loss_cls: 4.1560 (3.9196) grad_norm: 4.1309 (4.3517) time: 0.7891 data: 0.0002 max mem: 8421 +[2024-12-05 21:45:09 root] (utils.py 283): INFO Epoch: [16] [ 120/2502] eta: 0:32:59 lr: 0.000011 loss_cls: 3.9457 (3.9181) grad_norm: 4.2551 (4.3557) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 21:45:17 root] (utils.py 283): INFO Epoch: [16] [ 130/2502] eta: 0:32:40 lr: 0.000011 loss_cls: 3.9059 (3.9100) grad_norm: 4.2663 (4.3391) time: 0.7770 data: 0.0003 max mem: 8421 +[2024-12-05 21:45:25 root] (utils.py 283): INFO Epoch: [16] [ 140/2502] eta: 0:32:24 lr: 0.000011 loss_cls: 3.9059 (3.8968) grad_norm: 4.1438 (4.3344) time: 0.7752 data: 0.0003 max mem: 8421 +[2024-12-05 21:45:32 root] (utils.py 283): INFO Epoch: [16] [ 150/2502] eta: 0:32:09 lr: 0.000011 loss_cls: 3.8310 (3.8905) grad_norm: 4.3328 (4.3450) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 21:45:40 root] (utils.py 283): INFO Epoch: [16] [ 160/2502] eta: 0:31:55 lr: 0.000011 loss_cls: 3.8449 (3.8890) grad_norm: 4.1969 (4.3307) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 21:45:48 root] (utils.py 283): INFO Epoch: [16] [ 170/2502] eta: 0:31:41 lr: 0.000011 loss_cls: 3.8789 (3.8859) grad_norm: 4.0451 (4.3235) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 21:45:56 root] (utils.py 283): INFO Epoch: [16] [ 180/2502] eta: 0:31:28 lr: 0.000011 loss_cls: 3.8678 (3.8684) grad_norm: 4.1300 (4.3209) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 21:46:04 root] (utils.py 283): INFO Epoch: [16] [ 190/2502] eta: 0:31:16 lr: 0.000011 loss_cls: 3.6914 (3.8622) grad_norm: 4.1375 (4.3173) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 21:46:11 root] (utils.py 283): INFO Epoch: [16] [ 200/2502] eta: 0:31:05 lr: 0.000011 loss_cls: 3.6224 (3.8482) grad_norm: 4.0600 (4.3010) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 21:46:19 root] (utils.py 283): INFO Epoch: [16] [ 210/2502] eta: 0:30:54 lr: 0.000011 loss_cls: 3.7639 (3.8425) grad_norm: 3.9619 (4.2836) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-05 21:46:27 root] (utils.py 283): INFO Epoch: [16] [ 220/2502] eta: 0:30:42 lr: 0.000011 loss_cls: 3.8355 (3.8355) grad_norm: 3.8758 (4.2781) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 21:46:35 root] (utils.py 283): INFO Epoch: [16] [ 230/2502] eta: 0:30:31 lr: 0.000011 loss_cls: 3.8388 (3.8328) grad_norm: 4.0672 (4.2764) time: 0.7782 data: 0.0002 max mem: 8421 +[2024-12-05 21:46:43 root] (utils.py 283): INFO Epoch: [16] [ 240/2502] eta: 0:30:21 lr: 0.000011 loss_cls: 3.8636 (3.8297) grad_norm: 4.1516 (4.2796) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 21:46:50 root] (utils.py 283): INFO Epoch: [16] [ 250/2502] eta: 0:30:10 lr: 0.000011 loss_cls: 3.7551 (3.8168) grad_norm: 4.1149 (4.2767) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-05 21:46:58 root] (utils.py 283): INFO Epoch: [16] [ 260/2502] eta: 0:30:00 lr: 0.000011 loss_cls: 3.6254 (3.8058) grad_norm: 4.0644 (4.2733) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 21:47:06 root] (utils.py 283): INFO Epoch: [16] [ 270/2502] eta: 0:29:50 lr: 0.000011 loss_cls: 3.7344 (3.8002) grad_norm: 4.0618 (4.2693) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 21:47:14 root] (utils.py 283): INFO Epoch: [16] [ 280/2502] eta: 0:29:41 lr: 0.000011 loss_cls: 3.8968 (3.8067) grad_norm: 3.9249 (4.2587) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-05 21:47:22 root] (utils.py 283): INFO Epoch: [16] [ 290/2502] eta: 0:29:31 lr: 0.000011 loss_cls: 3.8936 (3.7974) grad_norm: 3.9949 (4.2596) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-05 21:47:29 root] (utils.py 283): INFO Epoch: [16] [ 300/2502] eta: 0:29:22 lr: 0.000011 loss_cls: 3.3947 (3.7891) grad_norm: 4.1353 (4.2542) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 21:47:37 root] (utils.py 283): INFO Epoch: [16] [ 310/2502] eta: 0:29:12 lr: 0.000011 loss_cls: 3.6248 (3.7872) grad_norm: 4.1374 (4.2568) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 21:47:45 root] (utils.py 283): INFO Epoch: [16] [ 320/2502] eta: 0:29:03 lr: 0.000011 loss_cls: 4.0567 (3.8002) grad_norm: 4.4841 (4.2613) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 21:47:53 root] (utils.py 283): INFO Epoch: [16] [ 330/2502] eta: 0:28:53 lr: 0.000011 loss_cls: 4.1492 (3.7973) grad_norm: 4.2688 (4.2564) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:01 root] (utils.py 283): INFO Epoch: [16] [ 340/2502] eta: 0:28:44 lr: 0.000011 loss_cls: 3.9577 (3.8024) grad_norm: 4.0092 (4.2499) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:09 root] (utils.py 283): INFO Epoch: [16] [ 350/2502] eta: 0:28:37 lr: 0.000011 loss_cls: 3.9159 (3.8025) grad_norm: 4.1109 (4.2601) time: 0.7907 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:17 root] (utils.py 283): INFO Epoch: [16] [ 360/2502] eta: 0:28:28 lr: 0.000011 loss_cls: 3.9123 (3.8001) grad_norm: 4.1395 (4.2600) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:24 root] (utils.py 283): INFO Epoch: [16] [ 370/2502] eta: 0:28:19 lr: 0.000011 loss_cls: 3.7433 (3.7976) grad_norm: 4.0209 (4.2539) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:32 root] (utils.py 283): INFO Epoch: [16] [ 380/2502] eta: 0:28:11 lr: 0.000011 loss_cls: 3.9640 (3.7953) grad_norm: 3.9916 (4.2494) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:40 root] (utils.py 283): INFO Epoch: [16] [ 390/2502] eta: 0:28:02 lr: 0.000011 loss_cls: 3.9640 (3.7929) grad_norm: 4.0191 (4.2542) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:48 root] (utils.py 283): INFO Epoch: [16] [ 400/2502] eta: 0:27:53 lr: 0.000011 loss_cls: 3.9018 (3.7973) grad_norm: 4.0697 (4.2520) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 21:48:56 root] (utils.py 283): INFO Epoch: [16] [ 410/2502] eta: 0:27:45 lr: 0.000011 loss_cls: 3.9599 (3.8039) grad_norm: 4.2234 (4.2542) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 21:49:04 root] (utils.py 283): INFO Epoch: [16] [ 420/2502] eta: 0:27:37 lr: 0.000011 loss_cls: 4.0299 (3.8054) grad_norm: 4.2234 (4.2590) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-05 21:49:12 root] (utils.py 283): INFO Epoch: [16] [ 430/2502] eta: 0:27:28 lr: 0.000011 loss_cls: 4.1354 (3.8111) grad_norm: 4.1979 (4.2602) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-05 21:49:19 root] (utils.py 283): INFO Epoch: [16] [ 440/2502] eta: 0:27:20 lr: 0.000011 loss_cls: 4.1473 (3.8171) grad_norm: 4.0891 (4.2576) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-05 21:49:27 root] (utils.py 283): INFO Epoch: [16] [ 450/2502] eta: 0:27:11 lr: 0.000011 loss_cls: 3.8614 (3.8163) grad_norm: 4.0606 (4.2529) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 21:49:35 root] (utils.py 283): INFO Epoch: [16] [ 460/2502] eta: 0:27:02 lr: 0.000011 loss_cls: 3.7903 (3.8110) grad_norm: 4.1520 (4.2611) time: 0.7784 data: 0.0002 max mem: 8421 +[2024-12-05 21:49:43 root] (utils.py 283): INFO Epoch: [16] [ 470/2502] eta: 0:26:54 lr: 0.000011 loss_cls: 3.5230 (3.8015) grad_norm: 4.1715 (4.2598) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 21:49:51 root] (utils.py 283): INFO Epoch: [16] [ 480/2502] eta: 0:26:45 lr: 0.000011 loss_cls: 3.5785 (3.7991) grad_norm: 4.0991 (4.2558) time: 0.7821 data: 0.0002 max mem: 8421 +[2024-12-05 21:49:58 root] (utils.py 283): INFO Epoch: [16] [ 490/2502] eta: 0:26:37 lr: 0.000011 loss_cls: 3.7839 (3.8050) grad_norm: 3.9696 (4.2522) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 21:50:06 root] (utils.py 283): INFO Epoch: [16] [ 500/2502] eta: 0:26:28 lr: 0.000011 loss_cls: 3.9106 (3.8019) grad_norm: 4.1582 (4.2543) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 21:50:14 root] (utils.py 283): INFO Epoch: [16] [ 510/2502] eta: 0:26:20 lr: 0.000011 loss_cls: 3.8580 (3.8063) grad_norm: 4.3145 (4.2568) time: 0.7810 data: 0.0002 max mem: 8421 +[2024-12-05 21:50:22 root] (utils.py 283): INFO Epoch: [16] [ 520/2502] eta: 0:26:12 lr: 0.000011 loss_cls: 3.9780 (3.8081) grad_norm: 4.1493 (4.2573) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-05 21:50:30 root] (utils.py 283): INFO Epoch: [16] [ 530/2502] eta: 0:26:03 lr: 0.000011 loss_cls: 3.9171 (3.8026) grad_norm: 4.0502 (4.2548) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 21:50:37 root] (utils.py 283): INFO Epoch: [16] [ 540/2502] eta: 0:25:55 lr: 0.000011 loss_cls: 3.9740 (3.8010) grad_norm: 3.9485 (4.2531) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-05 21:50:45 root] (utils.py 283): INFO Epoch: [16] [ 550/2502] eta: 0:25:46 lr: 0.000011 loss_cls: 3.9740 (3.8004) grad_norm: 4.1721 (4.2549) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-05 21:50:53 root] (utils.py 283): INFO Epoch: [16] [ 560/2502] eta: 0:25:38 lr: 0.000011 loss_cls: 4.1442 (3.8051) grad_norm: 4.2547 (4.2542) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 21:51:01 root] (utils.py 283): INFO Epoch: [16] [ 570/2502] eta: 0:25:30 lr: 0.000011 loss_cls: 4.1374 (3.8082) grad_norm: 4.0605 (4.2513) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 21:51:09 root] (utils.py 283): INFO Epoch: [16] [ 580/2502] eta: 0:25:21 lr: 0.000011 loss_cls: 4.0296 (3.8102) grad_norm: 4.0827 (4.2608) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 21:51:16 root] (utils.py 283): INFO Epoch: [16] [ 590/2502] eta: 0:25:13 lr: 0.000011 loss_cls: 3.9938 (3.8108) grad_norm: 4.0898 (4.2631) time: 0.7769 data: 0.0002 max mem: 8421 +[2024-12-05 21:51:24 root] (utils.py 283): INFO Epoch: [16] [ 600/2502] eta: 0:25:04 lr: 0.000011 loss_cls: 3.9804 (3.8096) grad_norm: 4.1018 (4.2609) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-05 21:51:32 root] (utils.py 283): INFO Epoch: [16] [ 610/2502] eta: 0:24:56 lr: 0.000011 loss_cls: 3.7520 (3.8093) grad_norm: 4.2051 (4.2653) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-05 21:51:40 root] (utils.py 283): INFO Epoch: [16] [ 620/2502] eta: 0:24:48 lr: 0.000011 loss_cls: 3.7520 (3.8114) grad_norm: 4.2000 (4.2629) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 21:51:48 root] (utils.py 283): INFO Epoch: [16] [ 630/2502] eta: 0:24:39 lr: 0.000011 loss_cls: 3.9770 (3.8136) grad_norm: 4.1446 (4.2662) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 21:51:55 root] (utils.py 283): INFO Epoch: [16] [ 640/2502] eta: 0:24:31 lr: 0.000011 loss_cls: 3.8762 (3.8126) grad_norm: 4.2006 (4.2669) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 21:52:03 root] (utils.py 283): INFO Epoch: [16] [ 650/2502] eta: 0:24:23 lr: 0.000011 loss_cls: 3.8284 (3.8128) grad_norm: 4.1583 (4.2666) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-05 21:52:11 root] (utils.py 283): INFO Epoch: [16] [ 660/2502] eta: 0:24:15 lr: 0.000011 loss_cls: 3.6007 (3.8061) grad_norm: 4.1654 (4.2657) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 21:52:19 root] (utils.py 283): INFO Epoch: [16] [ 670/2502] eta: 0:24:07 lr: 0.000011 loss_cls: 3.7604 (3.8101) grad_norm: 4.1654 (4.2655) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 21:52:27 root] (utils.py 283): INFO Epoch: [16] [ 680/2502] eta: 0:23:58 lr: 0.000011 loss_cls: 4.0953 (3.8113) grad_norm: 4.2193 (4.2660) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-05 21:52:34 root] (utils.py 283): INFO Epoch: [16] [ 690/2502] eta: 0:23:50 lr: 0.000011 loss_cls: 3.5587 (3.8071) grad_norm: 4.1681 (4.2624) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-05 21:52:42 root] (utils.py 283): INFO Epoch: [16] [ 700/2502] eta: 0:23:42 lr: 0.000011 loss_cls: 3.4625 (3.8049) grad_norm: 4.0710 (4.2616) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-05 21:52:50 root] (utils.py 283): INFO Epoch: [16] [ 710/2502] eta: 0:23:34 lr: 0.000011 loss_cls: 3.7406 (3.8041) grad_norm: 4.1526 (4.2917) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-05 21:52:58 root] (utils.py 283): INFO Epoch: [16] [ 720/2502] eta: 0:23:26 lr: 0.000011 loss_cls: 3.7406 (3.8018) grad_norm: 4.3682 (4.2996) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-05 21:53:06 root] (utils.py 283): INFO Epoch: [16] [ 730/2502] eta: 0:23:18 lr: 0.000011 loss_cls: 3.8632 (3.8023) grad_norm: 4.4724 (4.2996) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 21:53:13 root] (utils.py 283): INFO Epoch: [16] [ 740/2502] eta: 0:23:10 lr: 0.000011 loss_cls: 4.0148 (3.8074) grad_norm: 4.1042 (4.2979) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 21:53:21 root] (utils.py 283): INFO Epoch: [16] [ 750/2502] eta: 0:23:02 lr: 0.000011 loss_cls: 4.0290 (3.8059) grad_norm: 4.1208 (4.2947) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 21:53:29 root] (utils.py 283): INFO Epoch: [16] [ 760/2502] eta: 0:22:54 lr: 0.000011 loss_cls: 3.9675 (3.8062) grad_norm: 4.1824 (4.2965) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 21:53:37 root] (utils.py 283): INFO Epoch: [16] [ 770/2502] eta: 0:22:46 lr: 0.000011 loss_cls: 3.9675 (3.8069) grad_norm: 4.1850 (4.2958) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-05 21:53:45 root] (utils.py 283): INFO Epoch: [16] [ 780/2502] eta: 0:22:38 lr: 0.000011 loss_cls: 3.8383 (3.8081) grad_norm: 4.0521 (4.2986) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-05 21:53:52 root] (utils.py 283): INFO Epoch: [16] [ 790/2502] eta: 0:22:30 lr: 0.000011 loss_cls: 3.7677 (3.8059) grad_norm: 3.9949 (4.2957) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:00 root] (utils.py 283): INFO Epoch: [16] [ 800/2502] eta: 0:22:21 lr: 0.000011 loss_cls: 3.7677 (3.8060) grad_norm: 4.0508 (4.2950) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:08 root] (utils.py 283): INFO Epoch: [16] [ 810/2502] eta: 0:22:14 lr: 0.000011 loss_cls: 3.5256 (3.7986) grad_norm: 4.0642 (4.2920) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:16 root] (utils.py 283): INFO Epoch: [16] [ 820/2502] eta: 0:22:06 lr: 0.000011 loss_cls: 3.5256 (3.7998) grad_norm: 4.0437 (4.2930) time: 0.7868 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:24 root] (utils.py 283): INFO Epoch: [16] [ 830/2502] eta: 0:21:58 lr: 0.000011 loss_cls: 3.9107 (3.7957) grad_norm: 4.1511 (4.2923) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:32 root] (utils.py 283): INFO Epoch: [16] [ 840/2502] eta: 0:21:50 lr: 0.000011 loss_cls: 3.5929 (3.7955) grad_norm: 4.1778 (4.2931) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:39 root] (utils.py 283): INFO Epoch: [16] [ 850/2502] eta: 0:21:42 lr: 0.000011 loss_cls: 3.9594 (3.7933) grad_norm: 4.1522 (4.2930) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:47 root] (utils.py 283): INFO Epoch: [16] [ 860/2502] eta: 0:21:34 lr: 0.000011 loss_cls: 3.7882 (3.7937) grad_norm: 4.0744 (4.2911) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-05 21:54:55 root] (utils.py 283): INFO Epoch: [16] [ 870/2502] eta: 0:21:26 lr: 0.000011 loss_cls: 3.8038 (3.7937) grad_norm: 4.2164 (4.2919) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 21:55:03 root] (utils.py 283): INFO Epoch: [16] [ 880/2502] eta: 0:21:18 lr: 0.000011 loss_cls: 3.8038 (3.7917) grad_norm: 4.2161 (4.2904) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-05 21:55:11 root] (utils.py 283): INFO Epoch: [16] [ 890/2502] eta: 0:21:10 lr: 0.000011 loss_cls: 3.7437 (3.7896) grad_norm: 4.2146 (4.2916) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 21:55:19 root] (utils.py 283): INFO Epoch: [16] [ 900/2502] eta: 0:21:02 lr: 0.000011 loss_cls: 3.8738 (3.7899) grad_norm: 4.2064 (4.2911) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-05 21:55:26 root] (utils.py 283): INFO Epoch: [16] [ 910/2502] eta: 0:20:53 lr: 0.000011 loss_cls: 3.9751 (3.7902) grad_norm: 4.0687 (4.2888) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 21:55:34 root] (utils.py 283): INFO Epoch: [16] [ 920/2502] eta: 0:20:45 lr: 0.000011 loss_cls: 4.1408 (3.7921) grad_norm: 4.0687 (4.2951) time: 0.7746 data: 0.0003 max mem: 8421 +[2024-12-05 21:55:42 root] (utils.py 283): INFO Epoch: [16] [ 930/2502] eta: 0:20:37 lr: 0.000011 loss_cls: 4.1332 (3.7950) grad_norm: 4.2112 (4.2946) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 21:55:50 root] (utils.py 283): INFO Epoch: [16] [ 940/2502] eta: 0:20:29 lr: 0.000011 loss_cls: 4.0351 (3.7940) grad_norm: 4.2545 (4.2952) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 21:55:57 root] (utils.py 283): INFO Epoch: [16] [ 950/2502] eta: 0:20:21 lr: 0.000011 loss_cls: 3.2725 (3.7887) grad_norm: 4.2545 (4.2960) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-05 21:56:05 root] (utils.py 283): INFO Epoch: [16] [ 960/2502] eta: 0:20:13 lr: 0.000011 loss_cls: 3.7373 (3.7898) grad_norm: 4.2836 (4.2969) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 21:56:13 root] (utils.py 283): INFO Epoch: [16] [ 970/2502] eta: 0:20:05 lr: 0.000011 loss_cls: 3.9420 (3.7905) grad_norm: 4.1605 (4.2948) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-05 21:56:21 root] (utils.py 283): INFO Epoch: [16] [ 980/2502] eta: 0:19:57 lr: 0.000011 loss_cls: 3.9364 (3.7897) grad_norm: 4.0074 (4.2934) time: 0.7769 data: 0.0002 max mem: 8421 +[2024-12-05 21:56:29 root] (utils.py 283): INFO Epoch: [16] [ 990/2502] eta: 0:19:49 lr: 0.000011 loss_cls: 4.0108 (3.7896) grad_norm: 4.1409 (4.2945) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 21:56:36 root] (utils.py 283): INFO Epoch: [16] [1000/2502] eta: 0:19:41 lr: 0.000011 loss_cls: 3.9466 (3.7890) grad_norm: 4.1659 (4.2932) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-05 21:56:44 root] (utils.py 283): INFO Epoch: [16] [1010/2502] eta: 0:19:33 lr: 0.000011 loss_cls: 3.9082 (3.7903) grad_norm: 4.1277 (4.2916) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 21:56:52 root] (utils.py 283): INFO Epoch: [16] [1020/2502] eta: 0:19:25 lr: 0.000011 loss_cls: 3.9621 (3.7897) grad_norm: 4.1700 (4.2915) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-05 21:57:00 root] (utils.py 283): INFO Epoch: [16] [1030/2502] eta: 0:19:17 lr: 0.000011 loss_cls: 3.6538 (3.7865) grad_norm: 4.1805 (4.2922) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 21:57:08 root] (utils.py 283): INFO Epoch: [16] [1040/2502] eta: 0:19:09 lr: 0.000011 loss_cls: 3.9116 (3.7885) grad_norm: 4.2910 (4.2925) time: 0.7803 data: 0.0002 max mem: 8421 +[2024-12-05 21:57:15 root] (utils.py 283): INFO Epoch: [16] [1050/2502] eta: 0:19:02 lr: 0.000011 loss_cls: 3.8766 (3.7879) grad_norm: 4.3257 (4.2986) time: 0.7840 data: 0.0002 max mem: 8421 +[2024-12-05 21:57:23 root] (utils.py 283): INFO Epoch: [16] [1060/2502] eta: 0:18:54 lr: 0.000011 loss_cls: 3.8766 (3.7894) grad_norm: 4.3155 (4.3014) time: 0.7806 data: 0.0002 max mem: 8421 +[2024-12-05 21:57:31 root] (utils.py 283): INFO Epoch: [16] [1070/2502] eta: 0:18:46 lr: 0.000011 loss_cls: 3.8822 (3.7893) grad_norm: 4.3809 (4.3019) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-05 21:57:39 root] (utils.py 283): INFO Epoch: [16] [1080/2502] eta: 0:18:38 lr: 0.000011 loss_cls: 3.8737 (3.7889) grad_norm: 4.4198 (4.3071) time: 0.7776 data: 0.0002 max mem: 8421 +[2024-12-05 21:57:47 root] (utils.py 283): INFO Epoch: [16] [1090/2502] eta: 0:18:30 lr: 0.000011 loss_cls: 3.7616 (3.7876) grad_norm: 4.1802 (4.3070) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-05 21:57:54 root] (utils.py 283): INFO Epoch: [16] [1100/2502] eta: 0:18:22 lr: 0.000011 loss_cls: 3.7616 (3.7873) grad_norm: 4.1802 (4.3073) time: 0.7835 data: 0.0002 max mem: 8421 +[2024-12-05 21:58:02 root] (utils.py 283): INFO Epoch: [16] [1110/2502] eta: 0:18:14 lr: 0.000011 loss_cls: 3.8031 (3.7874) grad_norm: 4.2336 (4.3072) time: 0.7819 data: 0.0002 max mem: 8421 +[2024-12-05 21:58:10 root] (utils.py 283): INFO Epoch: [16] [1120/2502] eta: 0:18:06 lr: 0.000011 loss_cls: 3.9657 (3.7891) grad_norm: 4.2101 (4.3077) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-05 21:58:18 root] (utils.py 283): INFO Epoch: [16] [1130/2502] eta: 0:17:58 lr: 0.000011 loss_cls: 4.0078 (3.7909) grad_norm: 4.1919 (4.3061) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-05 21:58:26 root] (utils.py 283): INFO Epoch: [16] [1140/2502] eta: 0:17:50 lr: 0.000011 loss_cls: 3.9075 (3.7896) grad_norm: 4.1782 (4.3056) time: 0.7812 data: 0.0002 max mem: 8421 +[2024-12-05 21:58:33 root] (utils.py 283): INFO Epoch: [16] [1150/2502] eta: 0:17:42 lr: 0.000011 loss_cls: 3.6905 (3.7886) grad_norm: 4.0955 (4.3035) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 21:58:41 root] (utils.py 283): INFO Epoch: [16] [1160/2502] eta: 0:17:34 lr: 0.000011 loss_cls: 3.8527 (3.7889) grad_norm: 4.0103 (4.3021) time: 0.7802 data: 0.0002 max mem: 8421 +[2024-12-05 21:58:49 root] (utils.py 283): INFO Epoch: [16] [1170/2502] eta: 0:17:26 lr: 0.000011 loss_cls: 3.7889 (3.7879) grad_norm: 4.1578 (4.3017) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-05 21:58:57 root] (utils.py 283): INFO Epoch: [16] [1180/2502] eta: 0:17:18 lr: 0.000011 loss_cls: 4.0369 (3.7907) grad_norm: 4.1812 (4.3015) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 21:59:05 root] (utils.py 283): INFO Epoch: [16] [1190/2502] eta: 0:17:10 lr: 0.000011 loss_cls: 4.1198 (3.7928) grad_norm: 4.1732 (4.3000) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 21:59:12 root] (utils.py 283): INFO Epoch: [16] [1200/2502] eta: 0:17:02 lr: 0.000011 loss_cls: 4.0538 (3.7939) grad_norm: 4.0524 (4.2986) time: 0.7765 data: 0.0002 max mem: 8421 +[2024-12-05 21:59:20 root] (utils.py 283): INFO Epoch: [16] [1210/2502] eta: 0:16:55 lr: 0.000011 loss_cls: 3.8048 (3.7931) grad_norm: 4.1773 (4.2983) time: 0.7803 data: 0.0002 max mem: 8421 +[2024-12-05 21:59:28 root] (utils.py 283): INFO Epoch: [16] [1220/2502] eta: 0:16:47 lr: 0.000011 loss_cls: 3.7637 (3.7929) grad_norm: 4.2432 (4.3002) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-05 21:59:36 root] (utils.py 283): INFO Epoch: [16] [1230/2502] eta: 0:16:39 lr: 0.000011 loss_cls: 3.8372 (3.7930) grad_norm: 4.2255 (4.2995) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-05 21:59:44 root] (utils.py 283): INFO Epoch: [16] [1240/2502] eta: 0:16:31 lr: 0.000011 loss_cls: 3.9961 (3.7933) grad_norm: 4.1553 (4.2980) time: 0.7911 data: 0.0003 max mem: 8421 +[2024-12-05 21:59:52 root] (utils.py 283): INFO Epoch: [16] [1250/2502] eta: 0:16:23 lr: 0.000011 loss_cls: 3.7406 (3.7918) grad_norm: 4.0614 (4.2959) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 21:59:59 root] (utils.py 283): INFO Epoch: [16] [1260/2502] eta: 0:16:15 lr: 0.000011 loss_cls: 3.9465 (3.7928) grad_norm: 4.1362 (4.2961) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-05 22:00:07 root] (utils.py 283): INFO Epoch: [16] [1270/2502] eta: 0:16:07 lr: 0.000011 loss_cls: 3.9655 (3.7939) grad_norm: 4.2004 (4.2994) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-05 22:00:15 root] (utils.py 283): INFO Epoch: [16] [1280/2502] eta: 0:15:59 lr: 0.000011 loss_cls: 4.0003 (3.7958) grad_norm: 4.1822 (4.2989) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-05 22:00:23 root] (utils.py 283): INFO Epoch: [16] [1290/2502] eta: 0:15:51 lr: 0.000011 loss_cls: 4.0726 (3.7974) grad_norm: 4.3269 (4.3041) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 22:00:31 root] (utils.py 283): INFO Epoch: [16] [1300/2502] eta: 0:15:44 lr: 0.000011 loss_cls: 4.0618 (3.7979) grad_norm: 4.2635 (4.3043) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 22:00:38 root] (utils.py 283): INFO Epoch: [16] [1310/2502] eta: 0:15:36 lr: 0.000011 loss_cls: 3.7110 (3.7963) grad_norm: 4.3487 (4.3097) time: 0.7822 data: 0.0002 max mem: 8421 +[2024-12-05 22:00:46 root] (utils.py 283): INFO Epoch: [16] [1320/2502] eta: 0:15:28 lr: 0.000011 loss_cls: 3.5965 (3.7943) grad_norm: 4.3487 (4.3104) time: 0.7825 data: 0.0002 max mem: 8421 +[2024-12-05 22:00:54 root] (utils.py 283): INFO Epoch: [16] [1330/2502] eta: 0:15:20 lr: 0.000011 loss_cls: 3.8637 (3.7953) grad_norm: 4.3104 (4.3116) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:02 root] (utils.py 283): INFO Epoch: [16] [1340/2502] eta: 0:15:12 lr: 0.000011 loss_cls: 4.1415 (3.7961) grad_norm: 4.2018 (4.3111) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:10 root] (utils.py 283): INFO Epoch: [16] [1350/2502] eta: 0:15:04 lr: 0.000011 loss_cls: 3.8272 (3.7961) grad_norm: 4.1553 (4.3098) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:17 root] (utils.py 283): INFO Epoch: [16] [1360/2502] eta: 0:14:56 lr: 0.000011 loss_cls: 3.8858 (3.7978) grad_norm: 4.3179 (4.3116) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:25 root] (utils.py 283): INFO Epoch: [16] [1370/2502] eta: 0:14:48 lr: 0.000011 loss_cls: 4.0274 (3.7988) grad_norm: 4.3179 (4.3141) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:33 root] (utils.py 283): INFO Epoch: [16] [1380/2502] eta: 0:14:40 lr: 0.000011 loss_cls: 4.0224 (3.7999) grad_norm: 4.1987 (4.3131) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:41 root] (utils.py 283): INFO Epoch: [16] [1390/2502] eta: 0:14:33 lr: 0.000011 loss_cls: 4.0180 (3.8007) grad_norm: 4.2508 (4.3139) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:49 root] (utils.py 283): INFO Epoch: [16] [1400/2502] eta: 0:14:25 lr: 0.000011 loss_cls: 4.0407 (3.8007) grad_norm: 4.3942 (4.3143) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-05 22:01:57 root] (utils.py 283): INFO Epoch: [16] [1410/2502] eta: 0:14:17 lr: 0.000011 loss_cls: 4.0491 (3.8033) grad_norm: 4.2161 (4.3152) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:04 root] (utils.py 283): INFO Epoch: [16] [1420/2502] eta: 0:14:09 lr: 0.000011 loss_cls: 3.9095 (3.8024) grad_norm: 4.2101 (4.3146) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:12 root] (utils.py 283): INFO Epoch: [16] [1430/2502] eta: 0:14:01 lr: 0.000011 loss_cls: 3.8175 (3.8027) grad_norm: 3.9790 (4.3131) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:20 root] (utils.py 283): INFO Epoch: [16] [1440/2502] eta: 0:13:53 lr: 0.000011 loss_cls: 3.9225 (3.8020) grad_norm: 3.9871 (4.3122) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:28 root] (utils.py 283): INFO Epoch: [16] [1450/2502] eta: 0:13:45 lr: 0.000011 loss_cls: 3.9903 (3.8026) grad_norm: 4.2576 (4.3136) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:36 root] (utils.py 283): INFO Epoch: [16] [1460/2502] eta: 0:13:37 lr: 0.000011 loss_cls: 4.1206 (3.8039) grad_norm: 4.2947 (4.3130) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:43 root] (utils.py 283): INFO Epoch: [16] [1470/2502] eta: 0:13:30 lr: 0.000011 loss_cls: 4.0789 (3.8032) grad_norm: 4.1642 (4.3121) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:51 root] (utils.py 283): INFO Epoch: [16] [1480/2502] eta: 0:13:22 lr: 0.000011 loss_cls: 3.5680 (3.8005) grad_norm: 4.0096 (4.3103) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-05 22:02:59 root] (utils.py 283): INFO Epoch: [16] [1490/2502] eta: 0:13:14 lr: 0.000011 loss_cls: 3.4256 (3.7969) grad_norm: 4.1238 (4.3103) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 22:03:07 root] (utils.py 283): INFO Epoch: [16] [1500/2502] eta: 0:13:06 lr: 0.000011 loss_cls: 3.4642 (3.7967) grad_norm: 4.1262 (4.3094) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 22:03:15 root] (utils.py 283): INFO Epoch: [16] [1510/2502] eta: 0:12:58 lr: 0.000011 loss_cls: 3.9149 (3.7971) grad_norm: 4.1074 (4.3145) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-05 22:03:23 root] (utils.py 283): INFO Epoch: [16] [1520/2502] eta: 0:12:50 lr: 0.000011 loss_cls: 3.9199 (3.7981) grad_norm: 4.2096 (4.3137) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-05 22:03:30 root] (utils.py 283): INFO Epoch: [16] [1530/2502] eta: 0:12:42 lr: 0.000011 loss_cls: 3.9199 (3.7977) grad_norm: 4.2252 (4.3140) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 22:03:38 root] (utils.py 283): INFO Epoch: [16] [1540/2502] eta: 0:12:34 lr: 0.000011 loss_cls: 3.8753 (3.7976) grad_norm: 4.1962 (4.3135) time: 0.7824 data: 0.0002 max mem: 8421 +[2024-12-05 22:03:46 root] (utils.py 283): INFO Epoch: [16] [1550/2502] eta: 0:12:27 lr: 0.000011 loss_cls: 3.7815 (3.7964) grad_norm: 4.1880 (4.3146) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-05 22:03:54 root] (utils.py 283): INFO Epoch: [16] [1560/2502] eta: 0:12:19 lr: 0.000011 loss_cls: 3.6562 (3.7955) grad_norm: 4.3074 (4.3161) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-05 22:04:02 root] (utils.py 283): INFO Epoch: [16] [1570/2502] eta: 0:12:11 lr: 0.000011 loss_cls: 3.6562 (3.7931) grad_norm: 4.2739 (4.3159) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 22:04:09 root] (utils.py 283): INFO Epoch: [16] [1580/2502] eta: 0:12:03 lr: 0.000011 loss_cls: 3.6936 (3.7928) grad_norm: 4.1013 (4.3149) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-05 22:04:17 root] (utils.py 283): INFO Epoch: [16] [1590/2502] eta: 0:11:55 lr: 0.000011 loss_cls: 3.9478 (3.7942) grad_norm: 4.2534 (4.3215) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 22:04:25 root] (utils.py 283): INFO Epoch: [16] [1600/2502] eta: 0:11:47 lr: 0.000011 loss_cls: 3.9756 (3.7958) grad_norm: 4.4617 (4.3216) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-05 22:04:33 root] (utils.py 283): INFO Epoch: [16] [1610/2502] eta: 0:11:39 lr: 0.000011 loss_cls: 3.9907 (3.7962) grad_norm: 4.1519 (4.3214) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 22:04:41 root] (utils.py 283): INFO Epoch: [16] [1620/2502] eta: 0:11:32 lr: 0.000011 loss_cls: 3.8135 (3.7965) grad_norm: 4.1519 (4.3208) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 22:04:49 root] (utils.py 283): INFO Epoch: [16] [1630/2502] eta: 0:11:24 lr: 0.000011 loss_cls: 3.9796 (3.7971) grad_norm: 4.2522 (4.3227) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-05 22:04:56 root] (utils.py 283): INFO Epoch: [16] [1640/2502] eta: 0:11:16 lr: 0.000011 loss_cls: 3.9973 (3.7969) grad_norm: 4.2891 (4.3220) time: 0.7782 data: 0.0002 max mem: 8421 +[2024-12-05 22:05:04 root] (utils.py 283): INFO Epoch: [16] [1650/2502] eta: 0:11:08 lr: 0.000011 loss_cls: 3.6801 (3.7939) grad_norm: 4.2313 (4.3213) time: 0.7861 data: 0.0002 max mem: 8421 +[2024-12-05 22:05:12 root] (utils.py 283): INFO Epoch: [16] [1660/2502] eta: 0:11:00 lr: 0.000011 loss_cls: 3.3008 (3.7914) grad_norm: 4.2191 (4.3241) time: 0.7942 data: 0.0003 max mem: 8421 +[2024-12-05 22:05:20 root] (utils.py 283): INFO Epoch: [16] [1670/2502] eta: 0:10:52 lr: 0.000011 loss_cls: 3.4715 (3.7914) grad_norm: 4.2258 (4.3240) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 22:05:28 root] (utils.py 283): INFO Epoch: [16] [1680/2502] eta: 0:10:44 lr: 0.000011 loss_cls: 3.9563 (3.7907) grad_norm: 4.2025 (4.3231) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:05:36 root] (utils.py 283): INFO Epoch: [16] [1690/2502] eta: 0:10:37 lr: 0.000011 loss_cls: 3.9033 (3.7907) grad_norm: 4.1617 (4.3217) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-05 22:05:43 root] (utils.py 283): INFO Epoch: [16] [1700/2502] eta: 0:10:29 lr: 0.000011 loss_cls: 4.0725 (3.7932) grad_norm: 4.1631 (4.3221) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 22:05:51 root] (utils.py 283): INFO Epoch: [16] [1710/2502] eta: 0:10:21 lr: 0.000011 loss_cls: 4.0827 (3.7930) grad_norm: 4.3166 (4.3219) time: 0.7757 data: 0.0002 max mem: 8421 +[2024-12-05 22:05:59 root] (utils.py 283): INFO Epoch: [16] [1720/2502] eta: 0:10:13 lr: 0.000011 loss_cls: 3.6161 (3.7919) grad_norm: 4.1962 (4.3213) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-05 22:06:07 root] (utils.py 283): INFO Epoch: [16] [1730/2502] eta: 0:10:05 lr: 0.000011 loss_cls: 4.0325 (3.7933) grad_norm: 4.1743 (4.3216) time: 0.7863 data: 0.0002 max mem: 8421 +[2024-12-05 22:06:15 root] (utils.py 283): INFO Epoch: [16] [1740/2502] eta: 0:09:57 lr: 0.000011 loss_cls: 3.9239 (3.7926) grad_norm: 4.0969 (4.3215) time: 0.7854 data: 0.0002 max mem: 8421 +[2024-12-05 22:06:23 root] (utils.py 283): INFO Epoch: [16] [1750/2502] eta: 0:09:49 lr: 0.000011 loss_cls: 3.5595 (3.7917) grad_norm: 4.1912 (4.3221) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-05 22:06:30 root] (utils.py 283): INFO Epoch: [16] [1760/2502] eta: 0:09:42 lr: 0.000011 loss_cls: 3.4093 (3.7912) grad_norm: 4.2553 (4.3217) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-05 22:06:38 root] (utils.py 283): INFO Epoch: [16] [1770/2502] eta: 0:09:34 lr: 0.000011 loss_cls: 3.9643 (3.7919) grad_norm: 4.1557 (4.3232) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 22:06:46 root] (utils.py 283): INFO Epoch: [16] [1780/2502] eta: 0:09:26 lr: 0.000011 loss_cls: 3.9643 (3.7914) grad_norm: 4.1557 (4.3225) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 22:06:54 root] (utils.py 283): INFO Epoch: [16] [1790/2502] eta: 0:09:18 lr: 0.000011 loss_cls: 3.8043 (3.7905) grad_norm: 4.2656 (4.3221) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 22:07:02 root] (utils.py 283): INFO Epoch: [16] [1800/2502] eta: 0:09:10 lr: 0.000011 loss_cls: 3.8169 (3.7917) grad_norm: 4.2986 (4.3219) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 22:07:09 root] (utils.py 283): INFO Epoch: [16] [1810/2502] eta: 0:09:02 lr: 0.000011 loss_cls: 3.8169 (3.7910) grad_norm: 4.1676 (4.3218) time: 0.7825 data: 0.0002 max mem: 8421 +[2024-12-05 22:07:17 root] (utils.py 283): INFO Epoch: [16] [1820/2502] eta: 0:08:55 lr: 0.000011 loss_cls: 3.7989 (3.7913) grad_norm: 4.1162 (4.3219) time: 0.7980 data: 0.0003 max mem: 8421 +[2024-12-05 22:07:26 root] (utils.py 283): INFO Epoch: [16] [1830/2502] eta: 0:08:47 lr: 0.000011 loss_cls: 3.9705 (3.7934) grad_norm: 4.1162 (4.3212) time: 0.8096 data: 0.0003 max mem: 8421 +[2024-12-05 22:07:34 root] (utils.py 283): INFO Epoch: [16] [1840/2502] eta: 0:08:39 lr: 0.000011 loss_cls: 3.9388 (3.7926) grad_norm: 4.2249 (4.3209) time: 0.8068 data: 0.0003 max mem: 8421 +[2024-12-05 22:07:42 root] (utils.py 283): INFO Epoch: [16] [1850/2502] eta: 0:08:31 lr: 0.000011 loss_cls: 3.7280 (3.7916) grad_norm: 4.2249 (4.3210) time: 0.8083 data: 0.0003 max mem: 8421 +[2024-12-05 22:07:50 root] (utils.py 283): INFO Epoch: [16] [1860/2502] eta: 0:08:24 lr: 0.000011 loss_cls: 3.7096 (3.7910) grad_norm: 4.1667 (4.3211) time: 0.8091 data: 0.0003 max mem: 8421 +[2024-12-05 22:07:58 root] (utils.py 283): INFO Epoch: [16] [1870/2502] eta: 0:08:16 lr: 0.000011 loss_cls: 3.7320 (3.7898) grad_norm: 4.2035 (4.3235) time: 0.8080 data: 0.0003 max mem: 8421 +[2024-12-05 22:08:06 root] (utils.py 283): INFO Epoch: [16] [1880/2502] eta: 0:08:08 lr: 0.000011 loss_cls: 4.0846 (3.7914) grad_norm: 4.3118 (4.3257) time: 0.8099 data: 0.0003 max mem: 8421 +[2024-12-05 22:08:14 root] (utils.py 283): INFO Epoch: [16] [1890/2502] eta: 0:08:00 lr: 0.000011 loss_cls: 4.0846 (3.7906) grad_norm: 4.3681 (4.3270) time: 0.8085 data: 0.0002 max mem: 8421 +[2024-12-05 22:08:22 root] (utils.py 283): INFO Epoch: [16] [1900/2502] eta: 0:07:52 lr: 0.000011 loss_cls: 3.6930 (3.7897) grad_norm: 4.2680 (4.3268) time: 0.8077 data: 0.0003 max mem: 8421 +[2024-12-05 22:08:30 root] (utils.py 283): INFO Epoch: [16] [1910/2502] eta: 0:07:45 lr: 0.000011 loss_cls: 3.7684 (3.7902) grad_norm: 4.2374 (4.3267) time: 0.8087 data: 0.0003 max mem: 8421 +[2024-12-05 22:08:38 root] (utils.py 283): INFO Epoch: [16] [1920/2502] eta: 0:07:37 lr: 0.000011 loss_cls: 3.5823 (3.7877) grad_norm: 4.2374 (4.3264) time: 0.8080 data: 0.0003 max mem: 8421 +[2024-12-05 22:08:46 root] (utils.py 283): INFO Epoch: [16] [1930/2502] eta: 0:07:29 lr: 0.000011 loss_cls: 3.5437 (3.7873) grad_norm: 4.1740 (4.3268) time: 0.8033 data: 0.0003 max mem: 8421 +[2024-12-05 22:08:54 root] (utils.py 283): INFO Epoch: [16] [1940/2502] eta: 0:07:21 lr: 0.000011 loss_cls: 3.7405 (3.7866) grad_norm: 4.1938 (4.3269) time: 0.7914 data: 0.0002 max mem: 8421 +[2024-12-05 22:09:02 root] (utils.py 283): INFO Epoch: [16] [1950/2502] eta: 0:07:13 lr: 0.000011 loss_cls: 3.7405 (3.7858) grad_norm: 4.1106 (4.3267) time: 0.7837 data: 0.0002 max mem: 8421 +[2024-12-05 22:09:10 root] (utils.py 283): INFO Epoch: [16] [1960/2502] eta: 0:07:05 lr: 0.000011 loss_cls: 3.7954 (3.7849) grad_norm: 4.0438 (4.3254) time: 0.7878 data: 0.0002 max mem: 8421 +[2024-12-05 22:09:18 root] (utils.py 283): INFO Epoch: [16] [1970/2502] eta: 0:06:58 lr: 0.000011 loss_cls: 3.7903 (3.7835) grad_norm: 4.0303 (4.3243) time: 0.7954 data: 0.0003 max mem: 8421 +[2024-12-05 22:09:26 root] (utils.py 283): INFO Epoch: [16] [1980/2502] eta: 0:06:50 lr: 0.000011 loss_cls: 3.6189 (3.7832) grad_norm: 4.3335 (4.3259) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-05 22:09:34 root] (utils.py 283): INFO Epoch: [16] [1990/2502] eta: 0:06:42 lr: 0.000011 loss_cls: 3.9265 (3.7827) grad_norm: 4.5212 (4.3264) time: 0.7991 data: 0.0003 max mem: 8421 +[2024-12-05 22:09:42 root] (utils.py 283): INFO Epoch: [16] [2000/2502] eta: 0:06:34 lr: 0.000011 loss_cls: 3.7927 (3.7821) grad_norm: 4.2805 (4.3273) time: 0.7997 data: 0.0003 max mem: 8421 +[2024-12-05 22:09:50 root] (utils.py 283): INFO Epoch: [16] [2010/2502] eta: 0:06:26 lr: 0.000011 loss_cls: 3.9594 (3.7835) grad_norm: 4.2051 (4.3267) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 22:09:57 root] (utils.py 283): INFO Epoch: [16] [2020/2502] eta: 0:06:18 lr: 0.000011 loss_cls: 3.9536 (3.7832) grad_norm: 4.2051 (4.3263) time: 0.7871 data: 0.0002 max mem: 8421 +[2024-12-05 22:10:05 root] (utils.py 283): INFO Epoch: [16] [2030/2502] eta: 0:06:10 lr: 0.000011 loss_cls: 3.9536 (3.7840) grad_norm: 4.0933 (4.3253) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-05 22:10:13 root] (utils.py 283): INFO Epoch: [16] [2040/2502] eta: 0:06:03 lr: 0.000011 loss_cls: 4.1072 (3.7846) grad_norm: 4.0933 (4.3248) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 22:10:21 root] (utils.py 283): INFO Epoch: [16] [2050/2502] eta: 0:05:55 lr: 0.000011 loss_cls: 3.8572 (3.7838) grad_norm: 4.0808 (4.3242) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 22:10:29 root] (utils.py 283): INFO Epoch: [16] [2060/2502] eta: 0:05:47 lr: 0.000011 loss_cls: 3.6187 (3.7835) grad_norm: 4.1167 (4.3243) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 22:10:36 root] (utils.py 283): INFO Epoch: [16] [2070/2502] eta: 0:05:39 lr: 0.000011 loss_cls: 3.9496 (3.7841) grad_norm: 4.2235 (4.3236) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 22:10:44 root] (utils.py 283): INFO Epoch: [16] [2080/2502] eta: 0:05:31 lr: 0.000011 loss_cls: 3.9650 (3.7837) grad_norm: 4.1458 (4.3232) time: 0.7831 data: 0.0002 max mem: 8421 +[2024-12-05 22:10:52 root] (utils.py 283): INFO Epoch: [16] [2090/2502] eta: 0:05:23 lr: 0.000011 loss_cls: 3.9036 (3.7847) grad_norm: 4.1866 (4.3236) time: 0.7816 data: 0.0002 max mem: 8421 +[2024-12-05 22:11:00 root] (utils.py 283): INFO Epoch: [16] [2100/2502] eta: 0:05:15 lr: 0.000011 loss_cls: 4.0918 (3.7837) grad_norm: 4.2189 (4.3239) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 22:11:08 root] (utils.py 283): INFO Epoch: [16] [2110/2502] eta: 0:05:08 lr: 0.000011 loss_cls: 3.9774 (3.7845) grad_norm: 4.2160 (4.3235) time: 0.7949 data: 0.0002 max mem: 8421 +[2024-12-05 22:11:16 root] (utils.py 283): INFO Epoch: [16] [2120/2502] eta: 0:05:00 lr: 0.000011 loss_cls: 4.0161 (3.7840) grad_norm: 4.1420 (4.3230) time: 0.7959 data: 0.0003 max mem: 8421 +[2024-12-05 22:11:24 root] (utils.py 283): INFO Epoch: [16] [2130/2502] eta: 0:04:52 lr: 0.000011 loss_cls: 3.9597 (3.7842) grad_norm: 4.1420 (4.3227) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 22:11:31 root] (utils.py 283): INFO Epoch: [16] [2140/2502] eta: 0:04:44 lr: 0.000011 loss_cls: 3.9801 (3.7854) grad_norm: 4.1946 (4.3223) time: 0.7834 data: 0.0002 max mem: 8421 +[2024-12-05 22:11:39 root] (utils.py 283): INFO Epoch: [16] [2150/2502] eta: 0:04:36 lr: 0.000011 loss_cls: 4.0101 (3.7853) grad_norm: 4.0839 (4.3212) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 22:11:47 root] (utils.py 283): INFO Epoch: [16] [2160/2502] eta: 0:04:28 lr: 0.000011 loss_cls: 4.0101 (3.7859) grad_norm: 4.0345 (4.3203) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 22:11:55 root] (utils.py 283): INFO Epoch: [16] [2170/2502] eta: 0:04:20 lr: 0.000011 loss_cls: 4.0883 (3.7868) grad_norm: 4.1489 (4.3204) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-05 22:12:03 root] (utils.py 283): INFO Epoch: [16] [2180/2502] eta: 0:04:13 lr: 0.000011 loss_cls: 3.8815 (3.7865) grad_norm: 4.2181 (4.3203) time: 0.7925 data: 0.0003 max mem: 8421 +[2024-12-05 22:12:11 root] (utils.py 283): INFO Epoch: [16] [2190/2502] eta: 0:04:05 lr: 0.000011 loss_cls: 4.0933 (3.7877) grad_norm: 4.2805 (4.3211) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-05 22:12:19 root] (utils.py 283): INFO Epoch: [16] [2200/2502] eta: 0:03:57 lr: 0.000011 loss_cls: 3.8779 (3.7861) grad_norm: 4.1028 (4.3202) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 22:12:26 root] (utils.py 283): INFO Epoch: [16] [2210/2502] eta: 0:03:49 lr: 0.000011 loss_cls: 3.5704 (3.7859) grad_norm: 4.1028 (4.3228) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 22:12:34 root] (utils.py 283): INFO Epoch: [16] [2220/2502] eta: 0:03:41 lr: 0.000011 loss_cls: 3.8125 (3.7851) grad_norm: 4.2014 (4.3221) time: 0.7770 data: 0.0002 max mem: 8421 +[2024-12-05 22:12:42 root] (utils.py 283): INFO Epoch: [16] [2230/2502] eta: 0:03:33 lr: 0.000011 loss_cls: 3.7666 (3.7844) grad_norm: 4.2014 (4.3259) time: 0.7918 data: 0.0003 max mem: 8421 +[2024-12-05 22:12:51 root] (utils.py 283): INFO Epoch: [16] [2240/2502] eta: 0:03:26 lr: 0.000011 loss_cls: 3.7498 (3.7837) grad_norm: 4.2953 (4.3258) time: 0.8457 data: 0.0004 max mem: 8421 +[2024-12-05 22:12:59 root] (utils.py 283): INFO Epoch: [16] [2250/2502] eta: 0:03:18 lr: 0.000011 loss_cls: 3.5862 (3.7828) grad_norm: 4.2810 (4.3255) time: 0.8586 data: 0.0004 max mem: 8421 +[2024-12-05 22:13:07 root] (utils.py 283): INFO Epoch: [16] [2260/2502] eta: 0:03:10 lr: 0.000011 loss_cls: 3.4163 (3.7823) grad_norm: 4.1364 (4.3256) time: 0.8054 data: 0.0003 max mem: 8421 +[2024-12-05 22:13:15 root] (utils.py 283): INFO Epoch: [16] [2270/2502] eta: 0:03:02 lr: 0.000011 loss_cls: 3.8448 (3.7829) grad_norm: 4.2569 (4.3268) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-05 22:13:23 root] (utils.py 283): INFO Epoch: [16] [2280/2502] eta: 0:02:54 lr: 0.000011 loss_cls: 3.9379 (3.7837) grad_norm: 4.3514 (4.3267) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 22:13:30 root] (utils.py 283): INFO Epoch: [16] [2290/2502] eta: 0:02:46 lr: 0.000011 loss_cls: 4.0345 (3.7839) grad_norm: 4.1452 (4.3266) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 22:13:38 root] (utils.py 283): INFO Epoch: [16] [2300/2502] eta: 0:02:38 lr: 0.000011 loss_cls: 3.6792 (3.7833) grad_norm: 4.0846 (4.3264) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 22:13:46 root] (utils.py 283): INFO Epoch: [16] [2310/2502] eta: 0:02:30 lr: 0.000011 loss_cls: 3.6614 (3.7823) grad_norm: 4.0846 (4.3265) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-05 22:13:54 root] (utils.py 283): INFO Epoch: [16] [2320/2502] eta: 0:02:23 lr: 0.000011 loss_cls: 3.7531 (3.7829) grad_norm: 4.1956 (4.3268) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:02 root] (utils.py 283): INFO Epoch: [16] [2330/2502] eta: 0:02:15 lr: 0.000011 loss_cls: 4.0079 (3.7842) grad_norm: 4.1956 (4.3293) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:09 root] (utils.py 283): INFO Epoch: [16] [2340/2502] eta: 0:02:07 lr: 0.000011 loss_cls: 4.0079 (3.7841) grad_norm: 4.4048 (4.3318) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:17 root] (utils.py 283): INFO Epoch: [16] [2350/2502] eta: 0:01:59 lr: 0.000011 loss_cls: 3.8443 (3.7843) grad_norm: 4.1885 (4.3309) time: 0.7750 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:25 root] (utils.py 283): INFO Epoch: [16] [2360/2502] eta: 0:01:51 lr: 0.000011 loss_cls: 3.7020 (3.7837) grad_norm: 4.1400 (4.3306) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:33 root] (utils.py 283): INFO Epoch: [16] [2370/2502] eta: 0:01:43 lr: 0.000011 loss_cls: 3.6884 (3.7834) grad_norm: 4.1859 (4.3309) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:41 root] (utils.py 283): INFO Epoch: [16] [2380/2502] eta: 0:01:35 lr: 0.000011 loss_cls: 3.9057 (3.7842) grad_norm: 4.1123 (4.3299) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:49 root] (utils.py 283): INFO Epoch: [16] [2390/2502] eta: 0:01:28 lr: 0.000011 loss_cls: 3.9102 (3.7833) grad_norm: 4.2081 (4.3367) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-05 22:14:56 root] (utils.py 283): INFO Epoch: [16] [2400/2502] eta: 0:01:20 lr: 0.000011 loss_cls: 3.6751 (3.7832) grad_norm: 4.2647 (4.3373) time: 0.7935 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:04 root] (utils.py 283): INFO Epoch: [16] [2410/2502] eta: 0:01:12 lr: 0.000011 loss_cls: 4.0736 (3.7840) grad_norm: 4.2647 (4.3369) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:12 root] (utils.py 283): INFO Epoch: [16] [2420/2502] eta: 0:01:04 lr: 0.000011 loss_cls: 3.8984 (3.7833) grad_norm: 4.1928 (4.3369) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:20 root] (utils.py 283): INFO Epoch: [16] [2430/2502] eta: 0:00:56 lr: 0.000011 loss_cls: 3.4769 (3.7833) grad_norm: 4.2991 (4.3373) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:28 root] (utils.py 283): INFO Epoch: [16] [2440/2502] eta: 0:00:48 lr: 0.000011 loss_cls: 3.9184 (3.7836) grad_norm: 4.4573 (4.3374) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:35 root] (utils.py 283): INFO Epoch: [16] [2450/2502] eta: 0:00:40 lr: 0.000011 loss_cls: 3.9184 (3.7844) grad_norm: 4.3776 (4.3373) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:43 root] (utils.py 283): INFO Epoch: [16] [2460/2502] eta: 0:00:33 lr: 0.000011 loss_cls: 4.0212 (3.7847) grad_norm: 4.0880 (4.3363) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:51 root] (utils.py 283): INFO Epoch: [16] [2470/2502] eta: 0:00:25 lr: 0.000011 loss_cls: 4.0212 (3.7840) grad_norm: 4.1854 (4.3363) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-05 22:15:59 root] (utils.py 283): INFO Epoch: [16] [2480/2502] eta: 0:00:17 lr: 0.000011 loss_cls: 3.4245 (3.7833) grad_norm: 4.2117 (4.3357) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-05 22:16:07 root] (utils.py 283): INFO Epoch: [16] [2490/2502] eta: 0:00:09 lr: 0.000011 loss_cls: 3.7628 (3.7841) grad_norm: 4.1655 (4.3360) time: 0.8073 data: 0.0256 max mem: 8421 +[2024-12-05 22:16:15 root] (utils.py 283): INFO Epoch: [16] [2500/2502] eta: 0:00:01 lr: 0.000011 loss_cls: 3.8138 (3.7837) grad_norm: 4.2627 (4.3356) time: 0.8171 data: 0.0256 max mem: 8421 +[2024-12-05 22:16:16 root] (utils.py 283): INFO Epoch: [16] [2501/2502] eta: 0:00:00 lr: 0.000011 loss_cls: 3.8138 (3.7832) grad_norm: 4.2627 (4.3355) time: 0.8185 data: 0.0256 max mem: 8421 +[2024-12-05 22:16:16 root] (utils.py 297): INFO Epoch: [16] Total time: 0:32:47 (0.7864 s / it) +[2024-12-05 22:16:16 root] (engine.py 179): INFO Averaged stats:lr: 0.000011 loss_cls: 3.8138 (3.7836) grad_norm: 4.2627 (4.3355) +[2024-12-05 22:16:17 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:14 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7330 (0.7330) acc1: 85.1562 (85.1562) acc3: 94.5312 (94.5312) acc5: 98.4375 (98.4375) time: 0.1482 data: 0.0005 max mem: 8421 +[2024-12-05 22:16:18 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8072 (0.8697) acc1: 84.3750 (81.9602) acc3: 92.9688 (92.8267) acc5: 94.5312 (95.0994) time: 0.1327 data: 0.0004 max mem: 8421 +[2024-12-05 22:16:19 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8999 (0.9198) acc1: 79.6875 (80.5060) acc3: 92.9688 (92.2619) acc5: 94.5312 (94.7917) time: 0.1313 data: 0.0004 max mem: 8421 +[2024-12-05 22:16:21 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9885 (0.9196) acc1: 78.9062 (79.9647) acc3: 91.4062 (92.6159) acc5: 95.3125 (95.2117) time: 0.1320 data: 0.0005 max mem: 8421 +[2024-12-05 22:16:22 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8427 (0.9125) acc1: 80.4688 (80.1639) acc3: 93.7500 (92.7782) acc5: 95.3125 (95.1601) time: 0.1436 data: 0.0098 max mem: 8421 +[2024-12-05 22:16:24 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0272 (1.0037) acc1: 75.0000 (78.1097) acc3: 86.7188 (91.1612) acc5: 91.4062 (94.0257) time: 0.1698 data: 0.0341 max mem: 8421 +[2024-12-05 22:16:26 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2966 (1.0469) acc1: 71.0938 (77.3053) acc3: 85.9375 (90.3560) acc5: 90.6250 (93.4554) time: 0.1832 data: 0.0457 max mem: 8421 +[2024-12-05 22:16:27 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2689 (1.0892) acc1: 71.0938 (76.1664) acc3: 86.7188 (89.7777) acc5: 90.6250 (92.9798) time: 0.1693 data: 0.0313 max mem: 8421 +[2024-12-05 22:16:29 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3151 (1.1271) acc1: 68.7500 (75.3954) acc3: 85.9375 (89.1397) acc5: 89.0625 (92.4479) time: 0.1623 data: 0.0263 max mem: 8421 +[2024-12-05 22:16:31 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3609 (1.1583) acc1: 68.7500 (74.6652) acc3: 84.3750 (88.6590) acc5: 89.0625 (92.0587) time: 0.1813 data: 0.0467 max mem: 8421 +[2024-12-05 22:16:33 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1864 (1.1483) acc1: 73.4375 (74.7920) acc3: 88.2812 (88.8720) acc5: 91.4062 (92.2800) time: 0.2120 data: 0.0651 max mem: 8421 +[2024-12-05 22:16:33 root] (utils.py 297): INFO Test: Total time: 0:00:16 (0.1674 s / it) +[2024-12-05 22:16:33 root] (engine.py 264): INFO * Acc@1 74.604 Acc@3 88.770 Acc@5 92.218 loss 1.148 flops 1.285 layer_flops 1.251 +[2024-12-05 22:16:33 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 74.6% +[2024-12-05 22:16:33 root] (main.py 551): INFO Max accuracy: 74.60% +[2024-12-05 22:16:34 root] (utils.py 283): INFO Epoch: [17] [ 0/2502] eta: 0:44:45 lr: 0.000010 loss_cls: 3.5716 (3.5716) grad_norm: 4.1189 (4.1189) time: 1.0733 data: 0.0007 max mem: 8421 +[2024-12-05 22:16:42 root] (utils.py 283): INFO Epoch: [17] [ 10/2502] eta: 0:33:28 lr: 0.000010 loss_cls: 3.7791 (3.7764) grad_norm: 4.1189 (4.2606) time: 0.8059 data: 0.0003 max mem: 8421 +[2024-12-05 22:16:50 root] (utils.py 283): INFO Epoch: [17] [ 20/2502] eta: 0:32:49 lr: 0.000010 loss_cls: 3.9434 (3.8747) grad_norm: 4.3701 (4.3431) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 22:16:58 root] (utils.py 283): INFO Epoch: [17] [ 30/2502] eta: 0:32:28 lr: 0.000010 loss_cls: 4.0616 (3.9137) grad_norm: 4.3837 (4.3764) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 22:17:06 root] (utils.py 283): INFO Epoch: [17] [ 40/2502] eta: 0:32:14 lr: 0.000010 loss_cls: 4.0044 (3.9061) grad_norm: 4.4233 (4.3889) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-05 22:17:13 root] (utils.py 283): INFO Epoch: [17] [ 50/2502] eta: 0:32:04 lr: 0.000010 loss_cls: 3.9680 (3.9305) grad_norm: 4.2604 (4.3563) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-05 22:17:21 root] (utils.py 283): INFO Epoch: [17] [ 60/2502] eta: 0:31:53 lr: 0.000010 loss_cls: 4.1168 (3.9763) grad_norm: 4.1539 (4.3479) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-05 22:17:29 root] (utils.py 283): INFO Epoch: [17] [ 70/2502] eta: 0:31:44 lr: 0.000010 loss_cls: 4.1022 (3.9904) grad_norm: 4.3326 (4.3866) time: 0.7782 data: 0.0002 max mem: 8421 +[2024-12-05 22:17:37 root] (utils.py 283): INFO Epoch: [17] [ 80/2502] eta: 0:31:35 lr: 0.000010 loss_cls: 3.9326 (3.9833) grad_norm: 4.3326 (4.3933) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 22:17:45 root] (utils.py 283): INFO Epoch: [17] [ 90/2502] eta: 0:31:28 lr: 0.000010 loss_cls: 3.9190 (3.9583) grad_norm: 4.2133 (4.3713) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-05 22:17:53 root] (utils.py 283): INFO Epoch: [17] [ 100/2502] eta: 0:31:25 lr: 0.000010 loss_cls: 4.0519 (3.9597) grad_norm: 4.2235 (4.3552) time: 0.7943 data: 0.0003 max mem: 8421 +[2024-12-05 22:18:00 root] (utils.py 283): INFO Epoch: [17] [ 110/2502] eta: 0:31:15 lr: 0.000010 loss_cls: 3.8531 (3.9224) grad_norm: 4.2185 (4.3511) time: 0.7903 data: 0.0002 max mem: 8421 +[2024-12-05 22:18:08 root] (utils.py 283): INFO Epoch: [17] [ 120/2502] eta: 0:31:07 lr: 0.000010 loss_cls: 3.8531 (3.9304) grad_norm: 4.1580 (4.3353) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-05 22:18:16 root] (utils.py 283): INFO Epoch: [17] [ 130/2502] eta: 0:30:58 lr: 0.000010 loss_cls: 4.0183 (3.9269) grad_norm: 4.1986 (4.3410) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 22:18:24 root] (utils.py 283): INFO Epoch: [17] [ 140/2502] eta: 0:30:50 lr: 0.000010 loss_cls: 4.0181 (3.9347) grad_norm: 4.2898 (4.3609) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-05 22:18:32 root] (utils.py 283): INFO Epoch: [17] [ 150/2502] eta: 0:30:42 lr: 0.000010 loss_cls: 4.0080 (3.9170) grad_norm: 4.3876 (4.3795) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-05 22:18:39 root] (utils.py 283): INFO Epoch: [17] [ 160/2502] eta: 0:30:34 lr: 0.000010 loss_cls: 4.0762 (3.9247) grad_norm: 4.4017 (4.3705) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 22:18:47 root] (utils.py 283): INFO Epoch: [17] [ 170/2502] eta: 0:30:26 lr: 0.000010 loss_cls: 4.0123 (3.9207) grad_norm: 4.1500 (4.3747) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-05 22:18:55 root] (utils.py 283): INFO Epoch: [17] [ 180/2502] eta: 0:30:18 lr: 0.000010 loss_cls: 3.9750 (3.9207) grad_norm: 4.0585 (4.3677) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-05 22:19:03 root] (utils.py 283): INFO Epoch: [17] [ 190/2502] eta: 0:30:10 lr: 0.000010 loss_cls: 3.9821 (3.9209) grad_norm: 4.2699 (4.3762) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 22:19:11 root] (utils.py 283): INFO Epoch: [17] [ 200/2502] eta: 0:30:02 lr: 0.000010 loss_cls: 4.0335 (3.9319) grad_norm: 4.2699 (4.3723) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 22:19:19 root] (utils.py 283): INFO Epoch: [17] [ 210/2502] eta: 0:29:54 lr: 0.000010 loss_cls: 4.0335 (3.9436) grad_norm: 4.2188 (4.3705) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 22:19:26 root] (utils.py 283): INFO Epoch: [17] [ 220/2502] eta: 0:29:46 lr: 0.000010 loss_cls: 4.0304 (3.9376) grad_norm: 4.3346 (4.3707) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 22:19:34 root] (utils.py 283): INFO Epoch: [17] [ 230/2502] eta: 0:29:37 lr: 0.000010 loss_cls: 4.1426 (3.9508) grad_norm: 4.3346 (4.3757) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 22:19:42 root] (utils.py 283): INFO Epoch: [17] [ 240/2502] eta: 0:29:29 lr: 0.000010 loss_cls: 3.9542 (3.9350) grad_norm: 4.2653 (4.3804) time: 0.7771 data: 0.0002 max mem: 8421 +[2024-12-05 22:19:50 root] (utils.py 283): INFO Epoch: [17] [ 250/2502] eta: 0:29:21 lr: 0.000010 loss_cls: 3.5245 (3.9275) grad_norm: 4.2269 (4.3758) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-05 22:19:58 root] (utils.py 283): INFO Epoch: [17] [ 260/2502] eta: 0:29:14 lr: 0.000010 loss_cls: 3.8867 (3.9220) grad_norm: 4.2914 (4.3860) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-05 22:20:06 root] (utils.py 283): INFO Epoch: [17] [ 270/2502] eta: 0:29:08 lr: 0.000010 loss_cls: 4.0787 (3.9262) grad_norm: 4.3660 (4.3760) time: 0.7982 data: 0.0003 max mem: 8421 +[2024-12-05 22:20:13 root] (utils.py 283): INFO Epoch: [17] [ 280/2502] eta: 0:29:00 lr: 0.000010 loss_cls: 3.9602 (3.9224) grad_norm: 4.1934 (4.3755) time: 0.7905 data: 0.0003 max mem: 8421 +[2024-12-05 22:20:21 root] (utils.py 283): INFO Epoch: [17] [ 290/2502] eta: 0:28:52 lr: 0.000010 loss_cls: 3.5777 (3.9077) grad_norm: 4.2520 (4.3803) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 22:20:29 root] (utils.py 283): INFO Epoch: [17] [ 300/2502] eta: 0:28:44 lr: 0.000010 loss_cls: 3.5196 (3.9066) grad_norm: 4.2520 (4.3815) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 22:20:37 root] (utils.py 283): INFO Epoch: [17] [ 310/2502] eta: 0:28:37 lr: 0.000010 loss_cls: 4.0304 (3.9087) grad_norm: 4.2459 (4.3732) time: 0.7909 data: 0.0002 max mem: 8421 +[2024-12-05 22:20:45 root] (utils.py 283): INFO Epoch: [17] [ 320/2502] eta: 0:28:30 lr: 0.000010 loss_cls: 4.0304 (3.9075) grad_norm: 4.2445 (4.3708) time: 0.7996 data: 0.0003 max mem: 8421 +[2024-12-05 22:20:53 root] (utils.py 283): INFO Epoch: [17] [ 330/2502] eta: 0:28:23 lr: 0.000010 loss_cls: 4.0826 (3.9035) grad_norm: 4.2445 (4.3658) time: 0.7956 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:01 root] (utils.py 283): INFO Epoch: [17] [ 340/2502] eta: 0:28:15 lr: 0.000010 loss_cls: 3.6777 (3.8987) grad_norm: 4.0775 (4.3610) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:09 root] (utils.py 283): INFO Epoch: [17] [ 350/2502] eta: 0:28:07 lr: 0.000010 loss_cls: 3.5825 (3.8960) grad_norm: 4.2688 (4.3677) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:16 root] (utils.py 283): INFO Epoch: [17] [ 360/2502] eta: 0:27:59 lr: 0.000010 loss_cls: 3.4605 (3.8783) grad_norm: 4.2688 (4.3617) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:24 root] (utils.py 283): INFO Epoch: [17] [ 370/2502] eta: 0:27:51 lr: 0.000010 loss_cls: 3.5076 (3.8746) grad_norm: 4.0896 (4.3560) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:32 root] (utils.py 283): INFO Epoch: [17] [ 380/2502] eta: 0:27:42 lr: 0.000010 loss_cls: 4.0061 (3.8734) grad_norm: 4.1743 (4.3548) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:40 root] (utils.py 283): INFO Epoch: [17] [ 390/2502] eta: 0:27:34 lr: 0.000010 loss_cls: 3.9847 (3.8682) grad_norm: 4.1743 (4.3524) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:48 root] (utils.py 283): INFO Epoch: [17] [ 400/2502] eta: 0:27:26 lr: 0.000010 loss_cls: 3.7457 (3.8680) grad_norm: 4.0805 (4.3509) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-05 22:21:55 root] (utils.py 283): INFO Epoch: [17] [ 410/2502] eta: 0:27:18 lr: 0.000010 loss_cls: 3.8940 (3.8659) grad_norm: 4.2118 (4.3502) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:03 root] (utils.py 283): INFO Epoch: [17] [ 420/2502] eta: 0:27:10 lr: 0.000010 loss_cls: 3.8666 (3.8677) grad_norm: 4.2845 (4.3463) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:11 root] (utils.py 283): INFO Epoch: [17] [ 430/2502] eta: 0:27:02 lr: 0.000010 loss_cls: 4.0658 (3.8726) grad_norm: 4.2545 (4.3453) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:19 root] (utils.py 283): INFO Epoch: [17] [ 440/2502] eta: 0:26:54 lr: 0.000010 loss_cls: 4.1667 (3.8776) grad_norm: 4.2325 (4.3418) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:27 root] (utils.py 283): INFO Epoch: [17] [ 450/2502] eta: 0:26:47 lr: 0.000010 loss_cls: 3.9030 (3.8703) grad_norm: 4.1716 (4.3400) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:35 root] (utils.py 283): INFO Epoch: [17] [ 460/2502] eta: 0:26:39 lr: 0.000010 loss_cls: 3.5734 (3.8670) grad_norm: 4.1716 (4.3393) time: 0.7908 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:42 root] (utils.py 283): INFO Epoch: [17] [ 470/2502] eta: 0:26:32 lr: 0.000010 loss_cls: 3.8221 (3.8682) grad_norm: 4.0331 (4.3316) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:50 root] (utils.py 283): INFO Epoch: [17] [ 480/2502] eta: 0:26:24 lr: 0.000010 loss_cls: 3.9607 (3.8643) grad_norm: 4.0331 (4.3316) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-05 22:22:58 root] (utils.py 283): INFO Epoch: [17] [ 490/2502] eta: 0:26:16 lr: 0.000010 loss_cls: 4.0351 (3.8680) grad_norm: 4.2264 (4.3451) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-05 22:23:06 root] (utils.py 283): INFO Epoch: [17] [ 500/2502] eta: 0:26:09 lr: 0.000010 loss_cls: 4.1462 (3.8670) grad_norm: 4.4323 (4.3553) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-05 22:23:14 root] (utils.py 283): INFO Epoch: [17] [ 510/2502] eta: 0:26:01 lr: 0.000010 loss_cls: 3.7982 (3.8648) grad_norm: 4.1909 (4.3500) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 22:23:22 root] (utils.py 283): INFO Epoch: [17] [ 520/2502] eta: 0:25:53 lr: 0.000010 loss_cls: 3.7684 (3.8654) grad_norm: 4.1299 (4.3492) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 22:23:30 root] (utils.py 283): INFO Epoch: [17] [ 530/2502] eta: 0:25:45 lr: 0.000010 loss_cls: 3.7608 (3.8628) grad_norm: 4.1550 (4.3488) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:23:37 root] (utils.py 283): INFO Epoch: [17] [ 540/2502] eta: 0:25:37 lr: 0.000010 loss_cls: 3.9755 (3.8624) grad_norm: 4.3017 (4.3541) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 22:23:45 root] (utils.py 283): INFO Epoch: [17] [ 550/2502] eta: 0:25:29 lr: 0.000010 loss_cls: 4.0048 (3.8634) grad_norm: 4.2364 (4.3558) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 22:23:53 root] (utils.py 283): INFO Epoch: [17] [ 560/2502] eta: 0:25:21 lr: 0.000010 loss_cls: 3.8977 (3.8608) grad_norm: 4.1807 (4.3544) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-05 22:24:01 root] (utils.py 283): INFO Epoch: [17] [ 570/2502] eta: 0:25:13 lr: 0.000010 loss_cls: 3.8373 (3.8581) grad_norm: 4.1807 (4.3547) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 22:24:09 root] (utils.py 283): INFO Epoch: [17] [ 580/2502] eta: 0:25:05 lr: 0.000010 loss_cls: 3.6303 (3.8529) grad_norm: 4.0983 (4.3542) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 22:24:16 root] (utils.py 283): INFO Epoch: [17] [ 590/2502] eta: 0:24:57 lr: 0.000010 loss_cls: 3.9568 (3.8530) grad_norm: 4.2550 (4.3538) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 22:24:24 root] (utils.py 283): INFO Epoch: [17] [ 600/2502] eta: 0:24:49 lr: 0.000010 loss_cls: 4.0315 (3.8533) grad_norm: 4.2550 (4.3590) time: 0.7777 data: 0.0002 max mem: 8421 +[2024-12-05 22:24:32 root] (utils.py 283): INFO Epoch: [17] [ 610/2502] eta: 0:24:41 lr: 0.000010 loss_cls: 4.0315 (3.8527) grad_norm: 4.2095 (4.3576) time: 0.7775 data: 0.0003 max mem: 8421 +[2024-12-05 22:24:40 root] (utils.py 283): INFO Epoch: [17] [ 620/2502] eta: 0:24:33 lr: 0.000010 loss_cls: 3.9791 (3.8554) grad_norm: 4.1228 (4.3577) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-05 22:24:48 root] (utils.py 283): INFO Epoch: [17] [ 630/2502] eta: 0:24:26 lr: 0.000010 loss_cls: 3.9791 (3.8565) grad_norm: 4.2763 (4.3683) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-05 22:24:55 root] (utils.py 283): INFO Epoch: [17] [ 640/2502] eta: 0:24:18 lr: 0.000010 loss_cls: 3.7445 (3.8515) grad_norm: 4.4493 (4.3735) time: 0.7926 data: 0.0003 max mem: 8421 +[2024-12-05 22:25:03 root] (utils.py 283): INFO Epoch: [17] [ 650/2502] eta: 0:24:10 lr: 0.000010 loss_cls: 3.7914 (3.8512) grad_norm: 4.3362 (4.3713) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 22:25:11 root] (utils.py 283): INFO Epoch: [17] [ 660/2502] eta: 0:24:02 lr: 0.000010 loss_cls: 4.1006 (3.8500) grad_norm: 4.2155 (4.3685) time: 0.7810 data: 0.0002 max mem: 8421 +[2024-12-05 22:25:19 root] (utils.py 283): INFO Epoch: [17] [ 670/2502] eta: 0:23:54 lr: 0.000010 loss_cls: 3.9822 (3.8506) grad_norm: 4.4190 (4.3788) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 22:25:27 root] (utils.py 283): INFO Epoch: [17] [ 680/2502] eta: 0:23:46 lr: 0.000010 loss_cls: 3.9822 (3.8504) grad_norm: 4.3897 (4.3765) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 22:25:34 root] (utils.py 283): INFO Epoch: [17] [ 690/2502] eta: 0:23:38 lr: 0.000010 loss_cls: 3.8291 (3.8472) grad_norm: 4.1897 (4.3775) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 22:25:42 root] (utils.py 283): INFO Epoch: [17] [ 700/2502] eta: 0:23:30 lr: 0.000010 loss_cls: 3.8291 (3.8493) grad_norm: 4.1897 (4.3768) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 22:25:50 root] (utils.py 283): INFO Epoch: [17] [ 710/2502] eta: 0:23:23 lr: 0.000010 loss_cls: 4.2273 (3.8525) grad_norm: 4.2351 (4.3825) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 22:25:58 root] (utils.py 283): INFO Epoch: [17] [ 720/2502] eta: 0:23:15 lr: 0.000010 loss_cls: 4.1387 (3.8506) grad_norm: 4.2351 (4.3809) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-05 22:26:06 root] (utils.py 283): INFO Epoch: [17] [ 730/2502] eta: 0:23:07 lr: 0.000010 loss_cls: 3.9760 (3.8542) grad_norm: 4.2110 (4.3818) time: 0.7891 data: 0.0003 max mem: 8421 +[2024-12-05 22:26:14 root] (utils.py 283): INFO Epoch: [17] [ 740/2502] eta: 0:22:59 lr: 0.000010 loss_cls: 4.2274 (3.8592) grad_norm: 4.2704 (4.3824) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-05 22:26:21 root] (utils.py 283): INFO Epoch: [17] [ 750/2502] eta: 0:22:51 lr: 0.000010 loss_cls: 4.1747 (3.8589) grad_norm: 4.2702 (4.3833) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 22:26:29 root] (utils.py 283): INFO Epoch: [17] [ 760/2502] eta: 0:22:43 lr: 0.000010 loss_cls: 3.8795 (3.8602) grad_norm: 4.3647 (4.3847) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 22:26:37 root] (utils.py 283): INFO Epoch: [17] [ 770/2502] eta: 0:22:36 lr: 0.000010 loss_cls: 3.7157 (3.8590) grad_norm: 4.3793 (4.3830) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-05 22:26:45 root] (utils.py 283): INFO Epoch: [17] [ 780/2502] eta: 0:22:28 lr: 0.000010 loss_cls: 3.7138 (3.8575) grad_norm: 4.4357 (4.3856) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-05 22:26:53 root] (utils.py 283): INFO Epoch: [17] [ 790/2502] eta: 0:22:20 lr: 0.000010 loss_cls: 3.8878 (3.8591) grad_norm: 4.4714 (4.3864) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:00 root] (utils.py 283): INFO Epoch: [17] [ 800/2502] eta: 0:22:12 lr: 0.000010 loss_cls: 4.0224 (3.8605) grad_norm: 4.3345 (4.3859) time: 0.7752 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:08 root] (utils.py 283): INFO Epoch: [17] [ 810/2502] eta: 0:22:04 lr: 0.000010 loss_cls: 4.0680 (3.8614) grad_norm: 4.2529 (4.3858) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:16 root] (utils.py 283): INFO Epoch: [17] [ 820/2502] eta: 0:21:56 lr: 0.000010 loss_cls: 4.0188 (3.8616) grad_norm: 4.1437 (4.3873) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:24 root] (utils.py 283): INFO Epoch: [17] [ 830/2502] eta: 0:21:48 lr: 0.000010 loss_cls: 3.8112 (3.8585) grad_norm: 4.2222 (4.3861) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:31 root] (utils.py 283): INFO Epoch: [17] [ 840/2502] eta: 0:21:40 lr: 0.000010 loss_cls: 3.7431 (3.8562) grad_norm: 4.1699 (4.3830) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:39 root] (utils.py 283): INFO Epoch: [17] [ 850/2502] eta: 0:21:32 lr: 0.000010 loss_cls: 4.0733 (3.8583) grad_norm: 4.1517 (4.3856) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:47 root] (utils.py 283): INFO Epoch: [17] [ 860/2502] eta: 0:21:24 lr: 0.000010 loss_cls: 4.0939 (3.8579) grad_norm: 4.2903 (4.3843) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 22:27:55 root] (utils.py 283): INFO Epoch: [17] [ 870/2502] eta: 0:21:16 lr: 0.000010 loss_cls: 3.8092 (3.8558) grad_norm: 4.2579 (4.3834) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-05 22:28:03 root] (utils.py 283): INFO Epoch: [17] [ 880/2502] eta: 0:21:08 lr: 0.000010 loss_cls: 4.0435 (3.8574) grad_norm: 4.2697 (4.3846) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-05 22:28:10 root] (utils.py 283): INFO Epoch: [17] [ 890/2502] eta: 0:21:00 lr: 0.000010 loss_cls: 4.0895 (3.8598) grad_norm: 4.2697 (4.3826) time: 0.7758 data: 0.0002 max mem: 8421 +[2024-12-05 22:28:18 root] (utils.py 283): INFO Epoch: [17] [ 900/2502] eta: 0:20:52 lr: 0.000010 loss_cls: 4.0388 (3.8581) grad_norm: 4.1856 (4.3834) time: 0.7753 data: 0.0003 max mem: 8421 +[2024-12-05 22:28:26 root] (utils.py 283): INFO Epoch: [17] [ 910/2502] eta: 0:20:45 lr: 0.000010 loss_cls: 3.9194 (3.8588) grad_norm: 4.1764 (4.3823) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 22:28:34 root] (utils.py 283): INFO Epoch: [17] [ 920/2502] eta: 0:20:37 lr: 0.000010 loss_cls: 4.2404 (3.8618) grad_norm: 4.2418 (4.3828) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 22:28:41 root] (utils.py 283): INFO Epoch: [17] [ 930/2502] eta: 0:20:29 lr: 0.000010 loss_cls: 4.0480 (3.8603) grad_norm: 4.1375 (4.3800) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-05 22:28:49 root] (utils.py 283): INFO Epoch: [17] [ 940/2502] eta: 0:20:21 lr: 0.000010 loss_cls: 3.5201 (3.8587) grad_norm: 4.1229 (4.3781) time: 0.7760 data: 0.0003 max mem: 8421 +[2024-12-05 22:28:57 root] (utils.py 283): INFO Epoch: [17] [ 950/2502] eta: 0:20:13 lr: 0.000010 loss_cls: 3.7445 (3.8588) grad_norm: 4.1227 (4.3777) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:05 root] (utils.py 283): INFO Epoch: [17] [ 960/2502] eta: 0:20:05 lr: 0.000010 loss_cls: 4.0760 (3.8595) grad_norm: 4.1739 (4.3783) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:13 root] (utils.py 283): INFO Epoch: [17] [ 970/2502] eta: 0:19:57 lr: 0.000010 loss_cls: 3.6820 (3.8564) grad_norm: 4.2505 (4.3788) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:20 root] (utils.py 283): INFO Epoch: [17] [ 980/2502] eta: 0:19:50 lr: 0.000010 loss_cls: 3.6820 (3.8559) grad_norm: 4.2145 (4.3782) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:28 root] (utils.py 283): INFO Epoch: [17] [ 990/2502] eta: 0:19:42 lr: 0.000010 loss_cls: 3.8681 (3.8575) grad_norm: 4.5636 (4.4073) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:36 root] (utils.py 283): INFO Epoch: [17] [1000/2502] eta: 0:19:34 lr: 0.000010 loss_cls: 3.8812 (3.8572) grad_norm: 4.4946 (4.4058) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:44 root] (utils.py 283): INFO Epoch: [17] [1010/2502] eta: 0:19:26 lr: 0.000010 loss_cls: 4.0435 (3.8585) grad_norm: 4.1809 (4.4076) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:52 root] (utils.py 283): INFO Epoch: [17] [1020/2502] eta: 0:19:18 lr: 0.000010 loss_cls: 3.8104 (3.8569) grad_norm: 4.3061 (4.4078) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 22:29:59 root] (utils.py 283): INFO Epoch: [17] [1030/2502] eta: 0:19:10 lr: 0.000010 loss_cls: 3.8615 (3.8576) grad_norm: 4.3061 (4.4085) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 22:30:07 root] (utils.py 283): INFO Epoch: [17] [1040/2502] eta: 0:19:02 lr: 0.000010 loss_cls: 3.7404 (3.8544) grad_norm: 4.2689 (4.4076) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 22:30:15 root] (utils.py 283): INFO Epoch: [17] [1050/2502] eta: 0:18:54 lr: 0.000010 loss_cls: 3.6437 (3.8545) grad_norm: 4.4501 (4.4156) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 22:30:23 root] (utils.py 283): INFO Epoch: [17] [1060/2502] eta: 0:18:47 lr: 0.000010 loss_cls: 4.1409 (3.8577) grad_norm: 4.4617 (4.4151) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 22:30:31 root] (utils.py 283): INFO Epoch: [17] [1070/2502] eta: 0:18:39 lr: 0.000010 loss_cls: 4.1167 (3.8582) grad_norm: 4.2164 (4.4147) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 22:30:38 root] (utils.py 283): INFO Epoch: [17] [1080/2502] eta: 0:18:31 lr: 0.000010 loss_cls: 4.0936 (3.8598) grad_norm: 4.1635 (4.4138) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 22:30:46 root] (utils.py 283): INFO Epoch: [17] [1090/2502] eta: 0:18:23 lr: 0.000010 loss_cls: 4.0238 (3.8603) grad_norm: 4.1861 (4.4133) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 22:30:54 root] (utils.py 283): INFO Epoch: [17] [1100/2502] eta: 0:18:15 lr: 0.000010 loss_cls: 3.9110 (3.8592) grad_norm: 4.2584 (4.4118) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:02 root] (utils.py 283): INFO Epoch: [17] [1110/2502] eta: 0:18:07 lr: 0.000010 loss_cls: 4.0990 (3.8612) grad_norm: 4.1284 (4.4103) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:09 root] (utils.py 283): INFO Epoch: [17] [1120/2502] eta: 0:17:59 lr: 0.000010 loss_cls: 4.1613 (3.8629) grad_norm: 4.1486 (4.4100) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:17 root] (utils.py 283): INFO Epoch: [17] [1130/2502] eta: 0:17:52 lr: 0.000010 loss_cls: 4.1610 (3.8650) grad_norm: 4.3794 (4.4101) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:25 root] (utils.py 283): INFO Epoch: [17] [1140/2502] eta: 0:17:44 lr: 0.000010 loss_cls: 3.9867 (3.8653) grad_norm: 4.3487 (4.4087) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:33 root] (utils.py 283): INFO Epoch: [17] [1150/2502] eta: 0:17:36 lr: 0.000010 loss_cls: 3.9867 (3.8665) grad_norm: 4.1135 (4.4074) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:41 root] (utils.py 283): INFO Epoch: [17] [1160/2502] eta: 0:17:28 lr: 0.000010 loss_cls: 4.0563 (3.8663) grad_norm: 3.9478 (4.4052) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:49 root] (utils.py 283): INFO Epoch: [17] [1170/2502] eta: 0:17:20 lr: 0.000010 loss_cls: 4.0224 (3.8670) grad_norm: 4.1288 (4.4046) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 22:31:56 root] (utils.py 283): INFO Epoch: [17] [1180/2502] eta: 0:17:13 lr: 0.000010 loss_cls: 3.9948 (3.8676) grad_norm: 4.2451 (4.4027) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:04 root] (utils.py 283): INFO Epoch: [17] [1190/2502] eta: 0:17:05 lr: 0.000010 loss_cls: 3.9667 (3.8672) grad_norm: 4.2451 (4.4036) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:12 root] (utils.py 283): INFO Epoch: [17] [1200/2502] eta: 0:16:57 lr: 0.000010 loss_cls: 4.0087 (3.8671) grad_norm: 4.1298 (4.4013) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:20 root] (utils.py 283): INFO Epoch: [17] [1210/2502] eta: 0:16:49 lr: 0.000010 loss_cls: 3.9758 (3.8665) grad_norm: 4.2291 (4.4040) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:28 root] (utils.py 283): INFO Epoch: [17] [1220/2502] eta: 0:16:41 lr: 0.000010 loss_cls: 3.8601 (3.8680) grad_norm: 4.4461 (4.4059) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:35 root] (utils.py 283): INFO Epoch: [17] [1230/2502] eta: 0:16:34 lr: 0.000010 loss_cls: 4.0152 (3.8686) grad_norm: 4.3537 (4.4058) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:43 root] (utils.py 283): INFO Epoch: [17] [1240/2502] eta: 0:16:26 lr: 0.000010 loss_cls: 3.8895 (3.8698) grad_norm: 4.3361 (4.4070) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:51 root] (utils.py 283): INFO Epoch: [17] [1250/2502] eta: 0:16:18 lr: 0.000010 loss_cls: 4.0270 (3.8683) grad_norm: 4.3361 (4.4078) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 22:32:59 root] (utils.py 283): INFO Epoch: [17] [1260/2502] eta: 0:16:10 lr: 0.000010 loss_cls: 4.0707 (3.8698) grad_norm: 4.2783 (4.4066) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 22:33:07 root] (utils.py 283): INFO Epoch: [17] [1270/2502] eta: 0:16:02 lr: 0.000010 loss_cls: 4.0707 (3.8698) grad_norm: 4.2059 (4.4042) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 22:33:15 root] (utils.py 283): INFO Epoch: [17] [1280/2502] eta: 0:15:54 lr: 0.000010 loss_cls: 4.0228 (3.8693) grad_norm: 4.0697 (4.4077) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 22:33:22 root] (utils.py 283): INFO Epoch: [17] [1290/2502] eta: 0:15:47 lr: 0.000010 loss_cls: 4.0228 (3.8702) grad_norm: 4.3050 (4.4081) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 22:33:30 root] (utils.py 283): INFO Epoch: [17] [1300/2502] eta: 0:15:39 lr: 0.000010 loss_cls: 4.0283 (3.8708) grad_norm: 4.3146 (4.4080) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-05 22:33:38 root] (utils.py 283): INFO Epoch: [17] [1310/2502] eta: 0:15:31 lr: 0.000010 loss_cls: 3.8150 (3.8688) grad_norm: 4.2859 (4.4057) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-05 22:33:46 root] (utils.py 283): INFO Epoch: [17] [1320/2502] eta: 0:15:23 lr: 0.000010 loss_cls: 3.8150 (3.8685) grad_norm: 4.0890 (4.4036) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 22:33:54 root] (utils.py 283): INFO Epoch: [17] [1330/2502] eta: 0:15:15 lr: 0.000010 loss_cls: 3.9827 (3.8688) grad_norm: 4.0701 (4.4015) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:01 root] (utils.py 283): INFO Epoch: [17] [1340/2502] eta: 0:15:08 lr: 0.000010 loss_cls: 4.0427 (3.8674) grad_norm: 4.2657 (4.4031) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:10 root] (utils.py 283): INFO Epoch: [17] [1350/2502] eta: 0:15:00 lr: 0.000010 loss_cls: 4.1027 (3.8690) grad_norm: 4.4346 (4.4037) time: 0.7977 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:17 root] (utils.py 283): INFO Epoch: [17] [1360/2502] eta: 0:14:52 lr: 0.000010 loss_cls: 3.9662 (3.8676) grad_norm: 4.1291 (4.4024) time: 0.7985 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:25 root] (utils.py 283): INFO Epoch: [17] [1370/2502] eta: 0:14:44 lr: 0.000010 loss_cls: 3.7748 (3.8666) grad_norm: 4.1066 (4.4016) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:33 root] (utils.py 283): INFO Epoch: [17] [1380/2502] eta: 0:14:37 lr: 0.000010 loss_cls: 3.7259 (3.8648) grad_norm: 4.2676 (4.4017) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:41 root] (utils.py 283): INFO Epoch: [17] [1390/2502] eta: 0:14:29 lr: 0.000010 loss_cls: 3.7259 (3.8649) grad_norm: 4.2746 (4.4015) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:49 root] (utils.py 283): INFO Epoch: [17] [1400/2502] eta: 0:14:21 lr: 0.000010 loss_cls: 4.0112 (3.8637) grad_norm: 4.1964 (4.4005) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 22:34:57 root] (utils.py 283): INFO Epoch: [17] [1410/2502] eta: 0:14:13 lr: 0.000010 loss_cls: 3.8755 (3.8642) grad_norm: 4.3171 (4.4012) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-05 22:35:04 root] (utils.py 283): INFO Epoch: [17] [1420/2502] eta: 0:14:05 lr: 0.000010 loss_cls: 3.9221 (3.8641) grad_norm: 4.3104 (4.4006) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 22:35:12 root] (utils.py 283): INFO Epoch: [17] [1430/2502] eta: 0:13:58 lr: 0.000010 loss_cls: 3.9410 (3.8632) grad_norm: 4.2818 (4.4010) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 22:35:20 root] (utils.py 283): INFO Epoch: [17] [1440/2502] eta: 0:13:50 lr: 0.000010 loss_cls: 3.6855 (3.8620) grad_norm: 4.1787 (4.3995) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-05 22:35:28 root] (utils.py 283): INFO Epoch: [17] [1450/2502] eta: 0:13:42 lr: 0.000010 loss_cls: 3.6855 (3.8606) grad_norm: 4.1787 (4.3980) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-05 22:35:36 root] (utils.py 283): INFO Epoch: [17] [1460/2502] eta: 0:13:34 lr: 0.000010 loss_cls: 3.8314 (3.8612) grad_norm: 4.2362 (4.3986) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-05 22:35:44 root] (utils.py 283): INFO Epoch: [17] [1470/2502] eta: 0:13:27 lr: 0.000010 loss_cls: 3.9819 (3.8631) grad_norm: 4.3605 (4.3989) time: 0.8063 data: 0.0002 max mem: 8421 +[2024-12-05 22:35:52 root] (utils.py 283): INFO Epoch: [17] [1480/2502] eta: 0:13:19 lr: 0.000010 loss_cls: 3.9819 (3.8631) grad_norm: 4.3734 (4.3987) time: 0.7967 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:00 root] (utils.py 283): INFO Epoch: [17] [1490/2502] eta: 0:13:11 lr: 0.000010 loss_cls: 3.8234 (3.8631) grad_norm: 4.2780 (4.3979) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:07 root] (utils.py 283): INFO Epoch: [17] [1500/2502] eta: 0:13:03 lr: 0.000010 loss_cls: 3.8024 (3.8625) grad_norm: 4.2327 (4.3983) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:15 root] (utils.py 283): INFO Epoch: [17] [1510/2502] eta: 0:12:55 lr: 0.000010 loss_cls: 3.5893 (3.8616) grad_norm: 4.2877 (4.3995) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:23 root] (utils.py 283): INFO Epoch: [17] [1520/2502] eta: 0:12:48 lr: 0.000010 loss_cls: 3.9815 (3.8628) grad_norm: 4.3034 (4.3989) time: 0.7927 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:31 root] (utils.py 283): INFO Epoch: [17] [1530/2502] eta: 0:12:40 lr: 0.000010 loss_cls: 3.7630 (3.8623) grad_norm: 4.3674 (4.3990) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:39 root] (utils.py 283): INFO Epoch: [17] [1540/2502] eta: 0:12:32 lr: 0.000010 loss_cls: 3.8499 (3.8638) grad_norm: 4.3177 (4.3989) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:47 root] (utils.py 283): INFO Epoch: [17] [1550/2502] eta: 0:12:24 lr: 0.000010 loss_cls: 4.0695 (3.8652) grad_norm: 4.2003 (4.3977) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 22:36:54 root] (utils.py 283): INFO Epoch: [17] [1560/2502] eta: 0:12:16 lr: 0.000010 loss_cls: 4.0695 (3.8662) grad_norm: 4.2003 (4.4001) time: 0.7770 data: 0.0003 max mem: 8421 +[2024-12-05 22:37:02 root] (utils.py 283): INFO Epoch: [17] [1570/2502] eta: 0:12:09 lr: 0.000010 loss_cls: 3.5710 (3.8630) grad_norm: 4.2585 (4.3985) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 22:37:10 root] (utils.py 283): INFO Epoch: [17] [1580/2502] eta: 0:12:01 lr: 0.000010 loss_cls: 3.3757 (3.8611) grad_norm: 4.0651 (4.3971) time: 0.7907 data: 0.0003 max mem: 8421 +[2024-12-05 22:37:18 root] (utils.py 283): INFO Epoch: [17] [1590/2502] eta: 0:11:53 lr: 0.000010 loss_cls: 3.6775 (3.8600) grad_norm: 4.2067 (4.3968) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-05 22:37:26 root] (utils.py 283): INFO Epoch: [17] [1600/2502] eta: 0:11:45 lr: 0.000010 loss_cls: 3.6775 (3.8580) grad_norm: 4.2067 (4.3964) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 22:37:34 root] (utils.py 283): INFO Epoch: [17] [1610/2502] eta: 0:11:37 lr: 0.000010 loss_cls: 3.8009 (3.8587) grad_norm: 4.2639 (4.3954) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-05 22:37:41 root] (utils.py 283): INFO Epoch: [17] [1620/2502] eta: 0:11:29 lr: 0.000010 loss_cls: 3.8359 (3.8579) grad_norm: 4.2639 (4.3956) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 22:37:49 root] (utils.py 283): INFO Epoch: [17] [1630/2502] eta: 0:11:22 lr: 0.000010 loss_cls: 3.8062 (3.8578) grad_norm: 4.2940 (4.3966) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 22:37:57 root] (utils.py 283): INFO Epoch: [17] [1640/2502] eta: 0:11:14 lr: 0.000010 loss_cls: 3.8740 (3.8579) grad_norm: 4.4713 (4.3989) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:05 root] (utils.py 283): INFO Epoch: [17] [1650/2502] eta: 0:11:06 lr: 0.000010 loss_cls: 3.7810 (3.8571) grad_norm: 4.4713 (4.4003) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:13 root] (utils.py 283): INFO Epoch: [17] [1660/2502] eta: 0:10:58 lr: 0.000010 loss_cls: 3.7810 (3.8557) grad_norm: 4.3508 (4.3999) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:20 root] (utils.py 283): INFO Epoch: [17] [1670/2502] eta: 0:10:50 lr: 0.000010 loss_cls: 4.0116 (3.8576) grad_norm: 4.3290 (4.3998) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:28 root] (utils.py 283): INFO Epoch: [17] [1680/2502] eta: 0:10:42 lr: 0.000010 loss_cls: 4.1121 (3.8583) grad_norm: 4.1004 (4.3987) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:36 root] (utils.py 283): INFO Epoch: [17] [1690/2502] eta: 0:10:35 lr: 0.000010 loss_cls: 3.8672 (3.8565) grad_norm: 4.2166 (4.3973) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:44 root] (utils.py 283): INFO Epoch: [17] [1700/2502] eta: 0:10:27 lr: 0.000010 loss_cls: 3.7467 (3.8569) grad_norm: 4.2229 (4.3962) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:52 root] (utils.py 283): INFO Epoch: [17] [1710/2502] eta: 0:10:19 lr: 0.000010 loss_cls: 3.9655 (3.8571) grad_norm: 4.2229 (4.3973) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-05 22:38:59 root] (utils.py 283): INFO Epoch: [17] [1720/2502] eta: 0:10:11 lr: 0.000010 loss_cls: 3.9711 (3.8575) grad_norm: 4.1585 (4.3957) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-05 22:39:07 root] (utils.py 283): INFO Epoch: [17] [1730/2502] eta: 0:10:03 lr: 0.000010 loss_cls: 3.9711 (3.8559) grad_norm: 4.2442 (4.3965) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 22:39:15 root] (utils.py 283): INFO Epoch: [17] [1740/2502] eta: 0:09:55 lr: 0.000010 loss_cls: 3.6423 (3.8559) grad_norm: 4.4667 (4.3975) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 22:39:23 root] (utils.py 283): INFO Epoch: [17] [1750/2502] eta: 0:09:48 lr: 0.000010 loss_cls: 4.0601 (3.8571) grad_norm: 4.3635 (4.3999) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 22:39:31 root] (utils.py 283): INFO Epoch: [17] [1760/2502] eta: 0:09:40 lr: 0.000010 loss_cls: 4.1349 (3.8570) grad_norm: 4.2767 (4.4008) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 22:39:38 root] (utils.py 283): INFO Epoch: [17] [1770/2502] eta: 0:09:32 lr: 0.000010 loss_cls: 3.7318 (3.8555) grad_norm: 4.2369 (4.3997) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:39:46 root] (utils.py 283): INFO Epoch: [17] [1780/2502] eta: 0:09:24 lr: 0.000010 loss_cls: 3.7318 (3.8552) grad_norm: 4.1434 (4.3985) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 22:39:54 root] (utils.py 283): INFO Epoch: [17] [1790/2502] eta: 0:09:16 lr: 0.000010 loss_cls: 3.8208 (3.8547) grad_norm: 4.1656 (4.3980) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-05 22:40:02 root] (utils.py 283): INFO Epoch: [17] [1800/2502] eta: 0:09:08 lr: 0.000010 loss_cls: 3.7570 (3.8538) grad_norm: 4.3234 (4.3983) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-05 22:40:10 root] (utils.py 283): INFO Epoch: [17] [1810/2502] eta: 0:09:01 lr: 0.000010 loss_cls: 4.0422 (3.8552) grad_norm: 4.2213 (4.3976) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 22:40:17 root] (utils.py 283): INFO Epoch: [17] [1820/2502] eta: 0:08:53 lr: 0.000010 loss_cls: 4.0232 (3.8553) grad_norm: 4.1184 (4.3968) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-05 22:40:25 root] (utils.py 283): INFO Epoch: [17] [1830/2502] eta: 0:08:45 lr: 0.000010 loss_cls: 3.9849 (3.8550) grad_norm: 4.1475 (4.3960) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 22:40:33 root] (utils.py 283): INFO Epoch: [17] [1840/2502] eta: 0:08:37 lr: 0.000010 loss_cls: 3.7743 (3.8537) grad_norm: 4.0323 (4.3942) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 22:40:41 root] (utils.py 283): INFO Epoch: [17] [1850/2502] eta: 0:08:29 lr: 0.000010 loss_cls: 3.7743 (3.8536) grad_norm: 4.2585 (4.3946) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 22:40:49 root] (utils.py 283): INFO Epoch: [17] [1860/2502] eta: 0:08:21 lr: 0.000010 loss_cls: 4.0082 (3.8532) grad_norm: 4.2585 (4.3941) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-05 22:40:56 root] (utils.py 283): INFO Epoch: [17] [1870/2502] eta: 0:08:14 lr: 0.000010 loss_cls: 4.1072 (3.8541) grad_norm: 4.0325 (4.3920) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:04 root] (utils.py 283): INFO Epoch: [17] [1880/2502] eta: 0:08:06 lr: 0.000010 loss_cls: 4.0429 (3.8532) grad_norm: 4.0325 (4.3915) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:12 root] (utils.py 283): INFO Epoch: [17] [1890/2502] eta: 0:07:58 lr: 0.000010 loss_cls: 3.8531 (3.8534) grad_norm: 4.1407 (4.3914) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:20 root] (utils.py 283): INFO Epoch: [17] [1900/2502] eta: 0:07:50 lr: 0.000010 loss_cls: 4.0583 (3.8546) grad_norm: 4.0750 (4.3899) time: 0.7875 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:28 root] (utils.py 283): INFO Epoch: [17] [1910/2502] eta: 0:07:42 lr: 0.000010 loss_cls: 4.0654 (3.8551) grad_norm: 4.1383 (4.3916) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:36 root] (utils.py 283): INFO Epoch: [17] [1920/2502] eta: 0:07:35 lr: 0.000010 loss_cls: 3.9334 (3.8549) grad_norm: 4.3540 (4.3921) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:43 root] (utils.py 283): INFO Epoch: [17] [1930/2502] eta: 0:07:27 lr: 0.000010 loss_cls: 3.8923 (3.8540) grad_norm: 4.3540 (4.3920) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:51 root] (utils.py 283): INFO Epoch: [17] [1940/2502] eta: 0:07:19 lr: 0.000010 loss_cls: 4.0676 (3.8546) grad_norm: 4.3488 (4.3920) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-05 22:41:59 root] (utils.py 283): INFO Epoch: [17] [1950/2502] eta: 0:07:11 lr: 0.000010 loss_cls: 4.1799 (3.8546) grad_norm: 4.4979 (4.3936) time: 0.7933 data: 0.0002 max mem: 8421 +[2024-12-05 22:42:07 root] (utils.py 283): INFO Epoch: [17] [1960/2502] eta: 0:07:03 lr: 0.000010 loss_cls: 3.8470 (3.8534) grad_norm: 4.2612 (4.3931) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-05 22:42:15 root] (utils.py 283): INFO Epoch: [17] [1970/2502] eta: 0:06:56 lr: 0.000010 loss_cls: 4.0513 (3.8556) grad_norm: 4.2612 (4.3995) time: 0.7806 data: 0.0002 max mem: 8421 +[2024-12-05 22:42:23 root] (utils.py 283): INFO Epoch: [17] [1980/2502] eta: 0:06:48 lr: 0.000010 loss_cls: 4.1449 (3.8561) grad_norm: 4.1985 (4.4007) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 22:42:31 root] (utils.py 283): INFO Epoch: [17] [1990/2502] eta: 0:06:40 lr: 0.000010 loss_cls: 3.7063 (3.8542) grad_norm: 4.1591 (4.4003) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 22:42:38 root] (utils.py 283): INFO Epoch: [17] [2000/2502] eta: 0:06:32 lr: 0.000010 loss_cls: 3.7335 (3.8550) grad_norm: 4.2976 (4.3999) time: 0.7816 data: 0.0002 max mem: 8421 +[2024-12-05 22:42:46 root] (utils.py 283): INFO Epoch: [17] [2010/2502] eta: 0:06:24 lr: 0.000010 loss_cls: 4.1329 (3.8556) grad_norm: 4.2445 (4.4001) time: 0.7817 data: 0.0002 max mem: 8421 +[2024-12-05 22:42:54 root] (utils.py 283): INFO Epoch: [17] [2020/2502] eta: 0:06:16 lr: 0.000010 loss_cls: 4.0912 (3.8551) grad_norm: 4.3140 (4.4003) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-05 22:43:02 root] (utils.py 283): INFO Epoch: [17] [2030/2502] eta: 0:06:09 lr: 0.000010 loss_cls: 3.9224 (3.8556) grad_norm: 4.1177 (4.3988) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-05 22:43:10 root] (utils.py 283): INFO Epoch: [17] [2040/2502] eta: 0:06:01 lr: 0.000010 loss_cls: 4.0405 (3.8553) grad_norm: 4.1177 (4.3990) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 22:43:18 root] (utils.py 283): INFO Epoch: [17] [2050/2502] eta: 0:05:53 lr: 0.000010 loss_cls: 3.9043 (3.8558) grad_norm: 4.2553 (4.3989) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-05 22:43:25 root] (utils.py 283): INFO Epoch: [17] [2060/2502] eta: 0:05:45 lr: 0.000010 loss_cls: 4.1518 (3.8570) grad_norm: 4.2809 (4.3985) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-05 22:43:33 root] (utils.py 283): INFO Epoch: [17] [2070/2502] eta: 0:05:37 lr: 0.000010 loss_cls: 4.0606 (3.8573) grad_norm: 4.3806 (4.3991) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-05 22:43:41 root] (utils.py 283): INFO Epoch: [17] [2080/2502] eta: 0:05:30 lr: 0.000010 loss_cls: 4.0371 (3.8585) grad_norm: 4.4410 (4.3991) time: 0.7989 data: 0.0003 max mem: 8421 +[2024-12-05 22:43:49 root] (utils.py 283): INFO Epoch: [17] [2090/2502] eta: 0:05:22 lr: 0.000010 loss_cls: 4.1915 (3.8590) grad_norm: 4.4578 (4.4006) time: 0.7926 data: 0.0003 max mem: 8421 +[2024-12-05 22:43:57 root] (utils.py 283): INFO Epoch: [17] [2100/2502] eta: 0:05:14 lr: 0.000010 loss_cls: 4.0731 (3.8594) grad_norm: 4.2388 (4.3997) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 22:44:05 root] (utils.py 283): INFO Epoch: [17] [2110/2502] eta: 0:05:06 lr: 0.000010 loss_cls: 3.8585 (3.8581) grad_norm: 4.1714 (4.3995) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-05 22:44:13 root] (utils.py 283): INFO Epoch: [17] [2120/2502] eta: 0:04:58 lr: 0.000010 loss_cls: 3.8542 (3.8589) grad_norm: 4.3142 (4.3999) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-05 22:44:20 root] (utils.py 283): INFO Epoch: [17] [2130/2502] eta: 0:04:50 lr: 0.000010 loss_cls: 3.9948 (3.8588) grad_norm: 4.4579 (4.4007) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:44:28 root] (utils.py 283): INFO Epoch: [17] [2140/2502] eta: 0:04:43 lr: 0.000010 loss_cls: 4.1034 (3.8601) grad_norm: 4.2932 (4.4003) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-05 22:44:36 root] (utils.py 283): INFO Epoch: [17] [2150/2502] eta: 0:04:35 lr: 0.000010 loss_cls: 4.1607 (3.8599) grad_norm: 4.3445 (4.4003) time: 0.7770 data: 0.0003 max mem: 8421 +[2024-12-05 22:44:44 root] (utils.py 283): INFO Epoch: [17] [2160/2502] eta: 0:04:27 lr: 0.000010 loss_cls: 4.0840 (3.8611) grad_norm: 4.3445 (4.3999) time: 0.7770 data: 0.0002 max mem: 8421 +[2024-12-05 22:44:52 root] (utils.py 283): INFO Epoch: [17] [2170/2502] eta: 0:04:19 lr: 0.000010 loss_cls: 4.0840 (3.8610) grad_norm: 4.2404 (4.3999) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-05 22:44:59 root] (utils.py 283): INFO Epoch: [17] [2180/2502] eta: 0:04:11 lr: 0.000010 loss_cls: 4.0551 (3.8611) grad_norm: 4.2479 (4.3997) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-05 22:45:07 root] (utils.py 283): INFO Epoch: [17] [2190/2502] eta: 0:04:04 lr: 0.000010 loss_cls: 4.1971 (3.8626) grad_norm: 4.2479 (4.3991) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 22:45:15 root] (utils.py 283): INFO Epoch: [17] [2200/2502] eta: 0:03:56 lr: 0.000010 loss_cls: 4.2307 (3.8635) grad_norm: 4.3123 (4.4007) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-05 22:45:23 root] (utils.py 283): INFO Epoch: [17] [2210/2502] eta: 0:03:48 lr: 0.000010 loss_cls: 3.7386 (3.8627) grad_norm: 4.3328 (4.3999) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-05 22:45:31 root] (utils.py 283): INFO Epoch: [17] [2220/2502] eta: 0:03:40 lr: 0.000010 loss_cls: 3.9607 (3.8637) grad_norm: 4.0647 (4.3991) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-05 22:45:38 root] (utils.py 283): INFO Epoch: [17] [2230/2502] eta: 0:03:32 lr: 0.000010 loss_cls: 4.1141 (3.8628) grad_norm: 4.0566 (4.3980) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 22:45:47 root] (utils.py 283): INFO Epoch: [17] [2240/2502] eta: 0:03:25 lr: 0.000010 loss_cls: 3.9078 (3.8634) grad_norm: 4.1553 (4.4080) time: 0.8333 data: 0.0004 max mem: 8421 +[2024-12-05 22:45:56 root] (utils.py 283): INFO Epoch: [17] [2250/2502] eta: 0:03:17 lr: 0.000010 loss_cls: 3.9078 (3.8626) grad_norm: 4.5515 (4.4092) time: 0.8819 data: 0.0005 max mem: 8421 +[2024-12-05 22:46:04 root] (utils.py 283): INFO Epoch: [17] [2260/2502] eta: 0:03:09 lr: 0.000010 loss_cls: 3.9648 (3.8627) grad_norm: 4.5303 (4.4103) time: 0.8310 data: 0.0004 max mem: 8421 +[2024-12-05 22:46:12 root] (utils.py 283): INFO Epoch: [17] [2270/2502] eta: 0:03:01 lr: 0.000010 loss_cls: 3.9648 (3.8621) grad_norm: 4.3208 (4.4115) time: 0.7784 data: 0.0002 max mem: 8421 +[2024-12-05 22:46:21 root] (utils.py 283): INFO Epoch: [17] [2280/2502] eta: 0:02:53 lr: 0.000010 loss_cls: 3.7961 (3.8622) grad_norm: 4.2786 (4.4114) time: 0.8651 data: 0.0003 max mem: 8421 +[2024-12-05 22:46:29 root] (utils.py 283): INFO Epoch: [17] [2290/2502] eta: 0:02:46 lr: 0.000010 loss_cls: 4.0741 (3.8625) grad_norm: 3.9693 (4.4099) time: 0.8657 data: 0.0003 max mem: 8421 +[2024-12-05 22:46:37 root] (utils.py 283): INFO Epoch: [17] [2300/2502] eta: 0:02:38 lr: 0.000010 loss_cls: 4.1049 (3.8628) grad_norm: 4.1238 (4.4100) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 22:46:45 root] (utils.py 283): INFO Epoch: [17] [2310/2502] eta: 0:02:30 lr: 0.000010 loss_cls: 4.0223 (3.8630) grad_norm: 4.2552 (4.4101) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 22:46:52 root] (utils.py 283): INFO Epoch: [17] [2320/2502] eta: 0:02:22 lr: 0.000010 loss_cls: 3.9687 (3.8622) grad_norm: 4.2647 (4.4098) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 22:47:00 root] (utils.py 283): INFO Epoch: [17] [2330/2502] eta: 0:02:14 lr: 0.000010 loss_cls: 4.0154 (3.8627) grad_norm: 4.2562 (4.4092) time: 0.7746 data: 0.0003 max mem: 8421 +[2024-12-05 22:47:08 root] (utils.py 283): INFO Epoch: [17] [2340/2502] eta: 0:02:06 lr: 0.000010 loss_cls: 4.0787 (3.8631) grad_norm: 4.0693 (4.4081) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-05 22:47:16 root] (utils.py 283): INFO Epoch: [17] [2350/2502] eta: 0:01:59 lr: 0.000010 loss_cls: 4.0363 (3.8620) grad_norm: 4.0301 (4.4069) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-05 22:47:24 root] (utils.py 283): INFO Epoch: [17] [2360/2502] eta: 0:01:51 lr: 0.000010 loss_cls: 3.5329 (3.8604) grad_norm: 4.0806 (4.4060) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-05 22:47:31 root] (utils.py 283): INFO Epoch: [17] [2370/2502] eta: 0:01:43 lr: 0.000010 loss_cls: 3.5063 (3.8590) grad_norm: 4.2340 (4.4056) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 22:47:39 root] (utils.py 283): INFO Epoch: [17] [2380/2502] eta: 0:01:35 lr: 0.000010 loss_cls: 3.7120 (3.8595) grad_norm: 4.3111 (4.4056) time: 0.7746 data: 0.0003 max mem: 8421 +[2024-12-05 22:47:47 root] (utils.py 283): INFO Epoch: [17] [2390/2502] eta: 0:01:27 lr: 0.000010 loss_cls: 3.9543 (3.8592) grad_norm: 4.2353 (4.4062) time: 0.7716 data: 0.0003 max mem: 8421 +[2024-12-05 22:47:55 root] (utils.py 283): INFO Epoch: [17] [2400/2502] eta: 0:01:19 lr: 0.000010 loss_cls: 3.8464 (3.8584) grad_norm: 4.1675 (4.4060) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 22:48:03 root] (utils.py 283): INFO Epoch: [17] [2410/2502] eta: 0:01:12 lr: 0.000010 loss_cls: 3.8121 (3.8582) grad_norm: 4.1237 (4.4048) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 22:48:10 root] (utils.py 283): INFO Epoch: [17] [2420/2502] eta: 0:01:04 lr: 0.000010 loss_cls: 3.9440 (3.8583) grad_norm: 4.1230 (4.4046) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 22:48:18 root] (utils.py 283): INFO Epoch: [17] [2430/2502] eta: 0:00:56 lr: 0.000010 loss_cls: 3.9992 (3.8591) grad_norm: 4.2163 (4.4040) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-05 22:48:26 root] (utils.py 283): INFO Epoch: [17] [2440/2502] eta: 0:00:48 lr: 0.000010 loss_cls: 4.1562 (3.8598) grad_norm: 4.2163 (4.4029) time: 0.7818 data: 0.0002 max mem: 8421 +[2024-12-05 22:48:34 root] (utils.py 283): INFO Epoch: [17] [2450/2502] eta: 0:00:40 lr: 0.000010 loss_cls: 4.1137 (3.8601) grad_norm: 4.1852 (4.4026) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 22:48:42 root] (utils.py 283): INFO Epoch: [17] [2460/2502] eta: 0:00:32 lr: 0.000010 loss_cls: 3.8365 (3.8592) grad_norm: 4.2881 (4.4023) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 22:48:50 root] (utils.py 283): INFO Epoch: [17] [2470/2502] eta: 0:00:25 lr: 0.000010 loss_cls: 3.8365 (3.8595) grad_norm: 4.0934 (4.4013) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 22:48:57 root] (utils.py 283): INFO Epoch: [17] [2480/2502] eta: 0:00:17 lr: 0.000010 loss_cls: 4.1419 (3.8608) grad_norm: 4.1059 (4.4004) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 22:49:06 root] (utils.py 283): INFO Epoch: [17] [2490/2502] eta: 0:00:09 lr: 0.000010 loss_cls: 4.0591 (3.8607) grad_norm: 4.1225 (4.3994) time: 0.8079 data: 0.0252 max mem: 8421 +[2024-12-05 22:49:14 root] (utils.py 283): INFO Epoch: [17] [2500/2502] eta: 0:00:01 lr: 0.000010 loss_cls: 3.9968 (3.8615) grad_norm: 4.1957 (4.4024) time: 0.8083 data: 0.0252 max mem: 8421 +[2024-12-05 22:49:14 root] (utils.py 283): INFO Epoch: [17] [2501/2502] eta: 0:00:00 lr: 0.000010 loss_cls: 3.9968 (3.8618) grad_norm: 4.2042 (4.4032) time: 0.8082 data: 0.0252 max mem: 8421 +[2024-12-05 22:49:14 root] (utils.py 297): INFO Epoch: [17] Total time: 0:32:41 (0.7838 s / it) +[2024-12-05 22:49:14 root] (engine.py 179): INFO Averaged stats:lr: 0.000010 loss_cls: 3.9968 (3.8615) grad_norm: 4.2042 (4.4032) +[2024-12-05 22:49:15 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7649 (0.7649) acc1: 85.1562 (85.1562) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1308 data: 0.0003 max mem: 8421 +[2024-12-05 22:49:16 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8288 (0.8708) acc1: 81.2500 (81.5341) acc3: 92.9688 (92.8977) acc5: 95.3125 (95.1705) time: 0.1312 data: 0.0005 max mem: 8421 +[2024-12-05 22:49:18 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8932 (0.9176) acc1: 77.3438 (80.0967) acc3: 92.9688 (92.4851) acc5: 94.5312 (94.9405) time: 0.1337 data: 0.0005 max mem: 8421 +[2024-12-05 22:49:19 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9462 (0.9228) acc1: 78.1250 (79.5615) acc3: 92.1875 (92.7167) acc5: 95.3125 (95.2369) time: 0.1374 data: 0.0005 max mem: 8421 +[2024-12-05 22:49:21 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8602 (0.9163) acc1: 81.2500 (79.8780) acc3: 93.7500 (92.7401) acc5: 96.0938 (95.2363) time: 0.1532 data: 0.0163 max mem: 8421 +[2024-12-05 22:49:22 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0431 (1.0079) acc1: 75.7812 (77.9259) acc3: 89.0625 (91.1765) acc5: 90.6250 (94.0411) time: 0.1527 data: 0.0163 max mem: 8421 +[2024-12-05 22:49:23 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3075 (1.0521) acc1: 71.8750 (77.0492) acc3: 85.9375 (90.3945) acc5: 89.0625 (93.3145) time: 0.1370 data: 0.0005 max mem: 8421 +[2024-12-05 22:49:25 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2655 (1.0957) acc1: 71.8750 (75.9243) acc3: 86.7188 (89.8107) acc5: 89.8438 (92.9247) time: 0.1443 data: 0.0076 max mem: 8421 +[2024-12-05 22:49:27 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3517 (1.1343) acc1: 71.0938 (75.0965) acc3: 84.3750 (89.1107) acc5: 89.8438 (92.4286) time: 0.1710 data: 0.0341 max mem: 8421 +[2024-12-05 22:49:28 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3636 (1.1638) acc1: 69.5312 (74.3990) acc3: 83.5938 (88.5216) acc5: 89.0625 (92.0072) time: 0.1739 data: 0.0398 max mem: 8421 +[2024-12-05 22:49:29 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2509 (1.1529) acc1: 71.8750 (74.4960) acc3: 86.7188 (88.7200) acc5: 90.6250 (92.2320) time: 0.1685 data: 0.0344 max mem: 8421 +[2024-12-05 22:49:29 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1490 s / it) +[2024-12-05 22:49:32 root] (engine.py 264): INFO * Acc@1 74.544 Acc@3 88.834 Acc@5 92.252 loss 1.153 flops 1.285 layer_flops 1.251 +[2024-12-05 22:49:32 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 74.5% +[2024-12-05 22:49:32 root] (main.py 551): INFO Max accuracy: 74.60% +[2024-12-05 22:49:32 root] (utils.py 283): INFO Epoch: [18] [ 0/2502] eta: 0:33:31 lr: 0.000009 loss_cls: 4.3373 (4.3373) grad_norm: 4.3050 (4.3050) time: 0.8039 data: 0.0004 max mem: 8421 +[2024-12-05 22:49:40 root] (utils.py 283): INFO Epoch: [18] [ 10/2502] eta: 0:32:19 lr: 0.000009 loss_cls: 3.7366 (3.8730) grad_norm: 4.1998 (4.2508) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-05 22:49:48 root] (utils.py 283): INFO Epoch: [18] [ 20/2502] eta: 0:32:04 lr: 0.000009 loss_cls: 3.7366 (3.8217) grad_norm: 4.1873 (4.4033) time: 0.7740 data: 0.0002 max mem: 8421 +[2024-12-05 22:49:56 root] (utils.py 283): INFO Epoch: [18] [ 30/2502] eta: 0:31:58 lr: 0.000009 loss_cls: 3.8042 (3.7648) grad_norm: 4.2449 (4.4529) time: 0.7748 data: 0.0003 max mem: 8421 +[2024-12-05 22:50:03 root] (utils.py 283): INFO Epoch: [18] [ 40/2502] eta: 0:31:53 lr: 0.000009 loss_cls: 3.8357 (3.8015) grad_norm: 4.1106 (4.3770) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 22:50:11 root] (utils.py 283): INFO Epoch: [18] [ 50/2502] eta: 0:31:49 lr: 0.000009 loss_cls: 3.9939 (3.7946) grad_norm: 4.1106 (4.3941) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-05 22:50:19 root] (utils.py 283): INFO Epoch: [18] [ 60/2502] eta: 0:31:47 lr: 0.000009 loss_cls: 4.0860 (3.8210) grad_norm: 4.0841 (4.3496) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-05 22:50:27 root] (utils.py 283): INFO Epoch: [18] [ 70/2502] eta: 0:31:45 lr: 0.000009 loss_cls: 3.9767 (3.8030) grad_norm: 4.0221 (4.3352) time: 0.7962 data: 0.0002 max mem: 8421 +[2024-12-05 22:50:35 root] (utils.py 283): INFO Epoch: [18] [ 80/2502] eta: 0:31:39 lr: 0.000009 loss_cls: 3.9767 (3.7957) grad_norm: 4.0973 (4.3107) time: 0.7935 data: 0.0002 max mem: 8421 +[2024-12-05 22:50:43 root] (utils.py 283): INFO Epoch: [18] [ 90/2502] eta: 0:31:28 lr: 0.000009 loss_cls: 3.7941 (3.7825) grad_norm: 4.1186 (4.2986) time: 0.7801 data: 0.0002 max mem: 8421 +[2024-12-05 22:50:51 root] (utils.py 283): INFO Epoch: [18] [ 100/2502] eta: 0:31:19 lr: 0.000009 loss_cls: 3.9128 (3.8200) grad_norm: 4.1186 (4.3009) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 22:50:58 root] (utils.py 283): INFO Epoch: [18] [ 110/2502] eta: 0:31:10 lr: 0.000009 loss_cls: 4.2115 (3.8198) grad_norm: 4.1809 (4.3123) time: 0.7770 data: 0.0002 max mem: 8421 +[2024-12-05 22:51:06 root] (utils.py 283): INFO Epoch: [18] [ 120/2502] eta: 0:31:00 lr: 0.000009 loss_cls: 3.6717 (3.8104) grad_norm: 4.1809 (4.3086) time: 0.7754 data: 0.0002 max mem: 8421 +[2024-12-05 22:51:14 root] (utils.py 283): INFO Epoch: [18] [ 130/2502] eta: 0:30:52 lr: 0.000009 loss_cls: 3.8086 (3.8318) grad_norm: 4.1573 (4.3165) time: 0.7760 data: 0.0002 max mem: 8421 +[2024-12-05 22:51:22 root] (utils.py 283): INFO Epoch: [18] [ 140/2502] eta: 0:30:43 lr: 0.000009 loss_cls: 4.0830 (3.8419) grad_norm: 4.1308 (4.3024) time: 0.7774 data: 0.0002 max mem: 8421 +[2024-12-05 22:51:29 root] (utils.py 283): INFO Epoch: [18] [ 150/2502] eta: 0:30:36 lr: 0.000009 loss_cls: 3.9842 (3.8419) grad_norm: 4.1876 (4.3078) time: 0.7788 data: 0.0002 max mem: 8421 +[2024-12-05 22:51:37 root] (utils.py 283): INFO Epoch: [18] [ 160/2502] eta: 0:30:30 lr: 0.000009 loss_cls: 4.0311 (3.8613) grad_norm: 4.2851 (4.3058) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 22:51:45 root] (utils.py 283): INFO Epoch: [18] [ 170/2502] eta: 0:30:24 lr: 0.000009 loss_cls: 4.0311 (3.8498) grad_norm: 4.1480 (4.3014) time: 0.7949 data: 0.0003 max mem: 8421 +[2024-12-05 22:51:53 root] (utils.py 283): INFO Epoch: [18] [ 180/2502] eta: 0:30:18 lr: 0.000009 loss_cls: 4.0561 (3.8591) grad_norm: 4.0976 (4.2948) time: 0.7968 data: 0.0003 max mem: 8421 +[2024-12-05 22:52:01 root] (utils.py 283): INFO Epoch: [18] [ 190/2502] eta: 0:30:09 lr: 0.000009 loss_cls: 3.7674 (3.8498) grad_norm: 4.0976 (4.2957) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-05 22:52:09 root] (utils.py 283): INFO Epoch: [18] [ 200/2502] eta: 0:30:02 lr: 0.000009 loss_cls: 3.9719 (3.8559) grad_norm: 4.2175 (4.3047) time: 0.7806 data: 0.0002 max mem: 8421 +[2024-12-05 22:52:17 root] (utils.py 283): INFO Epoch: [18] [ 210/2502] eta: 0:29:54 lr: 0.000009 loss_cls: 4.1197 (3.8702) grad_norm: 4.3121 (4.3509) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-05 22:52:25 root] (utils.py 283): INFO Epoch: [18] [ 220/2502] eta: 0:29:46 lr: 0.000009 loss_cls: 4.1084 (3.8723) grad_norm: 4.3292 (4.3462) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 22:52:32 root] (utils.py 283): INFO Epoch: [18] [ 230/2502] eta: 0:29:39 lr: 0.000009 loss_cls: 3.9123 (3.8695) grad_norm: 4.2101 (4.3479) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-05 22:52:40 root] (utils.py 283): INFO Epoch: [18] [ 240/2502] eta: 0:29:31 lr: 0.000009 loss_cls: 4.1715 (3.8817) grad_norm: 4.2079 (4.3445) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-05 22:52:48 root] (utils.py 283): INFO Epoch: [18] [ 250/2502] eta: 0:29:23 lr: 0.000009 loss_cls: 4.0678 (3.8826) grad_norm: 4.2927 (4.3531) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-05 22:52:56 root] (utils.py 283): INFO Epoch: [18] [ 260/2502] eta: 0:29:16 lr: 0.000009 loss_cls: 3.9564 (3.8804) grad_norm: 4.5848 (4.3609) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-05 22:53:04 root] (utils.py 283): INFO Epoch: [18] [ 270/2502] eta: 0:29:08 lr: 0.000009 loss_cls: 3.8937 (3.8772) grad_norm: 4.3946 (4.3567) time: 0.7854 data: 0.0002 max mem: 8421 +[2024-12-05 22:53:12 root] (utils.py 283): INFO Epoch: [18] [ 280/2502] eta: 0:29:00 lr: 0.000009 loss_cls: 3.8980 (3.8799) grad_norm: 4.4202 (4.3679) time: 0.7829 data: 0.0002 max mem: 8421 +[2024-12-05 22:53:20 root] (utils.py 283): INFO Epoch: [18] [ 290/2502] eta: 0:28:52 lr: 0.000009 loss_cls: 3.9319 (3.8801) grad_norm: 4.4884 (4.3708) time: 0.7838 data: 0.0002 max mem: 8421 +[2024-12-05 22:53:27 root] (utils.py 283): INFO Epoch: [18] [ 300/2502] eta: 0:28:44 lr: 0.000009 loss_cls: 3.9641 (3.8862) grad_norm: 4.2169 (4.3673) time: 0.7827 data: 0.0002 max mem: 8421 +[2024-12-05 22:53:35 root] (utils.py 283): INFO Epoch: [18] [ 310/2502] eta: 0:28:36 lr: 0.000009 loss_cls: 3.9641 (3.8770) grad_norm: 4.1125 (4.3602) time: 0.7816 data: 0.0002 max mem: 8421 +[2024-12-05 22:53:43 root] (utils.py 283): INFO Epoch: [18] [ 320/2502] eta: 0:28:28 lr: 0.000009 loss_cls: 3.6874 (3.8739) grad_norm: 4.1719 (4.3558) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-05 22:53:51 root] (utils.py 283): INFO Epoch: [18] [ 330/2502] eta: 0:28:20 lr: 0.000009 loss_cls: 3.6874 (3.8667) grad_norm: 4.1776 (4.3594) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-05 22:53:59 root] (utils.py 283): INFO Epoch: [18] [ 340/2502] eta: 0:28:12 lr: 0.000009 loss_cls: 4.1242 (3.8718) grad_norm: 4.2468 (4.3609) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 22:54:06 root] (utils.py 283): INFO Epoch: [18] [ 350/2502] eta: 0:28:04 lr: 0.000009 loss_cls: 4.1757 (3.8762) grad_norm: 4.2468 (4.3563) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 22:54:14 root] (utils.py 283): INFO Epoch: [18] [ 360/2502] eta: 0:27:56 lr: 0.000009 loss_cls: 4.0587 (3.8723) grad_norm: 4.2126 (4.3536) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-05 22:54:22 root] (utils.py 283): INFO Epoch: [18] [ 370/2502] eta: 0:27:48 lr: 0.000009 loss_cls: 3.6982 (3.8698) grad_norm: 4.2121 (4.3475) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 22:54:30 root] (utils.py 283): INFO Epoch: [18] [ 380/2502] eta: 0:27:40 lr: 0.000009 loss_cls: 3.9281 (3.8686) grad_norm: 4.1406 (4.3420) time: 0.7838 data: 0.0002 max mem: 8421 +[2024-12-05 22:54:38 root] (utils.py 283): INFO Epoch: [18] [ 390/2502] eta: 0:27:33 lr: 0.000009 loss_cls: 3.9504 (3.8685) grad_norm: 4.1492 (4.3388) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 22:54:46 root] (utils.py 283): INFO Epoch: [18] [ 400/2502] eta: 0:27:25 lr: 0.000009 loss_cls: 4.0228 (3.8717) grad_norm: 4.2226 (4.3413) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-05 22:54:53 root] (utils.py 283): INFO Epoch: [18] [ 410/2502] eta: 0:27:17 lr: 0.000009 loss_cls: 3.9953 (3.8735) grad_norm: 4.3837 (4.3418) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-05 22:55:01 root] (utils.py 283): INFO Epoch: [18] [ 420/2502] eta: 0:27:09 lr: 0.000009 loss_cls: 3.7368 (3.8659) grad_norm: 4.3079 (4.3474) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-05 22:55:09 root] (utils.py 283): INFO Epoch: [18] [ 430/2502] eta: 0:27:02 lr: 0.000009 loss_cls: 3.8236 (3.8656) grad_norm: 4.2495 (4.3446) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-05 22:55:17 root] (utils.py 283): INFO Epoch: [18] [ 440/2502] eta: 0:26:54 lr: 0.000009 loss_cls: 3.9159 (3.8648) grad_norm: 4.2087 (4.3420) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-05 22:55:25 root] (utils.py 283): INFO Epoch: [18] [ 450/2502] eta: 0:26:46 lr: 0.000009 loss_cls: 3.8963 (3.8666) grad_norm: 4.3412 (4.3417) time: 0.7824 data: 0.0002 max mem: 8421 +[2024-12-05 22:55:33 root] (utils.py 283): INFO Epoch: [18] [ 460/2502] eta: 0:26:38 lr: 0.000009 loss_cls: 4.0166 (3.8702) grad_norm: 4.3412 (4.3428) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-05 22:55:40 root] (utils.py 283): INFO Epoch: [18] [ 470/2502] eta: 0:26:31 lr: 0.000009 loss_cls: 4.0166 (3.8723) grad_norm: 4.2696 (4.3414) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-05 22:55:48 root] (utils.py 283): INFO Epoch: [18] [ 480/2502] eta: 0:26:23 lr: 0.000009 loss_cls: 3.8605 (3.8666) grad_norm: 4.1740 (4.3374) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-05 22:55:56 root] (utils.py 283): INFO Epoch: [18] [ 490/2502] eta: 0:26:16 lr: 0.000009 loss_cls: 3.9269 (3.8688) grad_norm: 4.0803 (4.3353) time: 0.7936 data: 0.0002 max mem: 8421 +[2024-12-05 22:56:04 root] (utils.py 283): INFO Epoch: [18] [ 500/2502] eta: 0:26:08 lr: 0.000009 loss_cls: 4.0202 (3.8702) grad_norm: 4.1448 (4.3333) time: 0.7882 data: 0.0002 max mem: 8421 +[2024-12-05 22:56:12 root] (utils.py 283): INFO Epoch: [18] [ 510/2502] eta: 0:26:00 lr: 0.000009 loss_cls: 3.6493 (3.8668) grad_norm: 4.1933 (4.3346) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-05 22:56:20 root] (utils.py 283): INFO Epoch: [18] [ 520/2502] eta: 0:25:52 lr: 0.000009 loss_cls: 4.0683 (3.8699) grad_norm: 4.1485 (4.3319) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-05 22:56:28 root] (utils.py 283): INFO Epoch: [18] [ 530/2502] eta: 0:25:45 lr: 0.000009 loss_cls: 4.0521 (3.8629) grad_norm: 4.0814 (4.3288) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-05 22:56:36 root] (utils.py 283): INFO Epoch: [18] [ 540/2502] eta: 0:25:37 lr: 0.000009 loss_cls: 3.7952 (3.8616) grad_norm: 3.9603 (4.3239) time: 0.7920 data: 0.0003 max mem: 8421 +[2024-12-05 22:56:43 root] (utils.py 283): INFO Epoch: [18] [ 550/2502] eta: 0:25:29 lr: 0.000009 loss_cls: 3.9228 (3.8592) grad_norm: 3.8922 (4.3240) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-05 22:56:51 root] (utils.py 283): INFO Epoch: [18] [ 560/2502] eta: 0:25:21 lr: 0.000009 loss_cls: 3.9193 (3.8598) grad_norm: 4.1569 (4.3263) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-05 22:56:59 root] (utils.py 283): INFO Epoch: [18] [ 570/2502] eta: 0:25:14 lr: 0.000009 loss_cls: 3.8052 (3.8573) grad_norm: 4.3349 (4.3258) time: 0.7871 data: 0.0002 max mem: 8421 +[2024-12-05 22:57:07 root] (utils.py 283): INFO Epoch: [18] [ 580/2502] eta: 0:25:07 lr: 0.000009 loss_cls: 3.7451 (3.8565) grad_norm: 4.2085 (4.3278) time: 0.8043 data: 0.0003 max mem: 8421 +[2024-12-05 22:57:15 root] (utils.py 283): INFO Epoch: [18] [ 590/2502] eta: 0:25:00 lr: 0.000009 loss_cls: 4.0817 (3.8590) grad_norm: 4.1543 (4.3290) time: 0.8117 data: 0.0003 max mem: 8421 +[2024-12-05 22:57:24 root] (utils.py 283): INFO Epoch: [18] [ 600/2502] eta: 0:24:53 lr: 0.000009 loss_cls: 4.0817 (3.8548) grad_norm: 4.2562 (4.3344) time: 0.8137 data: 0.0002 max mem: 8421 +[2024-12-05 22:57:32 root] (utils.py 283): INFO Epoch: [18] [ 610/2502] eta: 0:24:46 lr: 0.000009 loss_cls: 3.9058 (3.8548) grad_norm: 4.1946 (4.3315) time: 0.8137 data: 0.0002 max mem: 8421 +[2024-12-05 22:57:40 root] (utils.py 283): INFO Epoch: [18] [ 620/2502] eta: 0:24:39 lr: 0.000009 loss_cls: 3.9143 (3.8562) grad_norm: 4.1946 (4.3308) time: 0.8107 data: 0.0002 max mem: 8421 +[2024-12-05 22:57:48 root] (utils.py 283): INFO Epoch: [18] [ 630/2502] eta: 0:24:32 lr: 0.000009 loss_cls: 4.0911 (3.8610) grad_norm: 4.2697 (4.3324) time: 0.8117 data: 0.0002 max mem: 8421 +[2024-12-05 22:57:56 root] (utils.py 283): INFO Epoch: [18] [ 640/2502] eta: 0:24:24 lr: 0.000009 loss_cls: 4.0535 (3.8605) grad_norm: 4.3014 (4.3311) time: 0.7988 data: 0.0003 max mem: 8421 +[2024-12-05 22:58:04 root] (utils.py 283): INFO Epoch: [18] [ 650/2502] eta: 0:24:16 lr: 0.000009 loss_cls: 3.7989 (3.8590) grad_norm: 4.2244 (4.3318) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-05 22:58:11 root] (utils.py 283): INFO Epoch: [18] [ 660/2502] eta: 0:24:08 lr: 0.000009 loss_cls: 3.9412 (3.8577) grad_norm: 4.3694 (4.3343) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 22:58:19 root] (utils.py 283): INFO Epoch: [18] [ 670/2502] eta: 0:24:00 lr: 0.000009 loss_cls: 3.8105 (3.8573) grad_norm: 4.4122 (4.3359) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-05 22:58:27 root] (utils.py 283): INFO Epoch: [18] [ 680/2502] eta: 0:23:52 lr: 0.000009 loss_cls: 3.8506 (3.8576) grad_norm: 4.2910 (4.3342) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 22:58:35 root] (utils.py 283): INFO Epoch: [18] [ 690/2502] eta: 0:23:44 lr: 0.000009 loss_cls: 3.9468 (3.8586) grad_norm: 4.1956 (4.3334) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-05 22:58:43 root] (utils.py 283): INFO Epoch: [18] [ 700/2502] eta: 0:23:36 lr: 0.000009 loss_cls: 3.9948 (3.8598) grad_norm: 4.1633 (4.3317) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-05 22:58:51 root] (utils.py 283): INFO Epoch: [18] [ 710/2502] eta: 0:23:28 lr: 0.000009 loss_cls: 3.9948 (3.8569) grad_norm: 4.1013 (4.3316) time: 0.7818 data: 0.0002 max mem: 8421 +[2024-12-05 22:58:58 root] (utils.py 283): INFO Epoch: [18] [ 720/2502] eta: 0:23:20 lr: 0.000009 loss_cls: 3.7437 (3.8548) grad_norm: 4.3358 (4.3349) time: 0.7835 data: 0.0002 max mem: 8421 +[2024-12-05 22:59:06 root] (utils.py 283): INFO Epoch: [18] [ 730/2502] eta: 0:23:12 lr: 0.000009 loss_cls: 3.7578 (3.8536) grad_norm: 4.1483 (4.3307) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-05 22:59:14 root] (utils.py 283): INFO Epoch: [18] [ 740/2502] eta: 0:23:04 lr: 0.000009 loss_cls: 3.8482 (3.8526) grad_norm: 4.1483 (4.3318) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 22:59:22 root] (utils.py 283): INFO Epoch: [18] [ 750/2502] eta: 0:22:57 lr: 0.000009 loss_cls: 3.9840 (3.8551) grad_norm: 4.2031 (4.3297) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-05 22:59:30 root] (utils.py 283): INFO Epoch: [18] [ 760/2502] eta: 0:22:48 lr: 0.000009 loss_cls: 4.1791 (3.8579) grad_norm: 4.1515 (4.3283) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 22:59:38 root] (utils.py 283): INFO Epoch: [18] [ 770/2502] eta: 0:22:41 lr: 0.000009 loss_cls: 3.9435 (3.8558) grad_norm: 4.2229 (4.3276) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 22:59:45 root] (utils.py 283): INFO Epoch: [18] [ 780/2502] eta: 0:22:33 lr: 0.000009 loss_cls: 3.7543 (3.8558) grad_norm: 4.3238 (4.3267) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-05 22:59:53 root] (utils.py 283): INFO Epoch: [18] [ 790/2502] eta: 0:22:24 lr: 0.000009 loss_cls: 3.8448 (3.8543) grad_norm: 4.1829 (4.3240) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-05 23:00:01 root] (utils.py 283): INFO Epoch: [18] [ 800/2502] eta: 0:22:17 lr: 0.000009 loss_cls: 3.8878 (3.8550) grad_norm: 4.1784 (4.3223) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 23:00:09 root] (utils.py 283): INFO Epoch: [18] [ 810/2502] eta: 0:22:09 lr: 0.000009 loss_cls: 3.8878 (3.8549) grad_norm: 4.1635 (4.3203) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-05 23:00:17 root] (utils.py 283): INFO Epoch: [18] [ 820/2502] eta: 0:22:01 lr: 0.000009 loss_cls: 4.0435 (3.8562) grad_norm: 4.1385 (4.3188) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 23:00:24 root] (utils.py 283): INFO Epoch: [18] [ 830/2502] eta: 0:21:53 lr: 0.000009 loss_cls: 4.0685 (3.8576) grad_norm: 4.2115 (4.3204) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-05 23:00:32 root] (utils.py 283): INFO Epoch: [18] [ 840/2502] eta: 0:21:45 lr: 0.000009 loss_cls: 3.9585 (3.8565) grad_norm: 4.3786 (4.3223) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-05 23:00:40 root] (utils.py 283): INFO Epoch: [18] [ 850/2502] eta: 0:21:37 lr: 0.000009 loss_cls: 3.9125 (3.8550) grad_norm: 4.2853 (4.3224) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-05 23:00:48 root] (utils.py 283): INFO Epoch: [18] [ 860/2502] eta: 0:21:29 lr: 0.000009 loss_cls: 4.0657 (3.8588) grad_norm: 4.2129 (4.3217) time: 0.7817 data: 0.0002 max mem: 8421 +[2024-12-05 23:00:56 root] (utils.py 283): INFO Epoch: [18] [ 870/2502] eta: 0:21:21 lr: 0.000009 loss_cls: 4.2075 (3.8637) grad_norm: 4.3052 (4.3223) time: 0.7873 data: 0.0002 max mem: 8421 +[2024-12-05 23:01:04 root] (utils.py 283): INFO Epoch: [18] [ 880/2502] eta: 0:21:14 lr: 0.000009 loss_cls: 4.0856 (3.8646) grad_norm: 4.1503 (4.3198) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 23:01:12 root] (utils.py 283): INFO Epoch: [18] [ 890/2502] eta: 0:21:06 lr: 0.000009 loss_cls: 3.8882 (3.8623) grad_norm: 4.1251 (4.3299) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-05 23:01:19 root] (utils.py 283): INFO Epoch: [18] [ 900/2502] eta: 0:20:58 lr: 0.000009 loss_cls: 3.6335 (3.8602) grad_norm: 4.1571 (4.3297) time: 0.7830 data: 0.0002 max mem: 8421 +[2024-12-05 23:01:27 root] (utils.py 283): INFO Epoch: [18] [ 910/2502] eta: 0:20:50 lr: 0.000009 loss_cls: 3.8507 (3.8614) grad_norm: 4.2246 (4.3300) time: 0.7900 data: 0.0002 max mem: 8421 +[2024-12-05 23:01:35 root] (utils.py 283): INFO Epoch: [18] [ 920/2502] eta: 0:20:42 lr: 0.000009 loss_cls: 4.1313 (3.8623) grad_norm: 4.2454 (4.3293) time: 0.7974 data: 0.0002 max mem: 8421 +[2024-12-05 23:01:43 root] (utils.py 283): INFO Epoch: [18] [ 930/2502] eta: 0:20:35 lr: 0.000009 loss_cls: 4.1523 (3.8632) grad_norm: 4.1792 (4.3304) time: 0.7915 data: 0.0002 max mem: 8421 +[2024-12-05 23:01:51 root] (utils.py 283): INFO Epoch: [18] [ 940/2502] eta: 0:20:27 lr: 0.000009 loss_cls: 4.1314 (3.8660) grad_norm: 4.2007 (4.3289) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-05 23:01:59 root] (utils.py 283): INFO Epoch: [18] [ 950/2502] eta: 0:20:19 lr: 0.000009 loss_cls: 4.1109 (3.8652) grad_norm: 4.1729 (4.3276) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-05 23:02:07 root] (utils.py 283): INFO Epoch: [18] [ 960/2502] eta: 0:20:11 lr: 0.000009 loss_cls: 3.8507 (3.8633) grad_norm: 4.3518 (4.3292) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-05 23:02:14 root] (utils.py 283): INFO Epoch: [18] [ 970/2502] eta: 0:20:03 lr: 0.000009 loss_cls: 4.0163 (3.8656) grad_norm: 4.2824 (4.3278) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 23:02:22 root] (utils.py 283): INFO Epoch: [18] [ 980/2502] eta: 0:19:55 lr: 0.000009 loss_cls: 4.1161 (3.8668) grad_norm: 4.2280 (4.3293) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 23:02:30 root] (utils.py 283): INFO Epoch: [18] [ 990/2502] eta: 0:19:47 lr: 0.000009 loss_cls: 4.0078 (3.8666) grad_norm: 4.1126 (4.3333) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 23:02:38 root] (utils.py 283): INFO Epoch: [18] [1000/2502] eta: 0:19:39 lr: 0.000009 loss_cls: 3.9569 (3.8681) grad_norm: 4.0586 (4.3315) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-05 23:02:46 root] (utils.py 283): INFO Epoch: [18] [1010/2502] eta: 0:19:31 lr: 0.000009 loss_cls: 4.0488 (3.8707) grad_norm: 4.1123 (4.3302) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 23:02:53 root] (utils.py 283): INFO Epoch: [18] [1020/2502] eta: 0:19:23 lr: 0.000009 loss_cls: 4.0256 (3.8689) grad_norm: 4.1006 (4.3281) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-05 23:03:01 root] (utils.py 283): INFO Epoch: [18] [1030/2502] eta: 0:19:15 lr: 0.000009 loss_cls: 3.5227 (3.8659) grad_norm: 4.2075 (4.3282) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-05 23:03:09 root] (utils.py 283): INFO Epoch: [18] [1040/2502] eta: 0:19:07 lr: 0.000009 loss_cls: 3.7841 (3.8659) grad_norm: 4.3133 (4.3366) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-05 23:03:17 root] (utils.py 283): INFO Epoch: [18] [1050/2502] eta: 0:18:59 lr: 0.000009 loss_cls: 3.8883 (3.8650) grad_norm: 4.2346 (4.3411) time: 0.7781 data: 0.0002 max mem: 8421 +[2024-12-05 23:03:25 root] (utils.py 283): INFO Epoch: [18] [1060/2502] eta: 0:18:52 lr: 0.000009 loss_cls: 3.9105 (3.8651) grad_norm: 4.2652 (4.3410) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 23:03:32 root] (utils.py 283): INFO Epoch: [18] [1070/2502] eta: 0:18:44 lr: 0.000009 loss_cls: 4.1182 (3.8671) grad_norm: 4.2652 (4.3408) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 23:03:40 root] (utils.py 283): INFO Epoch: [18] [1080/2502] eta: 0:18:36 lr: 0.000009 loss_cls: 4.1073 (3.8675) grad_norm: 4.2522 (4.3397) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 23:03:48 root] (utils.py 283): INFO Epoch: [18] [1090/2502] eta: 0:18:28 lr: 0.000009 loss_cls: 4.0764 (3.8679) grad_norm: 4.2439 (4.3385) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-05 23:03:56 root] (utils.py 283): INFO Epoch: [18] [1100/2502] eta: 0:18:20 lr: 0.000009 loss_cls: 3.9596 (3.8675) grad_norm: 4.2588 (4.3554) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-05 23:04:04 root] (utils.py 283): INFO Epoch: [18] [1110/2502] eta: 0:18:12 lr: 0.000009 loss_cls: 3.5414 (3.8644) grad_norm: 4.2804 (4.3548) time: 0.7789 data: 0.0002 max mem: 8421 +[2024-12-05 23:04:12 root] (utils.py 283): INFO Epoch: [18] [1120/2502] eta: 0:18:04 lr: 0.000009 loss_cls: 3.5909 (3.8644) grad_norm: 4.1716 (4.3547) time: 0.7846 data: 0.0002 max mem: 8421 +[2024-12-05 23:04:19 root] (utils.py 283): INFO Epoch: [18] [1130/2502] eta: 0:17:56 lr: 0.000009 loss_cls: 3.8629 (3.8644) grad_norm: 4.2273 (4.3544) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-05 23:04:27 root] (utils.py 283): INFO Epoch: [18] [1140/2502] eta: 0:17:48 lr: 0.000009 loss_cls: 3.5956 (3.8617) grad_norm: 4.2985 (4.3545) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-05 23:04:35 root] (utils.py 283): INFO Epoch: [18] [1150/2502] eta: 0:17:41 lr: 0.000009 loss_cls: 3.5291 (3.8613) grad_norm: 4.2789 (4.3528) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 23:04:43 root] (utils.py 283): INFO Epoch: [18] [1160/2502] eta: 0:17:33 lr: 0.000009 loss_cls: 4.0145 (3.8609) grad_norm: 4.2555 (4.3532) time: 0.7852 data: 0.0002 max mem: 8421 +[2024-12-05 23:04:51 root] (utils.py 283): INFO Epoch: [18] [1170/2502] eta: 0:17:25 lr: 0.000009 loss_cls: 4.0258 (3.8626) grad_norm: 4.2515 (4.3523) time: 0.7865 data: 0.0002 max mem: 8421 +[2024-12-05 23:04:59 root] (utils.py 283): INFO Epoch: [18] [1180/2502] eta: 0:17:17 lr: 0.000009 loss_cls: 4.0258 (3.8619) grad_norm: 4.1383 (4.3510) time: 0.7835 data: 0.0002 max mem: 8421 +[2024-12-05 23:05:07 root] (utils.py 283): INFO Epoch: [18] [1190/2502] eta: 0:17:09 lr: 0.000009 loss_cls: 4.0314 (3.8624) grad_norm: 4.2157 (4.3516) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-05 23:05:14 root] (utils.py 283): INFO Epoch: [18] [1200/2502] eta: 0:17:01 lr: 0.000009 loss_cls: 3.9688 (3.8634) grad_norm: 4.3288 (4.3517) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-05 23:05:22 root] (utils.py 283): INFO Epoch: [18] [1210/2502] eta: 0:16:54 lr: 0.000009 loss_cls: 3.6589 (3.8590) grad_norm: 4.3288 (4.3516) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-05 23:05:30 root] (utils.py 283): INFO Epoch: [18] [1220/2502] eta: 0:16:46 lr: 0.000009 loss_cls: 3.6535 (3.8595) grad_norm: 4.3034 (4.3508) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-05 23:05:38 root] (utils.py 283): INFO Epoch: [18] [1230/2502] eta: 0:16:38 lr: 0.000009 loss_cls: 3.8846 (3.8581) grad_norm: 4.1417 (4.3483) time: 0.7945 data: 0.0002 max mem: 8421 +[2024-12-05 23:05:46 root] (utils.py 283): INFO Epoch: [18] [1240/2502] eta: 0:16:30 lr: 0.000009 loss_cls: 3.9598 (3.8594) grad_norm: 4.1512 (4.3477) time: 0.8051 data: 0.0002 max mem: 8421 +[2024-12-05 23:05:54 root] (utils.py 283): INFO Epoch: [18] [1250/2502] eta: 0:16:22 lr: 0.000009 loss_cls: 4.0263 (3.8618) grad_norm: 4.1907 (4.3525) time: 0.7912 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:02 root] (utils.py 283): INFO Epoch: [18] [1260/2502] eta: 0:16:15 lr: 0.000009 loss_cls: 4.0273 (3.8632) grad_norm: 4.2562 (4.3522) time: 0.7839 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:10 root] (utils.py 283): INFO Epoch: [18] [1270/2502] eta: 0:16:07 lr: 0.000009 loss_cls: 3.9439 (3.8632) grad_norm: 4.1920 (4.3514) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:17 root] (utils.py 283): INFO Epoch: [18] [1280/2502] eta: 0:15:59 lr: 0.000009 loss_cls: 3.9935 (3.8637) grad_norm: 4.2128 (4.3523) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:25 root] (utils.py 283): INFO Epoch: [18] [1290/2502] eta: 0:15:51 lr: 0.000009 loss_cls: 4.1046 (3.8657) grad_norm: 4.3192 (4.3569) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:33 root] (utils.py 283): INFO Epoch: [18] [1300/2502] eta: 0:15:43 lr: 0.000009 loss_cls: 4.0943 (3.8678) grad_norm: 4.3046 (4.3567) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:41 root] (utils.py 283): INFO Epoch: [18] [1310/2502] eta: 0:15:35 lr: 0.000009 loss_cls: 4.1055 (3.8685) grad_norm: 4.2192 (4.3557) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:49 root] (utils.py 283): INFO Epoch: [18] [1320/2502] eta: 0:15:27 lr: 0.000009 loss_cls: 4.0701 (3.8702) grad_norm: 4.3891 (4.3594) time: 0.7834 data: 0.0002 max mem: 8421 +[2024-12-05 23:06:56 root] (utils.py 283): INFO Epoch: [18] [1330/2502] eta: 0:15:19 lr: 0.000009 loss_cls: 3.9193 (3.8693) grad_norm: 4.3754 (4.3582) time: 0.7816 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:04 root] (utils.py 283): INFO Epoch: [18] [1340/2502] eta: 0:15:12 lr: 0.000009 loss_cls: 3.7255 (3.8697) grad_norm: 4.0567 (4.3574) time: 0.7915 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:12 root] (utils.py 283): INFO Epoch: [18] [1350/2502] eta: 0:15:04 lr: 0.000009 loss_cls: 4.0940 (3.8688) grad_norm: 4.1446 (4.3572) time: 0.7915 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:20 root] (utils.py 283): INFO Epoch: [18] [1360/2502] eta: 0:14:56 lr: 0.000009 loss_cls: 4.1461 (3.8696) grad_norm: 4.3659 (4.3572) time: 0.7899 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:28 root] (utils.py 283): INFO Epoch: [18] [1370/2502] eta: 0:14:48 lr: 0.000009 loss_cls: 4.2140 (3.8706) grad_norm: 4.3514 (4.3588) time: 0.7954 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:36 root] (utils.py 283): INFO Epoch: [18] [1380/2502] eta: 0:14:40 lr: 0.000009 loss_cls: 3.9762 (3.8705) grad_norm: 4.3514 (4.3594) time: 0.7877 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:44 root] (utils.py 283): INFO Epoch: [18] [1390/2502] eta: 0:14:33 lr: 0.000009 loss_cls: 3.8391 (3.8711) grad_norm: 4.2311 (4.3595) time: 0.7822 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:52 root] (utils.py 283): INFO Epoch: [18] [1400/2502] eta: 0:14:25 lr: 0.000009 loss_cls: 3.9343 (3.8711) grad_norm: 4.2540 (4.3612) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-05 23:07:59 root] (utils.py 283): INFO Epoch: [18] [1410/2502] eta: 0:14:17 lr: 0.000009 loss_cls: 3.7898 (3.8715) grad_norm: 4.1396 (4.3595) time: 0.7752 data: 0.0002 max mem: 8421 +[2024-12-05 23:08:07 root] (utils.py 283): INFO Epoch: [18] [1420/2502] eta: 0:14:09 lr: 0.000009 loss_cls: 3.9364 (3.8721) grad_norm: 4.0541 (4.3579) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 23:08:15 root] (utils.py 283): INFO Epoch: [18] [1430/2502] eta: 0:14:01 lr: 0.000009 loss_cls: 3.7517 (3.8703) grad_norm: 4.2256 (4.3578) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-05 23:08:23 root] (utils.py 283): INFO Epoch: [18] [1440/2502] eta: 0:13:53 lr: 0.000009 loss_cls: 3.8027 (3.8706) grad_norm: 4.3973 (4.3576) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-05 23:08:31 root] (utils.py 283): INFO Epoch: [18] [1450/2502] eta: 0:13:45 lr: 0.000009 loss_cls: 4.0219 (3.8696) grad_norm: 4.3861 (4.3586) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 23:08:38 root] (utils.py 283): INFO Epoch: [18] [1460/2502] eta: 0:13:37 lr: 0.000009 loss_cls: 3.6849 (3.8679) grad_norm: 4.2954 (4.3586) time: 0.7786 data: 0.0002 max mem: 8421 +[2024-12-05 23:08:46 root] (utils.py 283): INFO Epoch: [18] [1470/2502] eta: 0:13:29 lr: 0.000009 loss_cls: 3.5246 (3.8651) grad_norm: 4.2594 (4.3580) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-05 23:08:54 root] (utils.py 283): INFO Epoch: [18] [1480/2502] eta: 0:13:22 lr: 0.000009 loss_cls: 3.5755 (3.8643) grad_norm: 4.1963 (4.3581) time: 0.7831 data: 0.0002 max mem: 8421 +[2024-12-05 23:09:02 root] (utils.py 283): INFO Epoch: [18] [1490/2502] eta: 0:13:14 lr: 0.000009 loss_cls: 4.0486 (3.8651) grad_norm: 4.3193 (4.3587) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-05 23:09:10 root] (utils.py 283): INFO Epoch: [18] [1500/2502] eta: 0:13:06 lr: 0.000009 loss_cls: 4.0640 (3.8645) grad_norm: 4.2763 (4.3584) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-05 23:09:18 root] (utils.py 283): INFO Epoch: [18] [1510/2502] eta: 0:12:58 lr: 0.000009 loss_cls: 4.0640 (3.8656) grad_norm: 4.3864 (4.3604) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-05 23:09:26 root] (utils.py 283): INFO Epoch: [18] [1520/2502] eta: 0:12:50 lr: 0.000009 loss_cls: 4.0654 (3.8652) grad_norm: 4.3765 (4.3597) time: 0.7817 data: 0.0002 max mem: 8421 +[2024-12-05 23:09:33 root] (utils.py 283): INFO Epoch: [18] [1530/2502] eta: 0:12:42 lr: 0.000009 loss_cls: 3.9980 (3.8655) grad_norm: 4.2474 (4.3608) time: 0.7801 data: 0.0002 max mem: 8421 +[2024-12-05 23:09:41 root] (utils.py 283): INFO Epoch: [18] [1540/2502] eta: 0:12:34 lr: 0.000009 loss_cls: 4.0622 (3.8669) grad_norm: 4.2210 (4.3592) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-05 23:09:49 root] (utils.py 283): INFO Epoch: [18] [1550/2502] eta: 0:12:27 lr: 0.000009 loss_cls: 4.1302 (3.8675) grad_norm: 4.2400 (4.3607) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-05 23:09:57 root] (utils.py 283): INFO Epoch: [18] [1560/2502] eta: 0:12:19 lr: 0.000009 loss_cls: 3.8166 (3.8671) grad_norm: 4.4844 (4.3611) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-05 23:10:05 root] (utils.py 283): INFO Epoch: [18] [1570/2502] eta: 0:12:11 lr: 0.000009 loss_cls: 3.8243 (3.8678) grad_norm: 4.3094 (4.3616) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-05 23:10:13 root] (utils.py 283): INFO Epoch: [18] [1580/2502] eta: 0:12:03 lr: 0.000009 loss_cls: 4.1152 (3.8672) grad_norm: 4.2687 (4.3608) time: 0.7867 data: 0.0002 max mem: 8421 +[2024-12-05 23:10:20 root] (utils.py 283): INFO Epoch: [18] [1590/2502] eta: 0:11:55 lr: 0.000009 loss_cls: 3.9511 (3.8678) grad_norm: 4.1771 (4.3598) time: 0.7826 data: 0.0002 max mem: 8421 +[2024-12-05 23:10:28 root] (utils.py 283): INFO Epoch: [18] [1600/2502] eta: 0:11:47 lr: 0.000009 loss_cls: 3.9279 (3.8664) grad_norm: 4.2580 (4.3602) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 23:10:36 root] (utils.py 283): INFO Epoch: [18] [1610/2502] eta: 0:11:39 lr: 0.000009 loss_cls: 3.8165 (3.8655) grad_norm: 4.2655 (4.3592) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-05 23:10:44 root] (utils.py 283): INFO Epoch: [18] [1620/2502] eta: 0:11:32 lr: 0.000009 loss_cls: 3.9852 (3.8670) grad_norm: 4.2848 (4.3600) time: 0.7810 data: 0.0002 max mem: 8421 +[2024-12-05 23:10:52 root] (utils.py 283): INFO Epoch: [18] [1630/2502] eta: 0:11:24 lr: 0.000009 loss_cls: 4.0196 (3.8661) grad_norm: 4.2848 (4.3625) time: 0.7900 data: 0.0002 max mem: 8421 +[2024-12-05 23:11:00 root] (utils.py 283): INFO Epoch: [18] [1640/2502] eta: 0:11:16 lr: 0.000009 loss_cls: 4.0201 (3.8666) grad_norm: 4.2872 (4.3659) time: 0.7930 data: 0.0002 max mem: 8421 +[2024-12-05 23:11:07 root] (utils.py 283): INFO Epoch: [18] [1650/2502] eta: 0:11:08 lr: 0.000009 loss_cls: 3.8119 (3.8650) grad_norm: 4.2872 (4.3653) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-05 23:11:15 root] (utils.py 283): INFO Epoch: [18] [1660/2502] eta: 0:11:00 lr: 0.000009 loss_cls: 3.7956 (3.8642) grad_norm: 4.0669 (4.3641) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-05 23:11:23 root] (utils.py 283): INFO Epoch: [18] [1670/2502] eta: 0:10:52 lr: 0.000009 loss_cls: 3.9050 (3.8632) grad_norm: 4.0875 (4.3633) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 23:11:31 root] (utils.py 283): INFO Epoch: [18] [1680/2502] eta: 0:10:45 lr: 0.000009 loss_cls: 3.8181 (3.8623) grad_norm: 4.0907 (4.3772) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-05 23:11:39 root] (utils.py 283): INFO Epoch: [18] [1690/2502] eta: 0:10:37 lr: 0.000009 loss_cls: 3.7471 (3.8614) grad_norm: 4.2901 (4.3808) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-05 23:11:46 root] (utils.py 283): INFO Epoch: [18] [1700/2502] eta: 0:10:29 lr: 0.000009 loss_cls: 4.1514 (3.8627) grad_norm: 4.7069 (4.3811) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 23:11:54 root] (utils.py 283): INFO Epoch: [18] [1710/2502] eta: 0:10:21 lr: 0.000009 loss_cls: 4.0976 (3.8634) grad_norm: 4.1800 (4.3798) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-05 23:12:02 root] (utils.py 283): INFO Epoch: [18] [1720/2502] eta: 0:10:13 lr: 0.000009 loss_cls: 4.0976 (3.8655) grad_norm: 4.2497 (4.3799) time: 0.7759 data: 0.0002 max mem: 8421 +[2024-12-05 23:12:10 root] (utils.py 283): INFO Epoch: [18] [1730/2502] eta: 0:10:05 lr: 0.000009 loss_cls: 4.1109 (3.8658) grad_norm: 4.3307 (4.3832) time: 0.7751 data: 0.0002 max mem: 8421 +[2024-12-05 23:12:17 root] (utils.py 283): INFO Epoch: [18] [1740/2502] eta: 0:09:57 lr: 0.000009 loss_cls: 3.7371 (3.8641) grad_norm: 4.4416 (4.3842) time: 0.7770 data: 0.0003 max mem: 8421 +[2024-12-05 23:12:25 root] (utils.py 283): INFO Epoch: [18] [1750/2502] eta: 0:09:49 lr: 0.000009 loss_cls: 3.5042 (3.8630) grad_norm: 4.5031 (4.3846) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 23:12:33 root] (utils.py 283): INFO Epoch: [18] [1760/2502] eta: 0:09:41 lr: 0.000009 loss_cls: 3.5201 (3.8620) grad_norm: 4.3812 (4.3855) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-05 23:12:41 root] (utils.py 283): INFO Epoch: [18] [1770/2502] eta: 0:09:34 lr: 0.000009 loss_cls: 3.7934 (3.8626) grad_norm: 4.4184 (4.3869) time: 0.7805 data: 0.0002 max mem: 8421 +[2024-12-05 23:12:49 root] (utils.py 283): INFO Epoch: [18] [1780/2502] eta: 0:09:26 lr: 0.000009 loss_cls: 4.2407 (3.8634) grad_norm: 4.2523 (4.3855) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-05 23:12:56 root] (utils.py 283): INFO Epoch: [18] [1790/2502] eta: 0:09:18 lr: 0.000009 loss_cls: 4.2407 (3.8647) grad_norm: 4.1505 (4.3846) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-05 23:13:04 root] (utils.py 283): INFO Epoch: [18] [1800/2502] eta: 0:09:10 lr: 0.000009 loss_cls: 3.9901 (3.8636) grad_norm: 4.3224 (4.3844) time: 0.7924 data: 0.0003 max mem: 8421 +[2024-12-05 23:13:12 root] (utils.py 283): INFO Epoch: [18] [1810/2502] eta: 0:09:02 lr: 0.000009 loss_cls: 3.9636 (3.8630) grad_norm: 4.3928 (4.3861) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-05 23:13:20 root] (utils.py 283): INFO Epoch: [18] [1820/2502] eta: 0:08:54 lr: 0.000009 loss_cls: 3.9098 (3.8632) grad_norm: 4.4056 (4.3862) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 23:13:28 root] (utils.py 283): INFO Epoch: [18] [1830/2502] eta: 0:08:47 lr: 0.000009 loss_cls: 3.8493 (3.8626) grad_norm: 4.2956 (4.3859) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 23:13:36 root] (utils.py 283): INFO Epoch: [18] [1840/2502] eta: 0:08:39 lr: 0.000009 loss_cls: 3.8781 (3.8623) grad_norm: 4.2493 (4.3861) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 23:13:43 root] (utils.py 283): INFO Epoch: [18] [1850/2502] eta: 0:08:31 lr: 0.000009 loss_cls: 3.5737 (3.8606) grad_norm: 4.2604 (4.3857) time: 0.7825 data: 0.0002 max mem: 8421 +[2024-12-05 23:13:51 root] (utils.py 283): INFO Epoch: [18] [1860/2502] eta: 0:08:23 lr: 0.000009 loss_cls: 3.5674 (3.8604) grad_norm: 4.3151 (4.3879) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 23:13:59 root] (utils.py 283): INFO Epoch: [18] [1870/2502] eta: 0:08:15 lr: 0.000009 loss_cls: 3.9296 (3.8609) grad_norm: 4.3644 (4.3885) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-05 23:14:07 root] (utils.py 283): INFO Epoch: [18] [1880/2502] eta: 0:08:07 lr: 0.000009 loss_cls: 3.9296 (3.8599) grad_norm: 4.1885 (4.3874) time: 0.7769 data: 0.0002 max mem: 8421 +[2024-12-05 23:14:15 root] (utils.py 283): INFO Epoch: [18] [1890/2502] eta: 0:07:59 lr: 0.000009 loss_cls: 3.8802 (3.8600) grad_norm: 4.1009 (4.3858) time: 0.7760 data: 0.0002 max mem: 8421 +[2024-12-05 23:14:22 root] (utils.py 283): INFO Epoch: [18] [1900/2502] eta: 0:07:52 lr: 0.000009 loss_cls: 3.9407 (3.8592) grad_norm: 4.2075 (4.3854) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 23:14:30 root] (utils.py 283): INFO Epoch: [18] [1910/2502] eta: 0:07:44 lr: 0.000009 loss_cls: 3.9486 (3.8593) grad_norm: 4.2814 (4.3863) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 23:14:38 root] (utils.py 283): INFO Epoch: [18] [1920/2502] eta: 0:07:36 lr: 0.000009 loss_cls: 3.8406 (3.8590) grad_norm: 4.2040 (4.3863) time: 0.7921 data: 0.0003 max mem: 8421 +[2024-12-05 23:14:46 root] (utils.py 283): INFO Epoch: [18] [1930/2502] eta: 0:07:28 lr: 0.000009 loss_cls: 3.9065 (3.8599) grad_norm: 4.2040 (4.3868) time: 0.7999 data: 0.0003 max mem: 8421 +[2024-12-05 23:14:54 root] (utils.py 283): INFO Epoch: [18] [1940/2502] eta: 0:07:20 lr: 0.000009 loss_cls: 3.8839 (3.8587) grad_norm: 4.3577 (4.3877) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-05 23:15:02 root] (utils.py 283): INFO Epoch: [18] [1950/2502] eta: 0:07:12 lr: 0.000009 loss_cls: 3.7170 (3.8576) grad_norm: 4.3724 (4.3874) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-05 23:15:10 root] (utils.py 283): INFO Epoch: [18] [1960/2502] eta: 0:07:05 lr: 0.000009 loss_cls: 3.8813 (3.8580) grad_norm: 4.2344 (4.3867) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-05 23:15:18 root] (utils.py 283): INFO Epoch: [18] [1970/2502] eta: 0:06:57 lr: 0.000009 loss_cls: 3.7929 (3.8573) grad_norm: 4.3094 (4.3865) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-05 23:15:25 root] (utils.py 283): INFO Epoch: [18] [1980/2502] eta: 0:06:49 lr: 0.000009 loss_cls: 3.7683 (3.8570) grad_norm: 4.1061 (4.3851) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-05 23:15:33 root] (utils.py 283): INFO Epoch: [18] [1990/2502] eta: 0:06:41 lr: 0.000009 loss_cls: 3.7683 (3.8565) grad_norm: 4.0810 (4.3847) time: 0.7819 data: 0.0002 max mem: 8421 +[2024-12-05 23:15:41 root] (utils.py 283): INFO Epoch: [18] [2000/2502] eta: 0:06:33 lr: 0.000009 loss_cls: 3.6983 (3.8547) grad_norm: 4.3263 (4.3858) time: 0.7829 data: 0.0002 max mem: 8421 +[2024-12-05 23:15:49 root] (utils.py 283): INFO Epoch: [18] [2010/2502] eta: 0:06:25 lr: 0.000009 loss_cls: 3.6333 (3.8553) grad_norm: 4.3873 (4.3854) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-05 23:15:57 root] (utils.py 283): INFO Epoch: [18] [2020/2502] eta: 0:06:17 lr: 0.000009 loss_cls: 3.9930 (3.8550) grad_norm: 4.1769 (4.3844) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-05 23:16:04 root] (utils.py 283): INFO Epoch: [18] [2030/2502] eta: 0:06:10 lr: 0.000009 loss_cls: 3.9930 (3.8556) grad_norm: 4.0777 (4.3825) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-05 23:16:12 root] (utils.py 283): INFO Epoch: [18] [2040/2502] eta: 0:06:02 lr: 0.000009 loss_cls: 3.9767 (3.8542) grad_norm: 4.2228 (4.3847) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-05 23:16:20 root] (utils.py 283): INFO Epoch: [18] [2050/2502] eta: 0:05:54 lr: 0.000009 loss_cls: 3.9767 (3.8537) grad_norm: 4.4033 (4.3845) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-05 23:16:28 root] (utils.py 283): INFO Epoch: [18] [2060/2502] eta: 0:05:46 lr: 0.000009 loss_cls: 4.1045 (3.8551) grad_norm: 4.2472 (4.3849) time: 0.7985 data: 0.0002 max mem: 8421 +[2024-12-05 23:16:36 root] (utils.py 283): INFO Epoch: [18] [2070/2502] eta: 0:05:38 lr: 0.000009 loss_cls: 4.0357 (3.8548) grad_norm: 4.2437 (4.3843) time: 0.8001 data: 0.0002 max mem: 8421 +[2024-12-05 23:16:44 root] (utils.py 283): INFO Epoch: [18] [2080/2502] eta: 0:05:30 lr: 0.000009 loss_cls: 3.6760 (3.8535) grad_norm: 4.2470 (4.3846) time: 0.7825 data: 0.0002 max mem: 8421 +[2024-12-05 23:16:52 root] (utils.py 283): INFO Epoch: [18] [2090/2502] eta: 0:05:23 lr: 0.000009 loss_cls: 3.6760 (3.8537) grad_norm: 4.2490 (4.3840) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 23:17:00 root] (utils.py 283): INFO Epoch: [18] [2100/2502] eta: 0:05:15 lr: 0.000009 loss_cls: 3.9357 (3.8540) grad_norm: 4.2612 (4.3848) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 23:17:07 root] (utils.py 283): INFO Epoch: [18] [2110/2502] eta: 0:05:07 lr: 0.000009 loss_cls: 3.6954 (3.8529) grad_norm: 4.4236 (4.3858) time: 0.7935 data: 0.0003 max mem: 8421 +[2024-12-05 23:17:15 root] (utils.py 283): INFO Epoch: [18] [2120/2502] eta: 0:04:59 lr: 0.000009 loss_cls: 3.6535 (3.8527) grad_norm: 4.4236 (4.3861) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-05 23:17:23 root] (utils.py 283): INFO Epoch: [18] [2130/2502] eta: 0:04:51 lr: 0.000009 loss_cls: 3.9249 (3.8532) grad_norm: 4.3003 (4.3853) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-05 23:17:31 root] (utils.py 283): INFO Epoch: [18] [2140/2502] eta: 0:04:43 lr: 0.000009 loss_cls: 3.9877 (3.8532) grad_norm: 4.1747 (4.3850) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-05 23:17:39 root] (utils.py 283): INFO Epoch: [18] [2150/2502] eta: 0:04:36 lr: 0.000009 loss_cls: 4.2658 (3.8555) grad_norm: 4.3348 (4.3867) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-05 23:17:46 root] (utils.py 283): INFO Epoch: [18] [2160/2502] eta: 0:04:28 lr: 0.000009 loss_cls: 4.4202 (3.8572) grad_norm: 4.5035 (4.3890) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-05 23:17:54 root] (utils.py 283): INFO Epoch: [18] [2170/2502] eta: 0:04:20 lr: 0.000009 loss_cls: 4.1794 (3.8579) grad_norm: 4.2913 (4.3882) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:02 root] (utils.py 283): INFO Epoch: [18] [2180/2502] eta: 0:04:12 lr: 0.000009 loss_cls: 4.0874 (3.8569) grad_norm: 4.2913 (4.3886) time: 0.7789 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:10 root] (utils.py 283): INFO Epoch: [18] [2190/2502] eta: 0:04:04 lr: 0.000009 loss_cls: 3.8216 (3.8565) grad_norm: 4.3910 (4.3890) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:18 root] (utils.py 283): INFO Epoch: [18] [2200/2502] eta: 0:03:56 lr: 0.000009 loss_cls: 3.8793 (3.8569) grad_norm: 4.3247 (4.3890) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:25 root] (utils.py 283): INFO Epoch: [18] [2210/2502] eta: 0:03:48 lr: 0.000009 loss_cls: 3.7957 (3.8564) grad_norm: 4.2686 (4.3891) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:33 root] (utils.py 283): INFO Epoch: [18] [2220/2502] eta: 0:03:41 lr: 0.000009 loss_cls: 3.9711 (3.8569) grad_norm: 4.4180 (4.3917) time: 0.7871 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:41 root] (utils.py 283): INFO Epoch: [18] [2230/2502] eta: 0:03:33 lr: 0.000009 loss_cls: 4.1374 (3.8574) grad_norm: 4.5044 (4.3947) time: 0.7891 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:49 root] (utils.py 283): INFO Epoch: [18] [2240/2502] eta: 0:03:25 lr: 0.000009 loss_cls: 3.9697 (3.8583) grad_norm: 4.2873 (4.3942) time: 0.7848 data: 0.0002 max mem: 8421 +[2024-12-05 23:18:57 root] (utils.py 283): INFO Epoch: [18] [2250/2502] eta: 0:03:17 lr: 0.000009 loss_cls: 3.9697 (3.8572) grad_norm: 4.1697 (4.3934) time: 0.7841 data: 0.0002 max mem: 8421 +[2024-12-05 23:19:05 root] (utils.py 283): INFO Epoch: [18] [2260/2502] eta: 0:03:09 lr: 0.000009 loss_cls: 4.0179 (3.8588) grad_norm: 4.1741 (4.3929) time: 0.7822 data: 0.0002 max mem: 8421 +[2024-12-05 23:19:12 root] (utils.py 283): INFO Epoch: [18] [2270/2502] eta: 0:03:01 lr: 0.000009 loss_cls: 4.2600 (3.8596) grad_norm: 4.3969 (4.3935) time: 0.7812 data: 0.0002 max mem: 8421 +[2024-12-05 23:19:20 root] (utils.py 283): INFO Epoch: [18] [2280/2502] eta: 0:02:54 lr: 0.000009 loss_cls: 3.6864 (3.8578) grad_norm: 4.1959 (4.3925) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-05 23:19:29 root] (utils.py 283): INFO Epoch: [18] [2290/2502] eta: 0:02:46 lr: 0.000009 loss_cls: 3.6475 (3.8575) grad_norm: 4.0922 (4.3910) time: 0.8467 data: 0.0005 max mem: 8421 +[2024-12-05 23:19:38 root] (utils.py 283): INFO Epoch: [18] [2300/2502] eta: 0:02:38 lr: 0.000009 loss_cls: 3.9144 (3.8581) grad_norm: 4.2540 (4.3912) time: 0.8978 data: 0.0005 max mem: 8421 +[2024-12-05 23:19:46 root] (utils.py 283): INFO Epoch: [18] [2310/2502] eta: 0:02:30 lr: 0.000009 loss_cls: 3.9671 (3.8583) grad_norm: 4.2699 (4.3908) time: 0.8463 data: 0.0004 max mem: 8421 +[2024-12-05 23:19:54 root] (utils.py 283): INFO Epoch: [18] [2320/2502] eta: 0:02:22 lr: 0.000009 loss_cls: 3.6264 (3.8581) grad_norm: 4.2699 (4.3979) time: 0.7904 data: 0.0003 max mem: 8421 +[2024-12-05 23:20:02 root] (utils.py 283): INFO Epoch: [18] [2330/2502] eta: 0:02:15 lr: 0.000009 loss_cls: 3.6256 (3.8568) grad_norm: 4.4493 (4.3986) time: 0.7822 data: 0.0002 max mem: 8421 +[2024-12-05 23:20:10 root] (utils.py 283): INFO Epoch: [18] [2340/2502] eta: 0:02:07 lr: 0.000009 loss_cls: 3.9560 (3.8577) grad_norm: 4.3436 (4.3978) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-05 23:20:18 root] (utils.py 283): INFO Epoch: [18] [2350/2502] eta: 0:01:59 lr: 0.000009 loss_cls: 4.0053 (3.8577) grad_norm: 4.2262 (4.3982) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-05 23:20:25 root] (utils.py 283): INFO Epoch: [18] [2360/2502] eta: 0:01:51 lr: 0.000009 loss_cls: 4.0280 (3.8575) grad_norm: 4.2739 (4.3978) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 23:20:33 root] (utils.py 283): INFO Epoch: [18] [2370/2502] eta: 0:01:43 lr: 0.000009 loss_cls: 4.0331 (3.8574) grad_norm: 4.2747 (4.3967) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 23:20:41 root] (utils.py 283): INFO Epoch: [18] [2380/2502] eta: 0:01:35 lr: 0.000009 loss_cls: 3.8755 (3.8577) grad_norm: 4.1401 (4.3961) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-05 23:20:49 root] (utils.py 283): INFO Epoch: [18] [2390/2502] eta: 0:01:27 lr: 0.000009 loss_cls: 4.0272 (3.8586) grad_norm: 4.1162 (4.3956) time: 0.7765 data: 0.0002 max mem: 8421 +[2024-12-05 23:20:56 root] (utils.py 283): INFO Epoch: [18] [2400/2502] eta: 0:01:20 lr: 0.000009 loss_cls: 3.8442 (3.8569) grad_norm: 4.1065 (4.3954) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-05 23:21:04 root] (utils.py 283): INFO Epoch: [18] [2410/2502] eta: 0:01:12 lr: 0.000009 loss_cls: 3.5768 (3.8564) grad_norm: 4.1787 (4.3955) time: 0.7767 data: 0.0002 max mem: 8421 +[2024-12-05 23:21:12 root] (utils.py 283): INFO Epoch: [18] [2420/2502] eta: 0:01:04 lr: 0.000009 loss_cls: 3.9558 (3.8572) grad_norm: 4.2261 (4.3956) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-05 23:21:20 root] (utils.py 283): INFO Epoch: [18] [2430/2502] eta: 0:00:56 lr: 0.000009 loss_cls: 3.9643 (3.8567) grad_norm: 4.2296 (4.3960) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 23:21:28 root] (utils.py 283): INFO Epoch: [18] [2440/2502] eta: 0:00:48 lr: 0.000009 loss_cls: 3.6083 (3.8556) grad_norm: 4.2556 (4.3972) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 23:21:35 root] (utils.py 283): INFO Epoch: [18] [2450/2502] eta: 0:00:40 lr: 0.000009 loss_cls: 3.9075 (3.8560) grad_norm: 4.3013 (4.3966) time: 0.7778 data: 0.0002 max mem: 8421 +[2024-12-05 23:21:43 root] (utils.py 283): INFO Epoch: [18] [2460/2502] eta: 0:00:32 lr: 0.000009 loss_cls: 3.9998 (3.8562) grad_norm: 4.3013 (4.3967) time: 0.7753 data: 0.0002 max mem: 8421 +[2024-12-05 23:21:51 root] (utils.py 283): INFO Epoch: [18] [2470/2502] eta: 0:00:25 lr: 0.000009 loss_cls: 4.0824 (3.8567) grad_norm: 4.1566 (4.3957) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 23:21:59 root] (utils.py 283): INFO Epoch: [18] [2480/2502] eta: 0:00:17 lr: 0.000009 loss_cls: 3.9584 (3.8570) grad_norm: 4.1213 (4.3947) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-05 23:22:07 root] (utils.py 283): INFO Epoch: [18] [2490/2502] eta: 0:00:09 lr: 0.000009 loss_cls: 3.9420 (3.8566) grad_norm: 4.1558 (4.3947) time: 0.8041 data: 0.0254 max mem: 8421 +[2024-12-05 23:22:15 root] (utils.py 283): INFO Epoch: [18] [2500/2502] eta: 0:00:01 lr: 0.000009 loss_cls: 3.7772 (3.8562) grad_norm: 4.4411 (4.3952) time: 0.8035 data: 0.0254 max mem: 8421 +[2024-12-05 23:22:16 root] (utils.py 283): INFO Epoch: [18] [2501/2502] eta: 0:00:00 lr: 0.000009 loss_cls: 3.9045 (3.8562) grad_norm: 4.4411 (4.3952) time: 0.8039 data: 0.0254 max mem: 8421 +[2024-12-05 23:22:16 root] (utils.py 297): INFO Epoch: [18] Total time: 0:32:43 (0.7850 s / it) +[2024-12-05 23:22:16 root] (engine.py 179): INFO Averaged stats:lr: 0.000009 loss_cls: 3.9045 (3.8543) grad_norm: 4.4411 (4.3952) +[2024-12-05 23:22:16 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7638 (0.7638) acc1: 84.3750 (84.3750) acc3: 95.3125 (95.3125) acc5: 96.8750 (96.8750) time: 0.1310 data: 0.0005 max mem: 8421 +[2024-12-05 23:22:17 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8151 (0.8673) acc1: 84.3750 (82.0312) acc3: 92.9688 (92.8267) acc5: 96.0938 (95.7386) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-05 23:22:19 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9104 (0.9206) acc1: 78.1250 (80.6548) acc3: 92.1875 (92.2247) acc5: 95.3125 (95.1265) time: 0.1330 data: 0.0005 max mem: 8421 +[2024-12-05 23:22:20 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9631 (0.9210) acc1: 80.4688 (79.9899) acc3: 92.1875 (92.6411) acc5: 95.3125 (95.3377) time: 0.1362 data: 0.0005 max mem: 8421 +[2024-12-05 23:22:21 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8608 (0.9134) acc1: 81.2500 (80.2782) acc3: 93.7500 (92.8544) acc5: 96.8750 (95.4649) time: 0.1382 data: 0.0012 max mem: 8421 +[2024-12-05 23:22:23 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0404 (1.0082) acc1: 75.0000 (78.2629) acc3: 87.5000 (91.2990) acc5: 92.1875 (94.2862) time: 0.1660 data: 0.0277 max mem: 8421 +[2024-12-05 23:22:25 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2833 (1.0548) acc1: 71.8750 (77.3950) acc3: 85.1562 (90.4969) acc5: 89.0625 (93.5835) time: 0.1762 data: 0.0374 max mem: 8421 +[2024-12-05 23:22:27 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2798 (1.0990) acc1: 71.0938 (76.1884) acc3: 86.7188 (89.8988) acc5: 89.8438 (93.1448) time: 0.1710 data: 0.0342 max mem: 8421 +[2024-12-05 23:22:28 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3084 (1.1363) acc1: 69.5312 (75.3762) acc3: 85.1562 (89.3133) acc5: 89.8438 (92.6119) time: 0.1628 data: 0.0270 max mem: 8421 +[2024-12-05 23:22:30 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3588 (1.1652) acc1: 67.9688 (74.5965) acc3: 84.3750 (88.7448) acc5: 89.0625 (92.1875) time: 0.1403 data: 0.0037 max mem: 8421 +[2024-12-05 23:22:31 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2254 (1.1544) acc1: 73.4375 (74.7120) acc3: 87.5000 (88.9760) acc5: 90.6250 (92.3840) time: 0.1365 data: 0.0009 max mem: 8421 +[2024-12-05 23:22:31 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1498 s / it) +[2024-12-05 23:22:32 root] (engine.py 264): INFO * Acc@1 74.704 Acc@3 88.972 Acc@5 92.280 loss 1.154 flops 1.285 layer_flops 1.251 +[2024-12-05 23:22:32 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 74.7% +[2024-12-05 23:22:33 root] (main.py 551): INFO Max accuracy: 74.70% +[2024-12-05 23:22:33 root] (utils.py 283): INFO Epoch: [19] [ 0/2502] eta: 0:32:48 lr: 0.000008 loss_cls: 3.2252 (3.2252) grad_norm: 4.5245 (4.5245) time: 0.7868 data: 0.0004 max mem: 8421 +[2024-12-05 23:22:41 root] (utils.py 283): INFO Epoch: [19] [ 10/2502] eta: 0:32:19 lr: 0.000008 loss_cls: 3.6685 (3.6595) grad_norm: 4.1389 (4.2191) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 23:22:49 root] (utils.py 283): INFO Epoch: [19] [ 20/2502] eta: 0:32:28 lr: 0.000008 loss_cls: 3.7878 (3.6704) grad_norm: 4.0974 (4.2349) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 23:22:57 root] (utils.py 283): INFO Epoch: [19] [ 30/2502] eta: 0:32:26 lr: 0.000008 loss_cls: 3.8002 (3.7021) grad_norm: 4.1796 (4.2527) time: 0.7925 data: 0.0003 max mem: 8421 +[2024-12-05 23:23:05 root] (utils.py 283): INFO Epoch: [19] [ 40/2502] eta: 0:32:16 lr: 0.000008 loss_cls: 3.9092 (3.7761) grad_norm: 4.2741 (4.3119) time: 0.7881 data: 0.0003 max mem: 8421 +[2024-12-05 23:23:13 root] (utils.py 283): INFO Epoch: [19] [ 50/2502] eta: 0:32:03 lr: 0.000008 loss_cls: 3.9321 (3.7397) grad_norm: 4.1948 (4.3399) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-05 23:23:20 root] (utils.py 283): INFO Epoch: [19] [ 60/2502] eta: 0:31:53 lr: 0.000008 loss_cls: 3.5504 (3.6934) grad_norm: 4.1734 (4.3204) time: 0.7775 data: 0.0002 max mem: 8421 +[2024-12-05 23:23:28 root] (utils.py 283): INFO Epoch: [19] [ 70/2502] eta: 0:31:50 lr: 0.000008 loss_cls: 3.8830 (3.7312) grad_norm: 4.1190 (4.2850) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-05 23:23:36 root] (utils.py 283): INFO Epoch: [19] [ 80/2502] eta: 0:31:41 lr: 0.000008 loss_cls: 4.0094 (3.7709) grad_norm: 4.1366 (4.3125) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-05 23:23:44 root] (utils.py 283): INFO Epoch: [19] [ 90/2502] eta: 0:31:39 lr: 0.000008 loss_cls: 3.9470 (3.7617) grad_norm: 4.1971 (4.3015) time: 0.7944 data: 0.0003 max mem: 8421 +[2024-12-05 23:23:52 root] (utils.py 283): INFO Epoch: [19] [ 100/2502] eta: 0:31:36 lr: 0.000008 loss_cls: 3.9470 (3.7746) grad_norm: 3.9845 (4.3345) time: 0.8074 data: 0.0003 max mem: 8421 +[2024-12-05 23:24:00 root] (utils.py 283): INFO Epoch: [19] [ 110/2502] eta: 0:31:30 lr: 0.000008 loss_cls: 4.0239 (3.7551) grad_norm: 4.1919 (4.3384) time: 0.8039 data: 0.0003 max mem: 8421 +[2024-12-05 23:24:08 root] (utils.py 283): INFO Epoch: [19] [ 120/2502] eta: 0:31:22 lr: 0.000008 loss_cls: 3.9963 (3.7680) grad_norm: 4.1515 (4.3349) time: 0.7932 data: 0.0002 max mem: 8421 +[2024-12-05 23:24:16 root] (utils.py 283): INFO Epoch: [19] [ 130/2502] eta: 0:31:13 lr: 0.000008 loss_cls: 4.0755 (3.7829) grad_norm: 4.1146 (4.3574) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-05 23:24:24 root] (utils.py 283): INFO Epoch: [19] [ 140/2502] eta: 0:31:07 lr: 0.000008 loss_cls: 3.9372 (3.7835) grad_norm: 4.1146 (4.3424) time: 0.7924 data: 0.0003 max mem: 8421 +[2024-12-05 23:24:32 root] (utils.py 283): INFO Epoch: [19] [ 150/2502] eta: 0:30:57 lr: 0.000008 loss_cls: 3.8331 (3.7871) grad_norm: 4.1036 (4.3314) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-05 23:24:40 root] (utils.py 283): INFO Epoch: [19] [ 160/2502] eta: 0:30:48 lr: 0.000008 loss_cls: 4.1052 (3.8094) grad_norm: 4.2653 (4.3413) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-05 23:24:47 root] (utils.py 283): INFO Epoch: [19] [ 170/2502] eta: 0:30:38 lr: 0.000008 loss_cls: 4.1052 (3.8059) grad_norm: 4.3694 (4.3542) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-05 23:24:55 root] (utils.py 283): INFO Epoch: [19] [ 180/2502] eta: 0:30:29 lr: 0.000008 loss_cls: 3.8341 (3.8057) grad_norm: 4.1484 (4.3447) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 23:25:03 root] (utils.py 283): INFO Epoch: [19] [ 190/2502] eta: 0:30:20 lr: 0.000008 loss_cls: 3.9777 (3.8170) grad_norm: 4.1484 (4.3350) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-05 23:25:11 root] (utils.py 283): INFO Epoch: [19] [ 200/2502] eta: 0:30:11 lr: 0.000008 loss_cls: 3.9987 (3.8277) grad_norm: 4.1928 (4.3401) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 23:25:19 root] (utils.py 283): INFO Epoch: [19] [ 210/2502] eta: 0:30:03 lr: 0.000008 loss_cls: 3.9924 (3.8248) grad_norm: 4.2632 (4.3329) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 23:25:26 root] (utils.py 283): INFO Epoch: [19] [ 220/2502] eta: 0:29:54 lr: 0.000008 loss_cls: 3.4340 (3.8051) grad_norm: 4.1832 (4.3214) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 23:25:34 root] (utils.py 283): INFO Epoch: [19] [ 230/2502] eta: 0:29:45 lr: 0.000008 loss_cls: 3.8243 (3.8108) grad_norm: 4.1832 (4.3203) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 23:25:42 root] (utils.py 283): INFO Epoch: [19] [ 240/2502] eta: 0:29:36 lr: 0.000008 loss_cls: 3.8389 (3.8037) grad_norm: 4.2130 (4.3162) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-05 23:25:50 root] (utils.py 283): INFO Epoch: [19] [ 250/2502] eta: 0:29:28 lr: 0.000008 loss_cls: 3.5127 (3.8043) grad_norm: 4.1925 (4.3188) time: 0.7759 data: 0.0002 max mem: 8421 +[2024-12-05 23:25:57 root] (utils.py 283): INFO Epoch: [19] [ 260/2502] eta: 0:29:19 lr: 0.000008 loss_cls: 3.9320 (3.8035) grad_norm: 4.2378 (4.3156) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 23:26:05 root] (utils.py 283): INFO Epoch: [19] [ 270/2502] eta: 0:29:11 lr: 0.000008 loss_cls: 3.9320 (3.8050) grad_norm: 4.2181 (4.3217) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-05 23:26:13 root] (utils.py 283): INFO Epoch: [19] [ 280/2502] eta: 0:29:03 lr: 0.000008 loss_cls: 3.6315 (3.8012) grad_norm: 4.2268 (4.3197) time: 0.7788 data: 0.0002 max mem: 8421 +[2024-12-05 23:26:21 root] (utils.py 283): INFO Epoch: [19] [ 290/2502] eta: 0:28:54 lr: 0.000008 loss_cls: 3.6315 (3.8053) grad_norm: 4.2426 (4.3182) time: 0.7751 data: 0.0002 max mem: 8421 +[2024-12-05 23:26:28 root] (utils.py 283): INFO Epoch: [19] [ 300/2502] eta: 0:28:45 lr: 0.000008 loss_cls: 3.9121 (3.8085) grad_norm: 4.2402 (4.3192) time: 0.7715 data: 0.0003 max mem: 8421 +[2024-12-05 23:26:36 root] (utils.py 283): INFO Epoch: [19] [ 310/2502] eta: 0:28:37 lr: 0.000008 loss_cls: 3.9121 (3.8116) grad_norm: 4.3297 (4.3241) time: 0.7737 data: 0.0003 max mem: 8421 +[2024-12-05 23:26:44 root] (utils.py 283): INFO Epoch: [19] [ 320/2502] eta: 0:28:28 lr: 0.000008 loss_cls: 3.8466 (3.8073) grad_norm: 4.1669 (4.3216) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-05 23:26:52 root] (utils.py 283): INFO Epoch: [19] [ 330/2502] eta: 0:28:20 lr: 0.000008 loss_cls: 4.0560 (3.8152) grad_norm: 4.0682 (4.3222) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-05 23:27:00 root] (utils.py 283): INFO Epoch: [19] [ 340/2502] eta: 0:28:13 lr: 0.000008 loss_cls: 4.1605 (3.8131) grad_norm: 4.0682 (4.3185) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-05 23:27:07 root] (utils.py 283): INFO Epoch: [19] [ 350/2502] eta: 0:28:04 lr: 0.000008 loss_cls: 3.9731 (3.8109) grad_norm: 4.2459 (4.3210) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 23:27:15 root] (utils.py 283): INFO Epoch: [19] [ 360/2502] eta: 0:27:56 lr: 0.000008 loss_cls: 4.1225 (3.8184) grad_norm: 4.3849 (4.3313) time: 0.7705 data: 0.0003 max mem: 8421 +[2024-12-05 23:27:23 root] (utils.py 283): INFO Epoch: [19] [ 370/2502] eta: 0:27:47 lr: 0.000008 loss_cls: 4.1804 (3.8239) grad_norm: 4.3014 (4.3318) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-05 23:27:31 root] (utils.py 283): INFO Epoch: [19] [ 380/2502] eta: 0:27:39 lr: 0.000008 loss_cls: 3.9579 (3.8200) grad_norm: 4.2571 (4.3304) time: 0.7712 data: 0.0002 max mem: 8421 +[2024-12-05 23:27:38 root] (utils.py 283): INFO Epoch: [19] [ 390/2502] eta: 0:27:30 lr: 0.000008 loss_cls: 3.9579 (3.8230) grad_norm: 4.0044 (4.3263) time: 0.7716 data: 0.0002 max mem: 8421 +[2024-12-05 23:27:46 root] (utils.py 283): INFO Epoch: [19] [ 400/2502] eta: 0:27:22 lr: 0.000008 loss_cls: 4.1442 (3.8304) grad_norm: 4.1480 (4.3345) time: 0.7730 data: 0.0003 max mem: 8421 +[2024-12-05 23:27:54 root] (utils.py 283): INFO Epoch: [19] [ 410/2502] eta: 0:27:14 lr: 0.000008 loss_cls: 3.8199 (3.8279) grad_norm: 4.3056 (4.3351) time: 0.7708 data: 0.0003 max mem: 8421 +[2024-12-05 23:28:01 root] (utils.py 283): INFO Epoch: [19] [ 420/2502] eta: 0:27:06 lr: 0.000008 loss_cls: 3.8199 (3.8276) grad_norm: 4.2376 (4.3307) time: 0.7736 data: 0.0003 max mem: 8421 +[2024-12-05 23:28:09 root] (utils.py 283): INFO Epoch: [19] [ 430/2502] eta: 0:26:58 lr: 0.000008 loss_cls: 3.8856 (3.8250) grad_norm: 4.2233 (4.3276) time: 0.7756 data: 0.0003 max mem: 8421 +[2024-12-05 23:28:17 root] (utils.py 283): INFO Epoch: [19] [ 440/2502] eta: 0:26:49 lr: 0.000008 loss_cls: 3.8856 (3.8286) grad_norm: 4.2233 (4.3270) time: 0.7732 data: 0.0003 max mem: 8421 +[2024-12-05 23:28:25 root] (utils.py 283): INFO Epoch: [19] [ 450/2502] eta: 0:26:41 lr: 0.000008 loss_cls: 3.8513 (3.8246) grad_norm: 4.2035 (4.3240) time: 0.7715 data: 0.0003 max mem: 8421 +[2024-12-05 23:28:32 root] (utils.py 283): INFO Epoch: [19] [ 460/2502] eta: 0:26:33 lr: 0.000008 loss_cls: 3.7855 (3.8226) grad_norm: 4.1257 (4.3195) time: 0.7708 data: 0.0002 max mem: 8421 +[2024-12-05 23:28:40 root] (utils.py 283): INFO Epoch: [19] [ 470/2502] eta: 0:26:25 lr: 0.000008 loss_cls: 3.9792 (3.8232) grad_norm: 4.1658 (4.3221) time: 0.7715 data: 0.0002 max mem: 8421 +[2024-12-05 23:28:48 root] (utils.py 283): INFO Epoch: [19] [ 480/2502] eta: 0:26:16 lr: 0.000008 loss_cls: 3.8442 (3.8228) grad_norm: 4.2871 (4.3215) time: 0.7692 data: 0.0002 max mem: 8421 +[2024-12-05 23:28:55 root] (utils.py 283): INFO Epoch: [19] [ 490/2502] eta: 0:26:08 lr: 0.000008 loss_cls: 3.8442 (3.8210) grad_norm: 4.2742 (4.3227) time: 0.7692 data: 0.0002 max mem: 8421 +[2024-12-05 23:29:03 root] (utils.py 283): INFO Epoch: [19] [ 500/2502] eta: 0:26:00 lr: 0.000008 loss_cls: 4.0049 (3.8231) grad_norm: 4.0994 (4.3198) time: 0.7758 data: 0.0002 max mem: 8421 +[2024-12-05 23:29:11 root] (utils.py 283): INFO Epoch: [19] [ 510/2502] eta: 0:25:52 lr: 0.000008 loss_cls: 3.8239 (3.8218) grad_norm: 4.0653 (4.3164) time: 0.7768 data: 0.0002 max mem: 8421 +[2024-12-05 23:29:19 root] (utils.py 283): INFO Epoch: [19] [ 520/2502] eta: 0:25:44 lr: 0.000008 loss_cls: 3.6219 (3.8167) grad_norm: 4.0556 (4.3134) time: 0.7735 data: 0.0003 max mem: 8421 +[2024-12-05 23:29:26 root] (utils.py 283): INFO Epoch: [19] [ 530/2502] eta: 0:25:36 lr: 0.000008 loss_cls: 3.8256 (3.8210) grad_norm: 4.1728 (4.3178) time: 0.7744 data: 0.0002 max mem: 8421 +[2024-12-05 23:29:34 root] (utils.py 283): INFO Epoch: [19] [ 540/2502] eta: 0:25:28 lr: 0.000008 loss_cls: 3.8554 (3.8193) grad_norm: 4.1468 (4.3165) time: 0.7758 data: 0.0002 max mem: 8421 +[2024-12-05 23:29:42 root] (utils.py 283): INFO Epoch: [19] [ 550/2502] eta: 0:25:21 lr: 0.000008 loss_cls: 3.4317 (3.8124) grad_norm: 4.1468 (4.3200) time: 0.7759 data: 0.0002 max mem: 8421 +[2024-12-05 23:29:50 root] (utils.py 283): INFO Epoch: [19] [ 560/2502] eta: 0:25:13 lr: 0.000008 loss_cls: 3.7302 (3.8124) grad_norm: 4.0091 (4.3150) time: 0.7750 data: 0.0003 max mem: 8421 +[2024-12-05 23:29:58 root] (utils.py 283): INFO Epoch: [19] [ 570/2502] eta: 0:25:05 lr: 0.000008 loss_cls: 4.1409 (3.8168) grad_norm: 4.1471 (4.3157) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-05 23:30:05 root] (utils.py 283): INFO Epoch: [19] [ 580/2502] eta: 0:24:57 lr: 0.000008 loss_cls: 4.1076 (3.8174) grad_norm: 4.3454 (4.3153) time: 0.7756 data: 0.0002 max mem: 8421 +[2024-12-05 23:30:13 root] (utils.py 283): INFO Epoch: [19] [ 590/2502] eta: 0:24:49 lr: 0.000008 loss_cls: 4.0692 (3.8236) grad_norm: 4.5279 (4.3263) time: 0.7770 data: 0.0003 max mem: 8421 +[2024-12-05 23:30:21 root] (utils.py 283): INFO Epoch: [19] [ 600/2502] eta: 0:24:41 lr: 0.000008 loss_cls: 3.9421 (3.8192) grad_norm: 4.4966 (4.3285) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 23:30:29 root] (utils.py 283): INFO Epoch: [19] [ 610/2502] eta: 0:24:33 lr: 0.000008 loss_cls: 3.5478 (3.8174) grad_norm: 4.2595 (4.3270) time: 0.7754 data: 0.0002 max mem: 8421 +[2024-12-05 23:30:36 root] (utils.py 283): INFO Epoch: [19] [ 620/2502] eta: 0:24:25 lr: 0.000008 loss_cls: 3.8818 (3.8217) grad_norm: 4.2522 (4.3287) time: 0.7744 data: 0.0002 max mem: 8421 +[2024-12-05 23:30:44 root] (utils.py 283): INFO Epoch: [19] [ 630/2502] eta: 0:24:17 lr: 0.000008 loss_cls: 3.8334 (3.8202) grad_norm: 4.3761 (4.3322) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-05 23:30:52 root] (utils.py 283): INFO Epoch: [19] [ 640/2502] eta: 0:24:10 lr: 0.000008 loss_cls: 3.8023 (3.8218) grad_norm: 4.2123 (4.3339) time: 0.7752 data: 0.0003 max mem: 8421 +[2024-12-05 23:31:00 root] (utils.py 283): INFO Epoch: [19] [ 650/2502] eta: 0:24:02 lr: 0.000008 loss_cls: 3.9324 (3.8219) grad_norm: 4.2123 (4.3425) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-05 23:31:07 root] (utils.py 283): INFO Epoch: [19] [ 660/2502] eta: 0:23:54 lr: 0.000008 loss_cls: 3.8710 (3.8199) grad_norm: 4.1715 (4.3402) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 23:31:15 root] (utils.py 283): INFO Epoch: [19] [ 670/2502] eta: 0:23:46 lr: 0.000008 loss_cls: 3.9004 (3.8214) grad_norm: 4.2307 (4.3446) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-05 23:31:23 root] (utils.py 283): INFO Epoch: [19] [ 680/2502] eta: 0:23:38 lr: 0.000008 loss_cls: 4.0207 (3.8234) grad_norm: 4.4368 (4.3481) time: 0.7760 data: 0.0002 max mem: 8421 +[2024-12-05 23:31:31 root] (utils.py 283): INFO Epoch: [19] [ 690/2502] eta: 0:23:31 lr: 0.000008 loss_cls: 3.9877 (3.8220) grad_norm: 4.3887 (4.3535) time: 0.7771 data: 0.0002 max mem: 8421 +[2024-12-05 23:31:38 root] (utils.py 283): INFO Epoch: [19] [ 700/2502] eta: 0:23:23 lr: 0.000008 loss_cls: 3.9643 (3.8242) grad_norm: 4.2278 (4.3522) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 23:31:46 root] (utils.py 283): INFO Epoch: [19] [ 710/2502] eta: 0:23:15 lr: 0.000008 loss_cls: 3.9557 (3.8230) grad_norm: 4.3140 (4.3570) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 23:31:54 root] (utils.py 283): INFO Epoch: [19] [ 720/2502] eta: 0:23:07 lr: 0.000008 loss_cls: 3.6755 (3.8186) grad_norm: 4.3140 (4.3564) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-05 23:32:02 root] (utils.py 283): INFO Epoch: [19] [ 730/2502] eta: 0:22:59 lr: 0.000008 loss_cls: 3.4786 (3.8173) grad_norm: 4.2973 (4.3619) time: 0.7745 data: 0.0003 max mem: 8421 +[2024-12-05 23:32:10 root] (utils.py 283): INFO Epoch: [19] [ 740/2502] eta: 0:22:51 lr: 0.000008 loss_cls: 4.0923 (3.8184) grad_norm: 4.3260 (4.3622) time: 0.7753 data: 0.0003 max mem: 8421 +[2024-12-05 23:32:17 root] (utils.py 283): INFO Epoch: [19] [ 750/2502] eta: 0:22:44 lr: 0.000008 loss_cls: 4.0972 (3.8198) grad_norm: 4.3260 (4.3623) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 23:32:25 root] (utils.py 283): INFO Epoch: [19] [ 760/2502] eta: 0:22:36 lr: 0.000008 loss_cls: 4.1691 (3.8251) grad_norm: 4.3094 (4.3634) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 23:32:33 root] (utils.py 283): INFO Epoch: [19] [ 770/2502] eta: 0:22:28 lr: 0.000008 loss_cls: 4.1616 (3.8265) grad_norm: 4.1166 (4.3590) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 23:32:41 root] (utils.py 283): INFO Epoch: [19] [ 780/2502] eta: 0:22:21 lr: 0.000008 loss_cls: 3.9662 (3.8285) grad_norm: 4.1166 (4.3597) time: 0.7840 data: 0.0002 max mem: 8421 +[2024-12-05 23:32:49 root] (utils.py 283): INFO Epoch: [19] [ 790/2502] eta: 0:22:13 lr: 0.000008 loss_cls: 3.9831 (3.8297) grad_norm: 4.2875 (4.3655) time: 0.7802 data: 0.0002 max mem: 8421 +[2024-12-05 23:32:57 root] (utils.py 283): INFO Epoch: [19] [ 800/2502] eta: 0:22:05 lr: 0.000008 loss_cls: 3.9176 (3.8275) grad_norm: 4.2128 (4.3643) time: 0.7801 data: 0.0002 max mem: 8421 +[2024-12-05 23:33:04 root] (utils.py 283): INFO Epoch: [19] [ 810/2502] eta: 0:21:57 lr: 0.000008 loss_cls: 3.7425 (3.8275) grad_norm: 4.2129 (4.3650) time: 0.7775 data: 0.0002 max mem: 8421 +[2024-12-05 23:33:12 root] (utils.py 283): INFO Epoch: [19] [ 820/2502] eta: 0:21:49 lr: 0.000008 loss_cls: 3.9934 (3.8288) grad_norm: 4.3133 (4.3666) time: 0.7744 data: 0.0002 max mem: 8421 +[2024-12-05 23:33:20 root] (utils.py 283): INFO Epoch: [19] [ 830/2502] eta: 0:21:42 lr: 0.000008 loss_cls: 4.1383 (3.8306) grad_norm: 4.2917 (4.3652) time: 0.7757 data: 0.0002 max mem: 8421 +[2024-12-05 23:33:28 root] (utils.py 283): INFO Epoch: [19] [ 840/2502] eta: 0:21:34 lr: 0.000008 loss_cls: 3.9099 (3.8277) grad_norm: 4.1625 (4.3626) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-05 23:33:35 root] (utils.py 283): INFO Epoch: [19] [ 850/2502] eta: 0:21:26 lr: 0.000008 loss_cls: 3.8559 (3.8290) grad_norm: 4.1377 (4.3631) time: 0.7773 data: 0.0002 max mem: 8421 +[2024-12-05 23:33:43 root] (utils.py 283): INFO Epoch: [19] [ 860/2502] eta: 0:21:18 lr: 0.000008 loss_cls: 3.9743 (3.8289) grad_norm: 4.1504 (4.3610) time: 0.7771 data: 0.0002 max mem: 8421 +[2024-12-05 23:33:51 root] (utils.py 283): INFO Epoch: [19] [ 870/2502] eta: 0:21:10 lr: 0.000008 loss_cls: 3.9569 (3.8295) grad_norm: 4.1716 (4.3612) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-05 23:33:59 root] (utils.py 283): INFO Epoch: [19] [ 880/2502] eta: 0:21:02 lr: 0.000008 loss_cls: 3.9569 (3.8309) grad_norm: 4.3058 (4.3615) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 23:34:06 root] (utils.py 283): INFO Epoch: [19] [ 890/2502] eta: 0:20:55 lr: 0.000008 loss_cls: 3.9656 (3.8330) grad_norm: 4.2448 (4.3597) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-05 23:34:14 root] (utils.py 283): INFO Epoch: [19] [ 900/2502] eta: 0:20:47 lr: 0.000008 loss_cls: 3.9656 (3.8332) grad_norm: 4.1735 (4.3599) time: 0.7776 data: 0.0002 max mem: 8421 +[2024-12-05 23:34:22 root] (utils.py 283): INFO Epoch: [19] [ 910/2502] eta: 0:20:39 lr: 0.000008 loss_cls: 3.8277 (3.8302) grad_norm: 4.1719 (4.3584) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-05 23:34:30 root] (utils.py 283): INFO Epoch: [19] [ 920/2502] eta: 0:20:31 lr: 0.000008 loss_cls: 3.7452 (3.8299) grad_norm: 4.2552 (4.3581) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-05 23:34:38 root] (utils.py 283): INFO Epoch: [19] [ 930/2502] eta: 0:20:23 lr: 0.000008 loss_cls: 3.8132 (3.8304) grad_norm: 4.2531 (4.3596) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-05 23:34:45 root] (utils.py 283): INFO Epoch: [19] [ 940/2502] eta: 0:20:16 lr: 0.000008 loss_cls: 3.8906 (3.8320) grad_norm: 4.1566 (4.3580) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-05 23:34:53 root] (utils.py 283): INFO Epoch: [19] [ 950/2502] eta: 0:20:08 lr: 0.000008 loss_cls: 4.0734 (3.8351) grad_norm: 4.1566 (4.3610) time: 0.7847 data: 0.0002 max mem: 8421 +[2024-12-05 23:35:01 root] (utils.py 283): INFO Epoch: [19] [ 960/2502] eta: 0:20:00 lr: 0.000008 loss_cls: 4.0962 (3.8356) grad_norm: 4.1274 (4.3647) time: 0.7826 data: 0.0002 max mem: 8421 +[2024-12-05 23:35:09 root] (utils.py 283): INFO Epoch: [19] [ 970/2502] eta: 0:19:52 lr: 0.000008 loss_cls: 3.7702 (3.8337) grad_norm: 4.1486 (4.3637) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-05 23:35:16 root] (utils.py 283): INFO Epoch: [19] [ 980/2502] eta: 0:19:45 lr: 0.000008 loss_cls: 3.5798 (3.8313) grad_norm: 4.1486 (4.3624) time: 0.7768 data: 0.0002 max mem: 8421 +[2024-12-05 23:35:24 root] (utils.py 283): INFO Epoch: [19] [ 990/2502] eta: 0:19:37 lr: 0.000008 loss_cls: 3.8972 (3.8345) grad_norm: 4.1656 (4.3621) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-05 23:35:32 root] (utils.py 283): INFO Epoch: [19] [1000/2502] eta: 0:19:29 lr: 0.000008 loss_cls: 4.1436 (3.8361) grad_norm: 4.2966 (4.3604) time: 0.7766 data: 0.0002 max mem: 8421 +[2024-12-05 23:35:40 root] (utils.py 283): INFO Epoch: [19] [1010/2502] eta: 0:19:21 lr: 0.000008 loss_cls: 3.9363 (3.8356) grad_norm: 4.1513 (4.3600) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 23:35:48 root] (utils.py 283): INFO Epoch: [19] [1020/2502] eta: 0:19:13 lr: 0.000008 loss_cls: 4.0242 (3.8360) grad_norm: 4.3974 (4.3625) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-05 23:35:55 root] (utils.py 283): INFO Epoch: [19] [1030/2502] eta: 0:19:05 lr: 0.000008 loss_cls: 4.1241 (3.8380) grad_norm: 4.3571 (4.3613) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-05 23:36:03 root] (utils.py 283): INFO Epoch: [19] [1040/2502] eta: 0:18:58 lr: 0.000008 loss_cls: 4.1731 (3.8404) grad_norm: 4.1648 (4.3617) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 23:36:11 root] (utils.py 283): INFO Epoch: [19] [1050/2502] eta: 0:18:50 lr: 0.000008 loss_cls: 4.1668 (3.8427) grad_norm: 4.2902 (4.3626) time: 0.7744 data: 0.0003 max mem: 8421 +[2024-12-05 23:36:19 root] (utils.py 283): INFO Epoch: [19] [1060/2502] eta: 0:18:42 lr: 0.000008 loss_cls: 4.0870 (3.8440) grad_norm: 4.2441 (4.3616) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-05 23:36:26 root] (utils.py 283): INFO Epoch: [19] [1070/2502] eta: 0:18:34 lr: 0.000008 loss_cls: 3.9212 (3.8434) grad_norm: 4.2008 (4.3660) time: 0.7774 data: 0.0002 max mem: 8421 +[2024-12-05 23:36:34 root] (utils.py 283): INFO Epoch: [19] [1080/2502] eta: 0:18:26 lr: 0.000008 loss_cls: 3.7569 (3.8425) grad_norm: 4.0759 (4.3656) time: 0.7766 data: 0.0002 max mem: 8421 +[2024-12-05 23:36:42 root] (utils.py 283): INFO Epoch: [19] [1090/2502] eta: 0:18:18 lr: 0.000008 loss_cls: 4.2169 (3.8452) grad_norm: 4.0543 (4.3637) time: 0.7751 data: 0.0003 max mem: 8421 +[2024-12-05 23:36:50 root] (utils.py 283): INFO Epoch: [19] [1100/2502] eta: 0:18:11 lr: 0.000008 loss_cls: 4.2361 (3.8470) grad_norm: 4.0797 (4.3634) time: 0.7751 data: 0.0003 max mem: 8421 +[2024-12-05 23:36:57 root] (utils.py 283): INFO Epoch: [19] [1110/2502] eta: 0:18:03 lr: 0.000008 loss_cls: 4.0857 (3.8472) grad_norm: 4.3054 (4.3649) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-05 23:37:05 root] (utils.py 283): INFO Epoch: [19] [1120/2502] eta: 0:17:55 lr: 0.000008 loss_cls: 4.0785 (3.8493) grad_norm: 4.5103 (4.3678) time: 0.7837 data: 0.0002 max mem: 8421 +[2024-12-05 23:37:13 root] (utils.py 283): INFO Epoch: [19] [1130/2502] eta: 0:17:48 lr: 0.000008 loss_cls: 3.9327 (3.8479) grad_norm: 4.5285 (4.3710) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-05 23:37:21 root] (utils.py 283): INFO Epoch: [19] [1140/2502] eta: 0:17:40 lr: 0.000008 loss_cls: 4.0111 (3.8499) grad_norm: 4.2350 (4.3716) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-05 23:37:29 root] (utils.py 283): INFO Epoch: [19] [1150/2502] eta: 0:17:32 lr: 0.000008 loss_cls: 3.9253 (3.8489) grad_norm: 4.4510 (4.3729) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-05 23:37:36 root] (utils.py 283): INFO Epoch: [19] [1160/2502] eta: 0:17:24 lr: 0.000008 loss_cls: 3.8849 (3.8484) grad_norm: 4.4510 (4.3742) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-05 23:37:44 root] (utils.py 283): INFO Epoch: [19] [1170/2502] eta: 0:17:16 lr: 0.000008 loss_cls: 3.8109 (3.8474) grad_norm: 4.2513 (4.3719) time: 0.7789 data: 0.0002 max mem: 8421 +[2024-12-05 23:37:52 root] (utils.py 283): INFO Epoch: [19] [1180/2502] eta: 0:17:09 lr: 0.000008 loss_cls: 3.6415 (3.8456) grad_norm: 4.0830 (4.3706) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 23:38:00 root] (utils.py 283): INFO Epoch: [19] [1190/2502] eta: 0:17:01 lr: 0.000008 loss_cls: 3.6161 (3.8439) grad_norm: 4.2126 (4.3721) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-05 23:38:08 root] (utils.py 283): INFO Epoch: [19] [1200/2502] eta: 0:16:53 lr: 0.000008 loss_cls: 3.7786 (3.8453) grad_norm: 4.2791 (4.3728) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-05 23:38:15 root] (utils.py 283): INFO Epoch: [19] [1210/2502] eta: 0:16:45 lr: 0.000008 loss_cls: 3.8590 (3.8453) grad_norm: 4.2031 (4.3723) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-05 23:38:23 root] (utils.py 283): INFO Epoch: [19] [1220/2502] eta: 0:16:38 lr: 0.000008 loss_cls: 3.7322 (3.8449) grad_norm: 4.1254 (4.3726) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-05 23:38:31 root] (utils.py 283): INFO Epoch: [19] [1230/2502] eta: 0:16:30 lr: 0.000008 loss_cls: 3.9652 (3.8467) grad_norm: 4.2458 (4.3728) time: 0.7841 data: 0.0002 max mem: 8421 +[2024-12-05 23:38:39 root] (utils.py 283): INFO Epoch: [19] [1240/2502] eta: 0:16:22 lr: 0.000008 loss_cls: 4.1663 (3.8483) grad_norm: 4.2481 (4.3727) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-05 23:38:47 root] (utils.py 283): INFO Epoch: [19] [1250/2502] eta: 0:16:14 lr: 0.000008 loss_cls: 4.0853 (3.8501) grad_norm: 4.2586 (4.3726) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 23:38:55 root] (utils.py 283): INFO Epoch: [19] [1260/2502] eta: 0:16:07 lr: 0.000008 loss_cls: 3.9821 (3.8504) grad_norm: 4.2430 (4.3730) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-05 23:39:02 root] (utils.py 283): INFO Epoch: [19] [1270/2502] eta: 0:15:59 lr: 0.000008 loss_cls: 3.9288 (3.8506) grad_norm: 4.3251 (4.3792) time: 0.7816 data: 0.0002 max mem: 8421 +[2024-12-05 23:39:10 root] (utils.py 283): INFO Epoch: [19] [1280/2502] eta: 0:15:51 lr: 0.000008 loss_cls: 4.0052 (3.8506) grad_norm: 4.3251 (4.3784) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-05 23:39:18 root] (utils.py 283): INFO Epoch: [19] [1290/2502] eta: 0:15:43 lr: 0.000008 loss_cls: 4.0893 (3.8515) grad_norm: 4.1596 (4.3767) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-05 23:39:26 root] (utils.py 283): INFO Epoch: [19] [1300/2502] eta: 0:15:36 lr: 0.000008 loss_cls: 3.7859 (3.8497) grad_norm: 4.1061 (4.3757) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-05 23:39:34 root] (utils.py 283): INFO Epoch: [19] [1310/2502] eta: 0:15:28 lr: 0.000008 loss_cls: 3.7675 (3.8498) grad_norm: 4.1465 (4.3778) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-05 23:39:42 root] (utils.py 283): INFO Epoch: [19] [1320/2502] eta: 0:15:20 lr: 0.000008 loss_cls: 3.9524 (3.8501) grad_norm: 4.1520 (4.3763) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-05 23:39:49 root] (utils.py 283): INFO Epoch: [19] [1330/2502] eta: 0:15:12 lr: 0.000008 loss_cls: 3.9524 (3.8501) grad_norm: 4.1784 (4.3788) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 23:39:57 root] (utils.py 283): INFO Epoch: [19] [1340/2502] eta: 0:15:04 lr: 0.000008 loss_cls: 4.0098 (3.8517) grad_norm: 4.2172 (4.3780) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-05 23:40:05 root] (utils.py 283): INFO Epoch: [19] [1350/2502] eta: 0:14:57 lr: 0.000008 loss_cls: 3.9273 (3.8505) grad_norm: 4.1842 (4.3779) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 23:40:13 root] (utils.py 283): INFO Epoch: [19] [1360/2502] eta: 0:14:49 lr: 0.000008 loss_cls: 3.8171 (3.8504) grad_norm: 4.4011 (4.3813) time: 0.7773 data: 0.0002 max mem: 8421 +[2024-12-05 23:40:20 root] (utils.py 283): INFO Epoch: [19] [1370/2502] eta: 0:14:41 lr: 0.000008 loss_cls: 4.0206 (3.8507) grad_norm: 4.2375 (4.3807) time: 0.7776 data: 0.0002 max mem: 8421 +[2024-12-05 23:40:28 root] (utils.py 283): INFO Epoch: [19] [1380/2502] eta: 0:14:33 lr: 0.000008 loss_cls: 3.9515 (3.8501) grad_norm: 4.2484 (4.3804) time: 0.7864 data: 0.0002 max mem: 8421 +[2024-12-05 23:40:36 root] (utils.py 283): INFO Epoch: [19] [1390/2502] eta: 0:14:26 lr: 0.000008 loss_cls: 3.8595 (3.8492) grad_norm: 4.2486 (4.3798) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-05 23:40:44 root] (utils.py 283): INFO Epoch: [19] [1400/2502] eta: 0:14:18 lr: 0.000008 loss_cls: 3.8757 (3.8480) grad_norm: 4.1534 (4.3787) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-05 23:40:52 root] (utils.py 283): INFO Epoch: [19] [1410/2502] eta: 0:14:10 lr: 0.000008 loss_cls: 3.9114 (3.8482) grad_norm: 4.1534 (4.3802) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-05 23:41:00 root] (utils.py 283): INFO Epoch: [19] [1420/2502] eta: 0:14:02 lr: 0.000008 loss_cls: 3.9314 (3.8495) grad_norm: 4.0847 (4.3783) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-05 23:41:07 root] (utils.py 283): INFO Epoch: [19] [1430/2502] eta: 0:13:54 lr: 0.000008 loss_cls: 3.9555 (3.8506) grad_norm: 4.1354 (4.3777) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-05 23:41:15 root] (utils.py 283): INFO Epoch: [19] [1440/2502] eta: 0:13:47 lr: 0.000008 loss_cls: 3.7490 (3.8495) grad_norm: 4.1468 (4.3771) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 23:41:23 root] (utils.py 283): INFO Epoch: [19] [1450/2502] eta: 0:13:39 lr: 0.000008 loss_cls: 3.6705 (3.8486) grad_norm: 4.3005 (4.3776) time: 0.7785 data: 0.0002 max mem: 8421 +[2024-12-05 23:41:31 root] (utils.py 283): INFO Epoch: [19] [1460/2502] eta: 0:13:31 lr: 0.000008 loss_cls: 4.1086 (3.8515) grad_norm: 4.3005 (4.3764) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 23:41:38 root] (utils.py 283): INFO Epoch: [19] [1470/2502] eta: 0:13:23 lr: 0.000008 loss_cls: 4.1086 (3.8505) grad_norm: 4.1500 (4.3794) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 23:41:46 root] (utils.py 283): INFO Epoch: [19] [1480/2502] eta: 0:13:16 lr: 0.000008 loss_cls: 3.8708 (3.8511) grad_norm: 4.2236 (4.3819) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-05 23:41:54 root] (utils.py 283): INFO Epoch: [19] [1490/2502] eta: 0:13:08 lr: 0.000008 loss_cls: 3.9299 (3.8509) grad_norm: 4.2347 (4.3816) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 23:42:02 root] (utils.py 283): INFO Epoch: [19] [1500/2502] eta: 0:13:00 lr: 0.000008 loss_cls: 3.6452 (3.8483) grad_norm: 4.1082 (4.3796) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 23:42:10 root] (utils.py 283): INFO Epoch: [19] [1510/2502] eta: 0:12:52 lr: 0.000008 loss_cls: 3.6452 (3.8474) grad_norm: 3.9866 (4.3771) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 23:42:17 root] (utils.py 283): INFO Epoch: [19] [1520/2502] eta: 0:12:44 lr: 0.000008 loss_cls: 3.9567 (3.8481) grad_norm: 4.1614 (4.3783) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-05 23:42:25 root] (utils.py 283): INFO Epoch: [19] [1530/2502] eta: 0:12:37 lr: 0.000008 loss_cls: 4.0321 (3.8484) grad_norm: 4.4475 (4.3788) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-05 23:42:33 root] (utils.py 283): INFO Epoch: [19] [1540/2502] eta: 0:12:29 lr: 0.000008 loss_cls: 3.8925 (3.8471) grad_norm: 4.2518 (4.3811) time: 0.7756 data: 0.0003 max mem: 8421 +[2024-12-05 23:42:41 root] (utils.py 283): INFO Epoch: [19] [1550/2502] eta: 0:12:21 lr: 0.000008 loss_cls: 3.8925 (3.8463) grad_norm: 4.2421 (4.3817) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-05 23:42:48 root] (utils.py 283): INFO Epoch: [19] [1560/2502] eta: 0:12:13 lr: 0.000008 loss_cls: 3.9539 (3.8454) grad_norm: 4.2673 (4.3817) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-05 23:42:56 root] (utils.py 283): INFO Epoch: [19] [1570/2502] eta: 0:12:05 lr: 0.000008 loss_cls: 3.8581 (3.8452) grad_norm: 4.1500 (4.3814) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 23:43:04 root] (utils.py 283): INFO Epoch: [19] [1580/2502] eta: 0:11:58 lr: 0.000008 loss_cls: 3.8581 (3.8449) grad_norm: 4.1500 (4.3821) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-05 23:43:12 root] (utils.py 283): INFO Epoch: [19] [1590/2502] eta: 0:11:50 lr: 0.000008 loss_cls: 3.8951 (3.8445) grad_norm: 4.2665 (4.3815) time: 0.7763 data: 0.0002 max mem: 8421 +[2024-12-05 23:43:20 root] (utils.py 283): INFO Epoch: [19] [1600/2502] eta: 0:11:42 lr: 0.000008 loss_cls: 3.9121 (3.8458) grad_norm: 4.3123 (4.3818) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-05 23:43:27 root] (utils.py 283): INFO Epoch: [19] [1610/2502] eta: 0:11:34 lr: 0.000008 loss_cls: 3.9806 (3.8456) grad_norm: 4.3478 (4.3824) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-05 23:43:35 root] (utils.py 283): INFO Epoch: [19] [1620/2502] eta: 0:11:26 lr: 0.000008 loss_cls: 3.7605 (3.8443) grad_norm: 4.2940 (4.3825) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 23:43:43 root] (utils.py 283): INFO Epoch: [19] [1630/2502] eta: 0:11:19 lr: 0.000008 loss_cls: 3.7497 (3.8447) grad_norm: 4.3378 (4.3823) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 23:43:51 root] (utils.py 283): INFO Epoch: [19] [1640/2502] eta: 0:11:11 lr: 0.000008 loss_cls: 3.9265 (3.8448) grad_norm: 4.3497 (4.3831) time: 0.7841 data: 0.0002 max mem: 8421 +[2024-12-05 23:43:59 root] (utils.py 283): INFO Epoch: [19] [1650/2502] eta: 0:11:03 lr: 0.000008 loss_cls: 3.6891 (3.8440) grad_norm: 4.2918 (4.3832) time: 0.7823 data: 0.0002 max mem: 8421 +[2024-12-05 23:44:06 root] (utils.py 283): INFO Epoch: [19] [1660/2502] eta: 0:10:55 lr: 0.000008 loss_cls: 3.7248 (3.8447) grad_norm: 4.2301 (4.3840) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 23:44:14 root] (utils.py 283): INFO Epoch: [19] [1670/2502] eta: 0:10:48 lr: 0.000008 loss_cls: 3.8268 (3.8428) grad_norm: 4.2925 (4.3844) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-05 23:44:22 root] (utils.py 283): INFO Epoch: [19] [1680/2502] eta: 0:10:40 lr: 0.000008 loss_cls: 3.6578 (3.8420) grad_norm: 4.3619 (4.3844) time: 0.7844 data: 0.0002 max mem: 8421 +[2024-12-05 23:44:30 root] (utils.py 283): INFO Epoch: [19] [1690/2502] eta: 0:10:32 lr: 0.000008 loss_cls: 3.8457 (3.8424) grad_norm: 4.2272 (4.3836) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-05 23:44:38 root] (utils.py 283): INFO Epoch: [19] [1700/2502] eta: 0:10:24 lr: 0.000008 loss_cls: 3.9872 (3.8421) grad_norm: 4.1774 (4.3833) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-05 23:44:46 root] (utils.py 283): INFO Epoch: [19] [1710/2502] eta: 0:10:16 lr: 0.000008 loss_cls: 4.0462 (3.8435) grad_norm: 4.1779 (4.3826) time: 0.7745 data: 0.0002 max mem: 8421 +[2024-12-05 23:44:53 root] (utils.py 283): INFO Epoch: [19] [1720/2502] eta: 0:10:09 lr: 0.000008 loss_cls: 4.0084 (3.8440) grad_norm: 4.3565 (4.3822) time: 0.7742 data: 0.0002 max mem: 8421 +[2024-12-05 23:45:01 root] (utils.py 283): INFO Epoch: [19] [1730/2502] eta: 0:10:01 lr: 0.000008 loss_cls: 3.7713 (3.8427) grad_norm: 4.3565 (4.3815) time: 0.7745 data: 0.0003 max mem: 8421 +[2024-12-05 23:45:09 root] (utils.py 283): INFO Epoch: [19] [1740/2502] eta: 0:09:53 lr: 0.000008 loss_cls: 3.6729 (3.8417) grad_norm: 4.2469 (4.3811) time: 0.7756 data: 0.0003 max mem: 8421 +[2024-12-05 23:45:17 root] (utils.py 283): INFO Epoch: [19] [1750/2502] eta: 0:09:45 lr: 0.000008 loss_cls: 3.8580 (3.8421) grad_norm: 4.2022 (4.3806) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-05 23:45:24 root] (utils.py 283): INFO Epoch: [19] [1760/2502] eta: 0:09:37 lr: 0.000008 loss_cls: 3.9475 (3.8430) grad_norm: 4.3102 (4.3807) time: 0.7763 data: 0.0002 max mem: 8421 +[2024-12-05 23:45:32 root] (utils.py 283): INFO Epoch: [19] [1770/2502] eta: 0:09:30 lr: 0.000008 loss_cls: 3.9712 (3.8434) grad_norm: 4.3102 (4.3814) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-05 23:45:40 root] (utils.py 283): INFO Epoch: [19] [1780/2502] eta: 0:09:22 lr: 0.000008 loss_cls: 4.0488 (3.8440) grad_norm: 4.3473 (4.3818) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-05 23:45:48 root] (utils.py 283): INFO Epoch: [19] [1790/2502] eta: 0:09:14 lr: 0.000008 loss_cls: 3.8207 (3.8429) grad_norm: 4.5233 (4.3909) time: 0.7743 data: 0.0003 max mem: 8421 +[2024-12-05 23:45:55 root] (utils.py 283): INFO Epoch: [19] [1800/2502] eta: 0:09:06 lr: 0.000008 loss_cls: 3.9173 (3.8436) grad_norm: 4.8615 (4.3931) time: 0.7748 data: 0.0003 max mem: 8421 +[2024-12-05 23:46:03 root] (utils.py 283): INFO Epoch: [19] [1810/2502] eta: 0:08:58 lr: 0.000008 loss_cls: 3.8389 (3.8421) grad_norm: 4.8258 (4.3946) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-05 23:46:11 root] (utils.py 283): INFO Epoch: [19] [1820/2502] eta: 0:08:51 lr: 0.000008 loss_cls: 3.7146 (3.8418) grad_norm: 4.4530 (4.3937) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-05 23:46:19 root] (utils.py 283): INFO Epoch: [19] [1830/2502] eta: 0:08:43 lr: 0.000008 loss_cls: 3.7146 (3.8405) grad_norm: 4.0885 (4.3991) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-05 23:46:27 root] (utils.py 283): INFO Epoch: [19] [1840/2502] eta: 0:08:35 lr: 0.000008 loss_cls: 3.6143 (3.8398) grad_norm: 4.1361 (4.3986) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-05 23:46:34 root] (utils.py 283): INFO Epoch: [19] [1850/2502] eta: 0:08:27 lr: 0.000008 loss_cls: 3.8332 (3.8399) grad_norm: 4.1705 (4.3979) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-05 23:46:42 root] (utils.py 283): INFO Epoch: [19] [1860/2502] eta: 0:08:19 lr: 0.000008 loss_cls: 4.0445 (3.8417) grad_norm: 4.2131 (4.3971) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-05 23:46:50 root] (utils.py 283): INFO Epoch: [19] [1870/2502] eta: 0:08:12 lr: 0.000008 loss_cls: 4.0346 (3.8407) grad_norm: 4.2298 (4.3966) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-05 23:46:58 root] (utils.py 283): INFO Epoch: [19] [1880/2502] eta: 0:08:04 lr: 0.000008 loss_cls: 3.9868 (3.8415) grad_norm: 4.3762 (4.3977) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-05 23:47:06 root] (utils.py 283): INFO Epoch: [19] [1890/2502] eta: 0:07:56 lr: 0.000008 loss_cls: 4.0701 (3.8424) grad_norm: 4.3189 (4.3991) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-05 23:47:13 root] (utils.py 283): INFO Epoch: [19] [1900/2502] eta: 0:07:48 lr: 0.000008 loss_cls: 3.9521 (3.8422) grad_norm: 4.2502 (4.3984) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 23:47:21 root] (utils.py 283): INFO Epoch: [19] [1910/2502] eta: 0:07:41 lr: 0.000008 loss_cls: 4.0532 (3.8435) grad_norm: 4.2644 (4.3978) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-05 23:47:29 root] (utils.py 283): INFO Epoch: [19] [1920/2502] eta: 0:07:33 lr: 0.000008 loss_cls: 4.3244 (3.8454) grad_norm: 4.2644 (4.3971) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-05 23:47:37 root] (utils.py 283): INFO Epoch: [19] [1930/2502] eta: 0:07:25 lr: 0.000008 loss_cls: 4.1396 (3.8456) grad_norm: 4.1122 (4.3959) time: 0.7812 data: 0.0002 max mem: 8421 +[2024-12-05 23:47:45 root] (utils.py 283): INFO Epoch: [19] [1940/2502] eta: 0:07:17 lr: 0.000008 loss_cls: 4.0187 (3.8463) grad_norm: 4.0885 (4.3952) time: 0.7905 data: 0.0002 max mem: 8421 +[2024-12-05 23:47:53 root] (utils.py 283): INFO Epoch: [19] [1950/2502] eta: 0:07:09 lr: 0.000008 loss_cls: 3.9593 (3.8456) grad_norm: 4.1166 (4.3937) time: 0.7937 data: 0.0002 max mem: 8421 +[2024-12-05 23:48:01 root] (utils.py 283): INFO Epoch: [19] [1960/2502] eta: 0:07:02 lr: 0.000008 loss_cls: 3.9488 (3.8465) grad_norm: 4.1826 (4.3930) time: 0.7916 data: 0.0003 max mem: 8421 +[2024-12-05 23:48:08 root] (utils.py 283): INFO Epoch: [19] [1970/2502] eta: 0:06:54 lr: 0.000008 loss_cls: 4.0222 (3.8469) grad_norm: 4.1830 (4.3918) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-05 23:48:16 root] (utils.py 283): INFO Epoch: [19] [1980/2502] eta: 0:06:46 lr: 0.000008 loss_cls: 3.9982 (3.8485) grad_norm: 4.1830 (4.3917) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-05 23:48:24 root] (utils.py 283): INFO Epoch: [19] [1990/2502] eta: 0:06:38 lr: 0.000008 loss_cls: 4.1881 (3.8496) grad_norm: 4.4712 (4.3978) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-05 23:48:32 root] (utils.py 283): INFO Epoch: [19] [2000/2502] eta: 0:06:31 lr: 0.000008 loss_cls: 3.6108 (3.8471) grad_norm: 4.5238 (4.3994) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-05 23:48:40 root] (utils.py 283): INFO Epoch: [19] [2010/2502] eta: 0:06:23 lr: 0.000008 loss_cls: 3.4012 (3.8457) grad_norm: 4.2893 (4.3983) time: 0.7759 data: 0.0003 max mem: 8421 +[2024-12-05 23:48:47 root] (utils.py 283): INFO Epoch: [19] [2020/2502] eta: 0:06:15 lr: 0.000008 loss_cls: 3.8999 (3.8461) grad_norm: 4.0450 (4.3965) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-05 23:48:55 root] (utils.py 283): INFO Epoch: [19] [2030/2502] eta: 0:06:07 lr: 0.000008 loss_cls: 4.1773 (3.8478) grad_norm: 4.1241 (4.3968) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-05 23:49:03 root] (utils.py 283): INFO Epoch: [19] [2040/2502] eta: 0:05:59 lr: 0.000008 loss_cls: 3.9556 (3.8477) grad_norm: 4.3117 (4.3972) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-05 23:49:11 root] (utils.py 283): INFO Epoch: [19] [2050/2502] eta: 0:05:52 lr: 0.000008 loss_cls: 3.8110 (3.8473) grad_norm: 4.2344 (4.3971) time: 0.7788 data: 0.0002 max mem: 8421 +[2024-12-05 23:49:18 root] (utils.py 283): INFO Epoch: [19] [2060/2502] eta: 0:05:44 lr: 0.000008 loss_cls: 3.8788 (3.8475) grad_norm: 4.1716 (4.3989) time: 0.7764 data: 0.0002 max mem: 8421 +[2024-12-05 23:49:26 root] (utils.py 283): INFO Epoch: [19] [2070/2502] eta: 0:05:36 lr: 0.000008 loss_cls: 3.8788 (3.8474) grad_norm: 4.3310 (4.3996) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-05 23:49:34 root] (utils.py 283): INFO Epoch: [19] [2080/2502] eta: 0:05:28 lr: 0.000008 loss_cls: 3.8589 (3.8473) grad_norm: 4.3310 (4.3992) time: 0.7768 data: 0.0002 max mem: 8421 +[2024-12-05 23:49:42 root] (utils.py 283): INFO Epoch: [19] [2090/2502] eta: 0:05:20 lr: 0.000008 loss_cls: 3.8272 (3.8461) grad_norm: 4.1860 (4.3986) time: 0.7766 data: 0.0002 max mem: 8421 +[2024-12-05 23:49:49 root] (utils.py 283): INFO Epoch: [19] [2100/2502] eta: 0:05:13 lr: 0.000008 loss_cls: 3.7438 (3.8458) grad_norm: 4.2968 (4.3990) time: 0.7771 data: 0.0002 max mem: 8421 +[2024-12-05 23:49:57 root] (utils.py 283): INFO Epoch: [19] [2110/2502] eta: 0:05:05 lr: 0.000008 loss_cls: 3.7539 (3.8449) grad_norm: 4.2971 (4.3987) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 23:50:06 root] (utils.py 283): INFO Epoch: [19] [2120/2502] eta: 0:04:57 lr: 0.000008 loss_cls: 3.9405 (3.8464) grad_norm: 4.2171 (4.3979) time: 0.8031 data: 0.0003 max mem: 8421 +[2024-12-05 23:50:13 root] (utils.py 283): INFO Epoch: [19] [2130/2502] eta: 0:04:49 lr: 0.000008 loss_cls: 3.8426 (3.8457) grad_norm: 4.1316 (4.3983) time: 0.8017 data: 0.0003 max mem: 8421 +[2024-12-05 23:50:21 root] (utils.py 283): INFO Epoch: [19] [2140/2502] eta: 0:04:42 lr: 0.000008 loss_cls: 3.7160 (3.8445) grad_norm: 4.0507 (4.3975) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-05 23:50:29 root] (utils.py 283): INFO Epoch: [19] [2150/2502] eta: 0:04:34 lr: 0.000008 loss_cls: 3.8056 (3.8454) grad_norm: 4.2160 (4.3974) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-05 23:50:37 root] (utils.py 283): INFO Epoch: [19] [2160/2502] eta: 0:04:26 lr: 0.000008 loss_cls: 4.0729 (3.8456) grad_norm: 4.2160 (4.3965) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-05 23:50:45 root] (utils.py 283): INFO Epoch: [19] [2170/2502] eta: 0:04:18 lr: 0.000008 loss_cls: 4.0729 (3.8470) grad_norm: 4.1661 (4.3958) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-05 23:50:52 root] (utils.py 283): INFO Epoch: [19] [2180/2502] eta: 0:04:10 lr: 0.000008 loss_cls: 4.0664 (3.8480) grad_norm: 4.1633 (4.3952) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-05 23:51:00 root] (utils.py 283): INFO Epoch: [19] [2190/2502] eta: 0:04:03 lr: 0.000008 loss_cls: 4.0282 (3.8485) grad_norm: 4.2518 (4.3957) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-05 23:51:08 root] (utils.py 283): INFO Epoch: [19] [2200/2502] eta: 0:03:55 lr: 0.000008 loss_cls: 4.0456 (3.8497) grad_norm: 4.3394 (4.3957) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-05 23:51:16 root] (utils.py 283): INFO Epoch: [19] [2210/2502] eta: 0:03:47 lr: 0.000008 loss_cls: 4.0456 (3.8498) grad_norm: 4.0292 (4.3943) time: 0.7865 data: 0.0002 max mem: 8421 +[2024-12-05 23:51:24 root] (utils.py 283): INFO Epoch: [19] [2220/2502] eta: 0:03:39 lr: 0.000008 loss_cls: 3.9609 (3.8495) grad_norm: 4.1703 (4.3997) time: 0.7832 data: 0.0002 max mem: 8421 +[2024-12-05 23:51:32 root] (utils.py 283): INFO Epoch: [19] [2230/2502] eta: 0:03:31 lr: 0.000008 loss_cls: 3.8467 (3.8494) grad_norm: 4.3329 (4.4003) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-05 23:51:39 root] (utils.py 283): INFO Epoch: [19] [2240/2502] eta: 0:03:24 lr: 0.000008 loss_cls: 3.8451 (3.8499) grad_norm: 4.3077 (4.3999) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-05 23:51:47 root] (utils.py 283): INFO Epoch: [19] [2250/2502] eta: 0:03:16 lr: 0.000008 loss_cls: 3.7681 (3.8490) grad_norm: 4.1360 (4.3989) time: 0.7832 data: 0.0002 max mem: 8421 +[2024-12-05 23:51:55 root] (utils.py 283): INFO Epoch: [19] [2260/2502] eta: 0:03:08 lr: 0.000008 loss_cls: 3.8424 (3.8488) grad_norm: 4.1930 (4.3984) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-05 23:52:03 root] (utils.py 283): INFO Epoch: [19] [2270/2502] eta: 0:03:00 lr: 0.000008 loss_cls: 3.8884 (3.8485) grad_norm: 4.2255 (4.3979) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-05 23:52:11 root] (utils.py 283): INFO Epoch: [19] [2280/2502] eta: 0:02:53 lr: 0.000008 loss_cls: 3.8452 (3.8488) grad_norm: 4.2255 (4.3981) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-05 23:52:19 root] (utils.py 283): INFO Epoch: [19] [2290/2502] eta: 0:02:45 lr: 0.000008 loss_cls: 3.8662 (3.8487) grad_norm: 4.1247 (4.3974) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-05 23:52:27 root] (utils.py 283): INFO Epoch: [19] [2300/2502] eta: 0:02:37 lr: 0.000008 loss_cls: 3.9703 (3.8492) grad_norm: 4.2646 (4.3975) time: 0.8175 data: 0.0004 max mem: 8421 +[2024-12-05 23:52:36 root] (utils.py 283): INFO Epoch: [19] [2310/2502] eta: 0:02:29 lr: 0.000008 loss_cls: 3.8263 (3.8485) grad_norm: 4.2952 (4.3979) time: 0.8758 data: 0.0005 max mem: 8421 +[2024-12-05 23:52:44 root] (utils.py 283): INFO Epoch: [19] [2320/2502] eta: 0:02:22 lr: 0.000008 loss_cls: 4.0036 (3.8485) grad_norm: 4.2403 (4.3979) time: 0.8594 data: 0.0004 max mem: 8421 +[2024-12-05 23:52:52 root] (utils.py 283): INFO Epoch: [19] [2330/2502] eta: 0:02:14 lr: 0.000008 loss_cls: 3.9053 (3.8481) grad_norm: 4.2410 (4.3977) time: 0.8040 data: 0.0003 max mem: 8421 +[2024-12-05 23:53:00 root] (utils.py 283): INFO Epoch: [19] [2340/2502] eta: 0:02:06 lr: 0.000008 loss_cls: 3.6831 (3.8474) grad_norm: 4.2524 (4.3970) time: 0.7825 data: 0.0002 max mem: 8421 +[2024-12-05 23:53:08 root] (utils.py 283): INFO Epoch: [19] [2350/2502] eta: 0:01:58 lr: 0.000008 loss_cls: 3.6771 (3.8464) grad_norm: 4.1029 (4.3958) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 23:53:16 root] (utils.py 283): INFO Epoch: [19] [2360/2502] eta: 0:01:50 lr: 0.000008 loss_cls: 3.8274 (3.8466) grad_norm: 4.2761 (4.3963) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-05 23:53:24 root] (utils.py 283): INFO Epoch: [19] [2370/2502] eta: 0:01:43 lr: 0.000008 loss_cls: 3.8860 (3.8468) grad_norm: 4.3309 (4.3961) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-05 23:53:31 root] (utils.py 283): INFO Epoch: [19] [2380/2502] eta: 0:01:35 lr: 0.000008 loss_cls: 4.0117 (3.8470) grad_norm: 4.2736 (4.3964) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-05 23:53:39 root] (utils.py 283): INFO Epoch: [19] [2390/2502] eta: 0:01:27 lr: 0.000008 loss_cls: 3.9383 (3.8468) grad_norm: 4.3860 (4.3965) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-05 23:53:47 root] (utils.py 283): INFO Epoch: [19] [2400/2502] eta: 0:01:19 lr: 0.000008 loss_cls: 3.6958 (3.8463) grad_norm: 4.3860 (4.3965) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 23:53:55 root] (utils.py 283): INFO Epoch: [19] [2410/2502] eta: 0:01:11 lr: 0.000008 loss_cls: 3.9364 (3.8465) grad_norm: 4.2020 (4.3955) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-05 23:54:03 root] (utils.py 283): INFO Epoch: [19] [2420/2502] eta: 0:01:04 lr: 0.000008 loss_cls: 3.9083 (3.8463) grad_norm: 4.1703 (4.3954) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-05 23:54:10 root] (utils.py 283): INFO Epoch: [19] [2430/2502] eta: 0:00:56 lr: 0.000008 loss_cls: 4.1020 (3.8470) grad_norm: 4.2395 (4.3961) time: 0.7821 data: 0.0002 max mem: 8421 +[2024-12-05 23:54:18 root] (utils.py 283): INFO Epoch: [19] [2440/2502] eta: 0:00:48 lr: 0.000008 loss_cls: 4.1854 (3.8477) grad_norm: 4.2867 (4.3956) time: 0.7812 data: 0.0002 max mem: 8421 +[2024-12-05 23:54:26 root] (utils.py 283): INFO Epoch: [19] [2450/2502] eta: 0:00:40 lr: 0.000008 loss_cls: 3.7004 (3.8469) grad_norm: 4.2873 (4.3965) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 23:54:34 root] (utils.py 283): INFO Epoch: [19] [2460/2502] eta: 0:00:32 lr: 0.000008 loss_cls: 3.5522 (3.8461) grad_norm: 4.2892 (4.3966) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-05 23:54:42 root] (utils.py 283): INFO Epoch: [19] [2470/2502] eta: 0:00:24 lr: 0.000008 loss_cls: 3.8027 (3.8464) grad_norm: 4.2918 (4.3969) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-05 23:54:50 root] (utils.py 283): INFO Epoch: [19] [2480/2502] eta: 0:00:17 lr: 0.000008 loss_cls: 3.9139 (3.8459) grad_norm: 4.4031 (4.3966) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-05 23:54:58 root] (utils.py 283): INFO Epoch: [19] [2490/2502] eta: 0:00:09 lr: 0.000008 loss_cls: 3.7818 (3.8454) grad_norm: 4.3530 (4.3980) time: 0.8104 data: 0.0252 max mem: 8421 +[2024-12-05 23:55:06 root] (utils.py 283): INFO Epoch: [19] [2500/2502] eta: 0:00:01 lr: 0.000008 loss_cls: 3.9923 (3.8460) grad_norm: 4.3502 (4.3978) time: 0.8134 data: 0.0252 max mem: 8421 +[2024-12-05 23:55:07 root] (utils.py 283): INFO Epoch: [19] [2501/2502] eta: 0:00:00 lr: 0.000008 loss_cls: 3.9451 (3.8459) grad_norm: 4.3512 (4.3981) time: 0.8127 data: 0.0252 max mem: 8421 +[2024-12-05 23:55:07 root] (utils.py 297): INFO Epoch: [19] Total time: 0:32:33 (0.7810 s / it) +[2024-12-05 23:55:07 root] (engine.py 179): INFO Averaged stats:lr: 0.000008 loss_cls: 3.9451 (3.8415) grad_norm: 4.3512 (4.3981) +[2024-12-05 23:55:07 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7245 (0.7245) acc1: 83.5938 (83.5938) acc3: 95.3125 (95.3125) acc5: 97.6562 (97.6562) time: 0.1309 data: 0.0003 max mem: 8421 +[2024-12-05 23:55:08 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8002 (0.8601) acc1: 83.5938 (81.8182) acc3: 93.7500 (92.8977) acc5: 96.0938 (95.5256) time: 0.1320 data: 0.0004 max mem: 8421 +[2024-12-05 23:55:10 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8952 (0.9104) acc1: 78.9062 (80.5432) acc3: 92.1875 (92.2991) acc5: 95.3125 (95.0893) time: 0.1328 data: 0.0005 max mem: 8421 +[2024-12-05 23:55:11 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9728 (0.9141) acc1: 79.6875 (79.9395) acc3: 91.4062 (92.6411) acc5: 96.0938 (95.4385) time: 0.1346 data: 0.0005 max mem: 8421 +[2024-12-05 23:55:13 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8518 (0.9079) acc1: 81.2500 (80.3925) acc3: 93.7500 (92.7210) acc5: 96.0938 (95.4268) time: 0.1545 data: 0.0185 max mem: 8421 +[2024-12-05 23:55:14 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0776 (0.9997) acc1: 73.4375 (78.3088) acc3: 89.0625 (91.2377) acc5: 92.1875 (94.3168) time: 0.1563 data: 0.0191 max mem: 8421 +[2024-12-05 23:55:16 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2946 (1.0427) acc1: 71.8750 (77.4462) acc3: 85.9375 (90.5353) acc5: 89.8438 (93.6219) time: 0.1433 data: 0.0061 max mem: 8421 +[2024-12-05 23:55:18 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2758 (1.0868) acc1: 71.8750 (76.2324) acc3: 86.7188 (89.8988) acc5: 89.8438 (93.1448) time: 0.1708 data: 0.0341 max mem: 8421 +[2024-12-05 23:55:19 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3075 (1.1238) acc1: 68.7500 (75.4726) acc3: 85.9375 (89.2747) acc5: 89.8438 (92.6215) time: 0.1834 data: 0.0459 max mem: 8421 +[2024-12-05 23:55:21 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3511 (1.1535) acc1: 67.9688 (74.6738) acc3: 84.3750 (88.7534) acc5: 89.0625 (92.1961) time: 0.1546 data: 0.0173 max mem: 8421 +[2024-12-05 23:55:22 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1785 (1.1422) acc1: 73.4375 (74.7760) acc3: 88.2812 (88.9680) acc5: 91.4062 (92.4000) time: 0.1385 data: 0.0009 max mem: 8421 +[2024-12-05 23:55:22 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1510 s / it) +[2024-12-05 23:55:23 root] (engine.py 264): INFO * Acc@1 74.728 Acc@3 88.932 Acc@5 92.318 loss 1.141 flops 1.285 layer_flops 1.251 +[2024-12-05 23:55:23 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 74.7% +[2024-12-05 23:55:24 root] (main.py 551): INFO Max accuracy: 74.73% +[2024-12-05 23:55:24 root] (utils.py 283): INFO Epoch: [20] [ 0/2502] eta: 0:32:35 lr: 0.000007 loss_cls: 3.9010 (3.9010) grad_norm: 4.5739 (4.5739) time: 0.7818 data: 0.0008 max mem: 8421 +[2024-12-05 23:55:32 root] (utils.py 283): INFO Epoch: [20] [ 10/2502] eta: 0:32:25 lr: 0.000007 loss_cls: 4.1218 (4.0376) grad_norm: 4.4147 (4.5837) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-05 23:55:40 root] (utils.py 283): INFO Epoch: [20] [ 20/2502] eta: 0:32:35 lr: 0.000007 loss_cls: 4.1218 (4.0691) grad_norm: 4.3660 (4.8835) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-05 23:55:48 root] (utils.py 283): INFO Epoch: [20] [ 30/2502] eta: 0:32:17 lr: 0.000007 loss_cls: 4.2194 (4.0701) grad_norm: 4.3650 (4.7200) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-05 23:55:56 root] (utils.py 283): INFO Epoch: [20] [ 40/2502] eta: 0:32:16 lr: 0.000007 loss_cls: 4.0089 (3.9571) grad_norm: 4.2145 (4.5912) time: 0.7851 data: 0.0002 max mem: 8421 +[2024-12-05 23:56:04 root] (utils.py 283): INFO Epoch: [20] [ 50/2502] eta: 0:32:05 lr: 0.000007 loss_cls: 3.9826 (3.9917) grad_norm: 4.1712 (4.5200) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-05 23:56:12 root] (utils.py 283): INFO Epoch: [20] [ 60/2502] eta: 0:32:01 lr: 0.000007 loss_cls: 3.9826 (3.9798) grad_norm: 4.1439 (4.4887) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-05 23:56:20 root] (utils.py 283): INFO Epoch: [20] [ 70/2502] eta: 0:32:07 lr: 0.000007 loss_cls: 3.9535 (3.9701) grad_norm: 4.2351 (4.4509) time: 0.8111 data: 0.0003 max mem: 8421 +[2024-12-05 23:56:33 root] (utils.py 283): INFO Epoch: [20] [ 80/2502] eta: 0:34:24 lr: 0.000007 loss_cls: 3.9498 (3.9618) grad_norm: 4.1234 (4.4221) time: 1.0531 data: 0.0013 max mem: 8421 +[2024-12-05 23:56:46 root] (utils.py 283): INFO Epoch: [20] [ 90/2502] eta: 0:36:32 lr: 0.000007 loss_cls: 4.1358 (3.9784) grad_norm: 4.2676 (4.4270) time: 1.3229 data: 0.0025 max mem: 8421 +[2024-12-05 23:57:01 root] (utils.py 283): INFO Epoch: [20] [ 100/2502] eta: 0:38:24 lr: 0.000007 loss_cls: 3.8930 (3.9476) grad_norm: 4.2645 (4.4000) time: 1.3931 data: 0.0024 max mem: 8421 +[2024-12-05 23:57:14 root] (utils.py 283): INFO Epoch: [20] [ 110/2502] eta: 0:39:28 lr: 0.000007 loss_cls: 3.8930 (3.9531) grad_norm: 4.1734 (4.4010) time: 1.3589 data: 0.0018 max mem: 8421 +[2024-12-05 23:57:25 root] (utils.py 283): INFO Epoch: [20] [ 120/2502] eta: 0:39:46 lr: 0.000007 loss_cls: 3.9899 (3.9414) grad_norm: 4.3428 (4.6592) time: 1.2147 data: 0.0015 max mem: 8421 +[2024-12-05 23:57:33 root] (utils.py 283): INFO Epoch: [20] [ 130/2502] eta: 0:38:59 lr: 0.000007 loss_cls: 4.0164 (3.9434) grad_norm: 4.4064 (4.6345) time: 0.9657 data: 0.0009 max mem: 8421 +[2024-12-05 23:57:41 root] (utils.py 283): INFO Epoch: [20] [ 140/2502] eta: 0:38:15 lr: 0.000007 loss_cls: 3.8114 (3.9143) grad_norm: 4.2608 (4.6096) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-05 23:57:49 root] (utils.py 283): INFO Epoch: [20] [ 150/2502] eta: 0:37:35 lr: 0.000007 loss_cls: 3.5908 (3.9067) grad_norm: 4.2742 (4.5904) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-05 23:57:56 root] (utils.py 283): INFO Epoch: [20] [ 160/2502] eta: 0:37:00 lr: 0.000007 loss_cls: 4.0172 (3.9086) grad_norm: 4.2742 (4.5744) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-05 23:58:04 root] (utils.py 283): INFO Epoch: [20] [ 170/2502] eta: 0:36:28 lr: 0.000007 loss_cls: 4.0950 (3.9189) grad_norm: 4.1757 (4.5685) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-05 23:58:12 root] (utils.py 283): INFO Epoch: [20] [ 180/2502] eta: 0:35:58 lr: 0.000007 loss_cls: 4.1259 (3.9085) grad_norm: 4.1757 (4.5514) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-05 23:58:20 root] (utils.py 283): INFO Epoch: [20] [ 190/2502] eta: 0:35:31 lr: 0.000007 loss_cls: 4.1164 (3.9173) grad_norm: 4.2147 (4.5360) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-05 23:58:28 root] (utils.py 283): INFO Epoch: [20] [ 200/2502] eta: 0:35:06 lr: 0.000007 loss_cls: 4.0965 (3.9195) grad_norm: 4.3239 (4.5291) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-05 23:58:36 root] (utils.py 283): INFO Epoch: [20] [ 210/2502] eta: 0:34:43 lr: 0.000007 loss_cls: 3.9645 (3.9144) grad_norm: 4.0511 (4.5112) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-05 23:58:44 root] (utils.py 283): INFO Epoch: [20] [ 220/2502] eta: 0:34:23 lr: 0.000007 loss_cls: 3.7150 (3.8987) grad_norm: 4.2097 (4.5068) time: 0.7940 data: 0.0003 max mem: 8421 +[2024-12-05 23:58:51 root] (utils.py 283): INFO Epoch: [20] [ 230/2502] eta: 0:34:01 lr: 0.000007 loss_cls: 3.7150 (3.8925) grad_norm: 4.2097 (4.4942) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-05 23:58:59 root] (utils.py 283): INFO Epoch: [20] [ 240/2502] eta: 0:33:42 lr: 0.000007 loss_cls: 3.9874 (3.9018) grad_norm: 4.2426 (4.4940) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-05 23:59:07 root] (utils.py 283): INFO Epoch: [20] [ 250/2502] eta: 0:33:22 lr: 0.000007 loss_cls: 4.2206 (3.9117) grad_norm: 4.3278 (4.4928) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-05 23:59:15 root] (utils.py 283): INFO Epoch: [20] [ 260/2502] eta: 0:33:06 lr: 0.000007 loss_cls: 4.1200 (3.9154) grad_norm: 4.2906 (4.4864) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-05 23:59:23 root] (utils.py 283): INFO Epoch: [20] [ 270/2502] eta: 0:32:48 lr: 0.000007 loss_cls: 3.8951 (3.9022) grad_norm: 4.2764 (4.4788) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-05 23:59:31 root] (utils.py 283): INFO Epoch: [20] [ 280/2502] eta: 0:32:31 lr: 0.000007 loss_cls: 3.8951 (3.9063) grad_norm: 4.1466 (4.4674) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-05 23:59:38 root] (utils.py 283): INFO Epoch: [20] [ 290/2502] eta: 0:32:16 lr: 0.000007 loss_cls: 3.9205 (3.9045) grad_norm: 4.1129 (4.4713) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-05 23:59:46 root] (utils.py 283): INFO Epoch: [20] [ 300/2502] eta: 0:32:01 lr: 0.000007 loss_cls: 3.9061 (3.9074) grad_norm: 4.2852 (4.4692) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-05 23:59:55 root] (utils.py 283): INFO Epoch: [20] [ 310/2502] eta: 0:31:48 lr: 0.000007 loss_cls: 3.7479 (3.8936) grad_norm: 4.2852 (4.4646) time: 0.8009 data: 0.0003 max mem: 8421 +[2024-12-06 00:00:03 root] (utils.py 283): INFO Epoch: [20] [ 320/2502] eta: 0:31:38 lr: 0.000007 loss_cls: 3.6436 (3.8926) grad_norm: 4.2876 (4.4684) time: 0.8342 data: 0.0003 max mem: 8421 +[2024-12-06 00:00:11 root] (utils.py 283): INFO Epoch: [20] [ 330/2502] eta: 0:31:27 lr: 0.000007 loss_cls: 4.0405 (3.8979) grad_norm: 4.4031 (4.4658) time: 0.8463 data: 0.0004 max mem: 8421 +[2024-12-06 00:00:20 root] (utils.py 283): INFO Epoch: [20] [ 340/2502] eta: 0:31:16 lr: 0.000007 loss_cls: 4.0405 (3.8976) grad_norm: 4.1799 (4.4549) time: 0.8363 data: 0.0005 max mem: 8421 +[2024-12-06 00:00:28 root] (utils.py 283): INFO Epoch: [20] [ 350/2502] eta: 0:31:03 lr: 0.000007 loss_cls: 4.0331 (3.8987) grad_norm: 4.0615 (4.4500) time: 0.8108 data: 0.0004 max mem: 8421 +[2024-12-06 00:00:36 root] (utils.py 283): INFO Epoch: [20] [ 360/2502] eta: 0:30:49 lr: 0.000007 loss_cls: 4.0331 (3.9018) grad_norm: 4.0068 (4.4472) time: 0.7887 data: 0.0003 max mem: 8421 +[2024-12-06 00:00:43 root] (utils.py 283): INFO Epoch: [20] [ 370/2502] eta: 0:30:36 lr: 0.000007 loss_cls: 3.8087 (3.8898) grad_norm: 4.5149 (4.4579) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-06 00:00:51 root] (utils.py 283): INFO Epoch: [20] [ 380/2502] eta: 0:30:23 lr: 0.000007 loss_cls: 3.3191 (3.8835) grad_norm: 4.5405 (4.4548) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 00:00:59 root] (utils.py 283): INFO Epoch: [20] [ 390/2502] eta: 0:30:10 lr: 0.000007 loss_cls: 3.7694 (3.8828) grad_norm: 4.2894 (4.4511) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 00:01:07 root] (utils.py 283): INFO Epoch: [20] [ 400/2502] eta: 0:29:58 lr: 0.000007 loss_cls: 3.6583 (3.8764) grad_norm: 4.2342 (4.4466) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 00:01:15 root] (utils.py 283): INFO Epoch: [20] [ 410/2502] eta: 0:29:45 lr: 0.000007 loss_cls: 3.6583 (3.8766) grad_norm: 4.1670 (4.4462) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 00:01:22 root] (utils.py 283): INFO Epoch: [20] [ 420/2502] eta: 0:29:33 lr: 0.000007 loss_cls: 3.6626 (3.8684) grad_norm: 4.2653 (4.4447) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 00:01:30 root] (utils.py 283): INFO Epoch: [20] [ 430/2502] eta: 0:29:21 lr: 0.000007 loss_cls: 3.6186 (3.8639) grad_norm: 4.3049 (4.4470) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-06 00:01:38 root] (utils.py 283): INFO Epoch: [20] [ 440/2502] eta: 0:29:09 lr: 0.000007 loss_cls: 3.9293 (3.8662) grad_norm: 4.4301 (4.4472) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 00:01:46 root] (utils.py 283): INFO Epoch: [20] [ 450/2502] eta: 0:28:58 lr: 0.000007 loss_cls: 3.9327 (3.8661) grad_norm: 4.2138 (4.4389) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 00:01:54 root] (utils.py 283): INFO Epoch: [20] [ 460/2502] eta: 0:28:47 lr: 0.000007 loss_cls: 3.9327 (3.8642) grad_norm: 4.1145 (4.4320) time: 0.7841 data: 0.0002 max mem: 8421 +[2024-12-06 00:02:02 root] (utils.py 283): INFO Epoch: [20] [ 470/2502] eta: 0:28:36 lr: 0.000007 loss_cls: 4.0251 (3.8683) grad_norm: 4.0414 (4.4298) time: 0.7837 data: 0.0002 max mem: 8421 +[2024-12-06 00:02:09 root] (utils.py 283): INFO Epoch: [20] [ 480/2502] eta: 0:28:25 lr: 0.000007 loss_cls: 4.0184 (3.8654) grad_norm: 4.0168 (4.4246) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 00:02:17 root] (utils.py 283): INFO Epoch: [20] [ 490/2502] eta: 0:28:14 lr: 0.000007 loss_cls: 3.9409 (3.8652) grad_norm: 4.1658 (4.4278) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 00:02:25 root] (utils.py 283): INFO Epoch: [20] [ 500/2502] eta: 0:28:03 lr: 0.000007 loss_cls: 4.0265 (3.8671) grad_norm: 4.1627 (4.4289) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 00:02:33 root] (utils.py 283): INFO Epoch: [20] [ 510/2502] eta: 0:27:53 lr: 0.000007 loss_cls: 3.9349 (3.8663) grad_norm: 4.1352 (4.4219) time: 0.7980 data: 0.0003 max mem: 8421 +[2024-12-06 00:02:41 root] (utils.py 283): INFO Epoch: [20] [ 520/2502] eta: 0:27:43 lr: 0.000007 loss_cls: 3.8801 (3.8681) grad_norm: 4.1909 (4.4255) time: 0.7956 data: 0.0003 max mem: 8421 +[2024-12-06 00:02:49 root] (utils.py 283): INFO Epoch: [20] [ 530/2502] eta: 0:27:32 lr: 0.000007 loss_cls: 3.5076 (3.8605) grad_norm: 4.4564 (4.4308) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 00:02:57 root] (utils.py 283): INFO Epoch: [20] [ 540/2502] eta: 0:27:22 lr: 0.000007 loss_cls: 3.5281 (3.8623) grad_norm: 4.1919 (4.4286) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 00:03:05 root] (utils.py 283): INFO Epoch: [20] [ 550/2502] eta: 0:27:13 lr: 0.000007 loss_cls: 4.0775 (3.8602) grad_norm: 4.2903 (4.4310) time: 0.8027 data: 0.0003 max mem: 8421 +[2024-12-06 00:03:13 root] (utils.py 283): INFO Epoch: [20] [ 560/2502] eta: 0:27:05 lr: 0.000007 loss_cls: 3.4662 (3.8500) grad_norm: 4.3523 (4.4346) time: 0.8318 data: 0.0004 max mem: 8421 +[2024-12-06 00:03:22 root] (utils.py 283): INFO Epoch: [20] [ 570/2502] eta: 0:26:56 lr: 0.000007 loss_cls: 3.3513 (3.8506) grad_norm: 4.2291 (4.4367) time: 0.8400 data: 0.0004 max mem: 8421 +[2024-12-06 00:03:30 root] (utils.py 283): INFO Epoch: [20] [ 580/2502] eta: 0:26:48 lr: 0.000007 loss_cls: 4.0819 (3.8510) grad_norm: 4.2399 (4.4347) time: 0.8384 data: 0.0004 max mem: 8421 +[2024-12-06 00:03:38 root] (utils.py 283): INFO Epoch: [20] [ 590/2502] eta: 0:26:38 lr: 0.000007 loss_cls: 3.8899 (3.8481) grad_norm: 4.3310 (4.4345) time: 0.8160 data: 0.0004 max mem: 8421 +[2024-12-06 00:03:46 root] (utils.py 283): INFO Epoch: [20] [ 600/2502] eta: 0:26:28 lr: 0.000007 loss_cls: 3.8899 (3.8486) grad_norm: 4.4273 (4.4404) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 00:03:54 root] (utils.py 283): INFO Epoch: [20] [ 610/2502] eta: 0:26:18 lr: 0.000007 loss_cls: 3.8007 (3.8469) grad_norm: 4.2411 (4.4406) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 00:04:01 root] (utils.py 283): INFO Epoch: [20] [ 620/2502] eta: 0:26:08 lr: 0.000007 loss_cls: 3.8007 (3.8447) grad_norm: 4.1575 (4.4348) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-06 00:04:09 root] (utils.py 283): INFO Epoch: [20] [ 630/2502] eta: 0:25:58 lr: 0.000007 loss_cls: 4.1779 (3.8506) grad_norm: 4.0671 (4.4327) time: 0.7837 data: 0.0002 max mem: 8421 +[2024-12-06 00:04:17 root] (utils.py 283): INFO Epoch: [20] [ 640/2502] eta: 0:25:48 lr: 0.000007 loss_cls: 4.0847 (3.8481) grad_norm: 4.1400 (4.4306) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 00:04:25 root] (utils.py 283): INFO Epoch: [20] [ 650/2502] eta: 0:25:39 lr: 0.000007 loss_cls: 3.7120 (3.8444) grad_norm: 4.2450 (4.4485) time: 0.7902 data: 0.0002 max mem: 8421 +[2024-12-06 00:04:33 root] (utils.py 283): INFO Epoch: [20] [ 660/2502] eta: 0:25:30 lr: 0.000007 loss_cls: 3.7349 (3.8442) grad_norm: 4.2949 (4.4450) time: 0.8055 data: 0.0002 max mem: 8421 +[2024-12-06 00:04:41 root] (utils.py 283): INFO Epoch: [20] [ 670/2502] eta: 0:25:21 lr: 0.000007 loss_cls: 3.9168 (3.8450) grad_norm: 4.2402 (4.4468) time: 0.7975 data: 0.0002 max mem: 8421 +[2024-12-06 00:04:50 root] (utils.py 283): INFO Epoch: [20] [ 680/2502] eta: 0:25:13 lr: 0.000007 loss_cls: 4.0925 (3.8497) grad_norm: 4.2953 (4.4513) time: 0.8225 data: 0.0004 max mem: 8421 +[2024-12-06 00:05:03 root] (utils.py 283): INFO Epoch: [20] [ 690/2502] eta: 0:25:19 lr: 0.000007 loss_cls: 4.0525 (3.8502) grad_norm: 4.3391 (4.4499) time: 1.1076 data: 0.0016 max mem: 8421 +[2024-12-06 00:05:17 root] (utils.py 283): INFO Epoch: [20] [ 700/2502] eta: 0:25:25 lr: 0.000007 loss_cls: 4.0363 (3.8516) grad_norm: 4.3500 (4.4510) time: 1.3803 data: 0.0037 max mem: 8421 +[2024-12-06 00:05:32 root] (utils.py 283): INFO Epoch: [20] [ 710/2502] eta: 0:25:33 lr: 0.000007 loss_cls: 4.0699 (3.8515) grad_norm: 4.3641 (4.4485) time: 1.4603 data: 0.0035 max mem: 8421 +[2024-12-06 00:05:47 root] (utils.py 283): INFO Epoch: [20] [ 720/2502] eta: 0:25:39 lr: 0.000007 loss_cls: 3.9393 (3.8501) grad_norm: 4.2978 (4.4506) time: 1.4844 data: 0.0023 max mem: 8421 +[2024-12-06 00:06:01 root] (utils.py 283): INFO Epoch: [20] [ 730/2502] eta: 0:25:43 lr: 0.000007 loss_cls: 4.0665 (3.8514) grad_norm: 4.3026 (4.4491) time: 1.4102 data: 0.0022 max mem: 8421 +[2024-12-06 00:06:09 root] (utils.py 283): INFO Epoch: [20] [ 740/2502] eta: 0:25:34 lr: 0.000007 loss_cls: 4.1453 (3.8520) grad_norm: 4.3777 (4.4494) time: 1.1136 data: 0.0013 max mem: 8421 +[2024-12-06 00:06:17 root] (utils.py 283): INFO Epoch: [20] [ 750/2502] eta: 0:25:23 lr: 0.000007 loss_cls: 3.9384 (3.8538) grad_norm: 4.3741 (4.4518) time: 0.8218 data: 0.0004 max mem: 8421 +[2024-12-06 00:06:25 root] (utils.py 283): INFO Epoch: [20] [ 760/2502] eta: 0:25:13 lr: 0.000007 loss_cls: 3.7883 (3.8514) grad_norm: 4.3358 (4.4531) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 00:06:33 root] (utils.py 283): INFO Epoch: [20] [ 770/2502] eta: 0:25:02 lr: 0.000007 loss_cls: 3.7264 (3.8499) grad_norm: 4.3665 (4.4520) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-06 00:06:44 root] (utils.py 283): INFO Epoch: [20] [ 780/2502] eta: 0:24:59 lr: 0.000007 loss_cls: 3.9131 (3.8520) grad_norm: 4.3869 (4.4517) time: 0.9617 data: 0.0007 max mem: 8421 +[2024-12-06 00:06:59 root] (utils.py 283): INFO Epoch: [20] [ 790/2502] eta: 0:25:03 lr: 0.000007 loss_cls: 3.7746 (3.8473) grad_norm: 4.2895 (4.4520) time: 1.2944 data: 0.0015 max mem: 8421 +[2024-12-06 00:07:13 root] (utils.py 283): INFO Epoch: [20] [ 800/2502] eta: 0:25:05 lr: 0.000007 loss_cls: 3.8503 (3.8479) grad_norm: 4.0779 (4.4492) time: 1.4212 data: 0.0020 max mem: 8421 +[2024-12-06 00:07:29 root] (utils.py 283): INFO Epoch: [20] [ 810/2502] eta: 0:25:12 lr: 0.000007 loss_cls: 3.8968 (3.8475) grad_norm: 4.2179 (4.4475) time: 1.5107 data: 0.0039 max mem: 8421 +[2024-12-06 00:07:42 root] (utils.py 283): INFO Epoch: [20] [ 820/2502] eta: 0:25:12 lr: 0.000007 loss_cls: 3.7384 (3.8441) grad_norm: 4.2525 (4.4472) time: 1.4714 data: 0.0044 max mem: 8421 +[2024-12-06 00:07:50 root] (utils.py 283): INFO Epoch: [20] [ 830/2502] eta: 0:25:00 lr: 0.000007 loss_cls: 4.0095 (3.8463) grad_norm: 4.2525 (4.4454) time: 1.0491 data: 0.0017 max mem: 8421 +[2024-12-06 00:07:58 root] (utils.py 283): INFO Epoch: [20] [ 840/2502] eta: 0:24:49 lr: 0.000007 loss_cls: 4.1343 (3.8473) grad_norm: 4.3728 (4.4475) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 00:08:06 root] (utils.py 283): INFO Epoch: [20] [ 850/2502] eta: 0:24:38 lr: 0.000007 loss_cls: 4.0391 (3.8480) grad_norm: 4.3728 (4.4478) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 00:08:13 root] (utils.py 283): INFO Epoch: [20] [ 860/2502] eta: 0:24:27 lr: 0.000007 loss_cls: 3.9616 (3.8498) grad_norm: 4.3215 (4.4491) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 00:08:24 root] (utils.py 283): INFO Epoch: [20] [ 870/2502] eta: 0:24:20 lr: 0.000007 loss_cls: 4.0134 (3.8505) grad_norm: 4.2548 (4.4459) time: 0.8979 data: 0.0006 max mem: 8421 +[2024-12-06 00:08:36 root] (utils.py 283): INFO Epoch: [20] [ 880/2502] eta: 0:24:18 lr: 0.000007 loss_cls: 4.0251 (3.8503) grad_norm: 4.0682 (4.4415) time: 1.1311 data: 0.0018 max mem: 8421 +[2024-12-06 00:08:51 root] (utils.py 283): INFO Epoch: [20] [ 890/2502] eta: 0:24:20 lr: 0.000007 loss_cls: 4.0192 (3.8472) grad_norm: 4.1340 (4.4402) time: 1.3946 data: 0.0036 max mem: 8421 +[2024-12-06 00:09:07 root] (utils.py 283): INFO Epoch: [20] [ 900/2502] eta: 0:24:22 lr: 0.000007 loss_cls: 3.7693 (3.8471) grad_norm: 4.2573 (4.4381) time: 1.5355 data: 0.0041 max mem: 8421 +[2024-12-06 00:09:20 root] (utils.py 283): INFO Epoch: [20] [ 910/2502] eta: 0:24:20 lr: 0.000007 loss_cls: 3.8896 (3.8447) grad_norm: 4.2682 (4.4398) time: 1.4265 data: 0.0027 max mem: 8421 +[2024-12-06 00:09:30 root] (utils.py 283): INFO Epoch: [20] [ 920/2502] eta: 0:24:13 lr: 0.000007 loss_cls: 3.4436 (3.8428) grad_norm: 4.3707 (4.4500) time: 1.1729 data: 0.0014 max mem: 8421 +[2024-12-06 00:09:38 root] (utils.py 283): INFO Epoch: [20] [ 930/2502] eta: 0:24:02 lr: 0.000007 loss_cls: 3.5759 (3.8423) grad_norm: 4.4150 (4.4513) time: 0.9013 data: 0.0006 max mem: 8421 +[2024-12-06 00:09:46 root] (utils.py 283): INFO Epoch: [20] [ 940/2502] eta: 0:23:50 lr: 0.000007 loss_cls: 3.8838 (3.8437) grad_norm: 4.3336 (4.4514) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 00:09:54 root] (utils.py 283): INFO Epoch: [20] [ 950/2502] eta: 0:23:39 lr: 0.000007 loss_cls: 3.8838 (3.8419) grad_norm: 4.2601 (4.4507) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 00:10:01 root] (utils.py 283): INFO Epoch: [20] [ 960/2502] eta: 0:23:28 lr: 0.000007 loss_cls: 3.8886 (3.8451) grad_norm: 4.3334 (4.4516) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-06 00:10:09 root] (utils.py 283): INFO Epoch: [20] [ 970/2502] eta: 0:23:17 lr: 0.000007 loss_cls: 4.1739 (3.8448) grad_norm: 4.3334 (4.4512) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-06 00:10:17 root] (utils.py 283): INFO Epoch: [20] [ 980/2502] eta: 0:23:05 lr: 0.000007 loss_cls: 3.8256 (3.8437) grad_norm: 4.3132 (4.4511) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-06 00:10:25 root] (utils.py 283): INFO Epoch: [20] [ 990/2502] eta: 0:22:54 lr: 0.000007 loss_cls: 3.9779 (3.8438) grad_norm: 4.3957 (4.4515) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 00:10:33 root] (utils.py 283): INFO Epoch: [20] [1000/2502] eta: 0:22:43 lr: 0.000007 loss_cls: 4.0908 (3.8462) grad_norm: 4.3706 (4.4490) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 00:10:42 root] (utils.py 283): INFO Epoch: [20] [1010/2502] eta: 0:22:35 lr: 0.000007 loss_cls: 4.0908 (3.8471) grad_norm: 4.1858 (4.4485) time: 0.8647 data: 0.0005 max mem: 8421 +[2024-12-06 00:10:54 root] (utils.py 283): INFO Epoch: [20] [1020/2502] eta: 0:22:30 lr: 0.000007 loss_cls: 3.8891 (3.8473) grad_norm: 4.2110 (4.4471) time: 1.0759 data: 0.0012 max mem: 8421 +[2024-12-06 00:11:07 root] (utils.py 283): INFO Epoch: [20] [1030/2502] eta: 0:22:26 lr: 0.000007 loss_cls: 3.8891 (3.8472) grad_norm: 4.2100 (4.4447) time: 1.2334 data: 0.0016 max mem: 8421 +[2024-12-06 00:11:21 root] (utils.py 283): INFO Epoch: [20] [1040/2502] eta: 0:22:23 lr: 0.000007 loss_cls: 3.8653 (3.8484) grad_norm: 4.0933 (4.4416) time: 1.3192 data: 0.0017 max mem: 8421 +[2024-12-06 00:11:34 root] (utils.py 283): INFO Epoch: [20] [1050/2502] eta: 0:22:19 lr: 0.000007 loss_cls: 3.8344 (3.8460) grad_norm: 4.0933 (4.4420) time: 1.3349 data: 0.0019 max mem: 8421 +[2024-12-06 00:11:44 root] (utils.py 283): INFO Epoch: [20] [1060/2502] eta: 0:22:12 lr: 0.000007 loss_cls: 4.0467 (3.8477) grad_norm: 4.1551 (4.4397) time: 1.1796 data: 0.0015 max mem: 8421 +[2024-12-06 00:11:52 root] (utils.py 283): INFO Epoch: [20] [1070/2502] eta: 0:22:01 lr: 0.000007 loss_cls: 4.0467 (3.8473) grad_norm: 4.1155 (4.4382) time: 0.9300 data: 0.0007 max mem: 8421 +[2024-12-06 00:12:00 root] (utils.py 283): INFO Epoch: [20] [1080/2502] eta: 0:21:50 lr: 0.000007 loss_cls: 3.7738 (3.8467) grad_norm: 4.1155 (4.4372) time: 0.7881 data: 0.0003 max mem: 8421 +[2024-12-06 00:12:08 root] (utils.py 283): INFO Epoch: [20] [1090/2502] eta: 0:21:39 lr: 0.000007 loss_cls: 4.1427 (3.8488) grad_norm: 4.1618 (4.4366) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 00:12:16 root] (utils.py 283): INFO Epoch: [20] [1100/2502] eta: 0:21:28 lr: 0.000007 loss_cls: 3.8993 (3.8455) grad_norm: 4.3291 (4.4366) time: 0.7896 data: 0.0003 max mem: 8421 +[2024-12-06 00:12:24 root] (utils.py 283): INFO Epoch: [20] [1110/2502] eta: 0:21:17 lr: 0.000007 loss_cls: 3.7122 (3.8459) grad_norm: 4.3010 (4.4371) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-06 00:12:32 root] (utils.py 283): INFO Epoch: [20] [1120/2502] eta: 0:21:06 lr: 0.000007 loss_cls: 3.9432 (3.8469) grad_norm: 4.2257 (4.4374) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 00:12:39 root] (utils.py 283): INFO Epoch: [20] [1130/2502] eta: 0:20:56 lr: 0.000007 loss_cls: 4.0288 (3.8466) grad_norm: 4.3311 (4.4374) time: 0.7776 data: 0.0002 max mem: 8421 +[2024-12-06 00:12:47 root] (utils.py 283): INFO Epoch: [20] [1140/2502] eta: 0:20:45 lr: 0.000007 loss_cls: 4.0779 (3.8482) grad_norm: 4.2607 (4.4359) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 00:12:55 root] (utils.py 283): INFO Epoch: [20] [1150/2502] eta: 0:20:34 lr: 0.000007 loss_cls: 4.0429 (3.8489) grad_norm: 4.1977 (4.4343) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 00:13:03 root] (utils.py 283): INFO Epoch: [20] [1160/2502] eta: 0:20:24 lr: 0.000007 loss_cls: 3.9712 (3.8481) grad_norm: 4.1911 (4.4343) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 00:13:11 root] (utils.py 283): INFO Epoch: [20] [1170/2502] eta: 0:20:13 lr: 0.000007 loss_cls: 3.7534 (3.8465) grad_norm: 4.1219 (4.4326) time: 0.7930 data: 0.0003 max mem: 8421 +[2024-12-06 00:13:19 root] (utils.py 283): INFO Epoch: [20] [1180/2502] eta: 0:20:03 lr: 0.000007 loss_cls: 3.7534 (3.8451) grad_norm: 4.2662 (4.4339) time: 0.7994 data: 0.0003 max mem: 8421 +[2024-12-06 00:13:27 root] (utils.py 283): INFO Epoch: [20] [1190/2502] eta: 0:19:53 lr: 0.000007 loss_cls: 3.9285 (3.8449) grad_norm: 4.3467 (4.4326) time: 0.8192 data: 0.0002 max mem: 8421 +[2024-12-06 00:13:36 root] (utils.py 283): INFO Epoch: [20] [1200/2502] eta: 0:19:43 lr: 0.000007 loss_cls: 3.9480 (3.8439) grad_norm: 4.3467 (4.4317) time: 0.8313 data: 0.0003 max mem: 8421 +[2024-12-06 00:13:44 root] (utils.py 283): INFO Epoch: [20] [1210/2502] eta: 0:19:33 lr: 0.000007 loss_cls: 3.9663 (3.8456) grad_norm: 4.4108 (4.4324) time: 0.8186 data: 0.0003 max mem: 8421 +[2024-12-06 00:13:52 root] (utils.py 283): INFO Epoch: [20] [1220/2502] eta: 0:19:22 lr: 0.000007 loss_cls: 3.9625 (3.8438) grad_norm: 4.3504 (4.4320) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-06 00:13:59 root] (utils.py 283): INFO Epoch: [20] [1230/2502] eta: 0:19:12 lr: 0.000007 loss_cls: 3.5979 (3.8446) grad_norm: 4.3155 (4.4331) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 00:14:07 root] (utils.py 283): INFO Epoch: [20] [1240/2502] eta: 0:19:02 lr: 0.000007 loss_cls: 3.9471 (3.8431) grad_norm: 4.1365 (4.4311) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-06 00:14:16 root] (utils.py 283): INFO Epoch: [20] [1250/2502] eta: 0:18:52 lr: 0.000007 loss_cls: 3.9880 (3.8440) grad_norm: 4.0959 (4.4286) time: 0.8078 data: 0.0003 max mem: 8421 +[2024-12-06 00:14:24 root] (utils.py 283): INFO Epoch: [20] [1260/2502] eta: 0:18:42 lr: 0.000007 loss_cls: 3.9678 (3.8432) grad_norm: 4.1623 (4.4276) time: 0.8288 data: 0.0003 max mem: 8421 +[2024-12-06 00:14:32 root] (utils.py 283): INFO Epoch: [20] [1270/2502] eta: 0:18:32 lr: 0.000007 loss_cls: 3.7910 (3.8417) grad_norm: 4.3643 (4.4269) time: 0.8224 data: 0.0003 max mem: 8421 +[2024-12-06 00:14:40 root] (utils.py 283): INFO Epoch: [20] [1280/2502] eta: 0:18:22 lr: 0.000007 loss_cls: 3.7910 (3.8422) grad_norm: 4.2803 (4.4253) time: 0.8026 data: 0.0003 max mem: 8421 +[2024-12-06 00:14:48 root] (utils.py 283): INFO Epoch: [20] [1290/2502] eta: 0:18:12 lr: 0.000007 loss_cls: 3.9236 (3.8435) grad_norm: 4.2049 (4.4239) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-06 00:14:56 root] (utils.py 283): INFO Epoch: [20] [1300/2502] eta: 0:18:02 lr: 0.000007 loss_cls: 4.0680 (3.8447) grad_norm: 4.2558 (4.4265) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 00:15:03 root] (utils.py 283): INFO Epoch: [20] [1310/2502] eta: 0:17:52 lr: 0.000007 loss_cls: 4.0219 (3.8445) grad_norm: 4.2558 (4.4251) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 00:15:11 root] (utils.py 283): INFO Epoch: [20] [1320/2502] eta: 0:17:42 lr: 0.000007 loss_cls: 3.8572 (3.8452) grad_norm: 4.0934 (4.4229) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 00:15:19 root] (utils.py 283): INFO Epoch: [20] [1330/2502] eta: 0:17:32 lr: 0.000007 loss_cls: 3.9011 (3.8446) grad_norm: 4.1503 (4.4213) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-06 00:15:27 root] (utils.py 283): INFO Epoch: [20] [1340/2502] eta: 0:17:22 lr: 0.000007 loss_cls: 3.9421 (3.8468) grad_norm: 4.2620 (4.4214) time: 0.7969 data: 0.0003 max mem: 8421 +[2024-12-06 00:15:35 root] (utils.py 283): INFO Epoch: [20] [1350/2502] eta: 0:17:12 lr: 0.000007 loss_cls: 4.2087 (3.8483) grad_norm: 4.2146 (4.4191) time: 0.8078 data: 0.0003 max mem: 8421 +[2024-12-06 00:15:44 root] (utils.py 283): INFO Epoch: [20] [1360/2502] eta: 0:17:03 lr: 0.000007 loss_cls: 4.1213 (3.8488) grad_norm: 4.2753 (4.4205) time: 0.8254 data: 0.0003 max mem: 8421 +[2024-12-06 00:15:52 root] (utils.py 283): INFO Epoch: [20] [1370/2502] eta: 0:16:53 lr: 0.000007 loss_cls: 4.0427 (3.8496) grad_norm: 4.3225 (4.4205) time: 0.8368 data: 0.0003 max mem: 8421 +[2024-12-06 00:16:00 root] (utils.py 283): INFO Epoch: [20] [1380/2502] eta: 0:16:44 lr: 0.000007 loss_cls: 3.8581 (3.8481) grad_norm: 4.2867 (4.4202) time: 0.8191 data: 0.0004 max mem: 8421 +[2024-12-06 00:16:08 root] (utils.py 283): INFO Epoch: [20] [1390/2502] eta: 0:16:34 lr: 0.000007 loss_cls: 3.7724 (3.8479) grad_norm: 4.2198 (4.4186) time: 0.7945 data: 0.0003 max mem: 8421 +[2024-12-06 00:16:16 root] (utils.py 283): INFO Epoch: [20] [1400/2502] eta: 0:16:24 lr: 0.000007 loss_cls: 3.6354 (3.8462) grad_norm: 4.2198 (4.4184) time: 0.7837 data: 0.0002 max mem: 8421 +[2024-12-06 00:16:24 root] (utils.py 283): INFO Epoch: [20] [1410/2502] eta: 0:16:14 lr: 0.000007 loss_cls: 4.1004 (3.8488) grad_norm: 4.4124 (4.4212) time: 0.7833 data: 0.0002 max mem: 8421 +[2024-12-06 00:16:32 root] (utils.py 283): INFO Epoch: [20] [1420/2502] eta: 0:16:05 lr: 0.000007 loss_cls: 4.1484 (3.8477) grad_norm: 4.4124 (4.4210) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 00:16:40 root] (utils.py 283): INFO Epoch: [20] [1430/2502] eta: 0:15:55 lr: 0.000007 loss_cls: 3.9283 (3.8488) grad_norm: 4.5667 (4.4289) time: 0.8065 data: 0.0003 max mem: 8421 +[2024-12-06 00:16:48 root] (utils.py 283): INFO Epoch: [20] [1440/2502] eta: 0:15:46 lr: 0.000007 loss_cls: 3.9907 (3.8485) grad_norm: 4.4378 (4.4285) time: 0.8210 data: 0.0003 max mem: 8421 +[2024-12-06 00:16:56 root] (utils.py 283): INFO Epoch: [20] [1450/2502] eta: 0:15:36 lr: 0.000007 loss_cls: 4.0701 (3.8493) grad_norm: 4.4378 (4.4347) time: 0.8126 data: 0.0004 max mem: 8421 +[2024-12-06 00:17:04 root] (utils.py 283): INFO Epoch: [20] [1460/2502] eta: 0:15:27 lr: 0.000007 loss_cls: 4.1008 (3.8504) grad_norm: 4.3559 (4.4337) time: 0.7932 data: 0.0003 max mem: 8421 +[2024-12-06 00:17:12 root] (utils.py 283): INFO Epoch: [20] [1470/2502] eta: 0:15:17 lr: 0.000007 loss_cls: 3.9522 (3.8495) grad_norm: 4.2452 (4.4345) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 00:17:20 root] (utils.py 283): INFO Epoch: [20] [1480/2502] eta: 0:15:07 lr: 0.000007 loss_cls: 4.1028 (3.8515) grad_norm: 4.2823 (4.4357) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 00:17:27 root] (utils.py 283): INFO Epoch: [20] [1490/2502] eta: 0:14:58 lr: 0.000007 loss_cls: 4.2769 (3.8531) grad_norm: 4.4159 (4.4364) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 00:17:35 root] (utils.py 283): INFO Epoch: [20] [1500/2502] eta: 0:14:48 lr: 0.000007 loss_cls: 4.1920 (3.8551) grad_norm: 4.4159 (4.4374) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 00:17:43 root] (utils.py 283): INFO Epoch: [20] [1510/2502] eta: 0:14:39 lr: 0.000007 loss_cls: 4.1186 (3.8556) grad_norm: 4.2616 (4.4355) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 00:17:51 root] (utils.py 283): INFO Epoch: [20] [1520/2502] eta: 0:14:29 lr: 0.000007 loss_cls: 3.8431 (3.8533) grad_norm: 4.0199 (4.4343) time: 0.8118 data: 0.0003 max mem: 8421 +[2024-12-06 00:18:00 root] (utils.py 283): INFO Epoch: [20] [1530/2502] eta: 0:14:20 lr: 0.000007 loss_cls: 3.8431 (3.8532) grad_norm: 4.1690 (4.4345) time: 0.8342 data: 0.0004 max mem: 8421 +[2024-12-06 00:18:08 root] (utils.py 283): INFO Epoch: [20] [1540/2502] eta: 0:14:11 lr: 0.000007 loss_cls: 4.1513 (3.8546) grad_norm: 4.1957 (4.4339) time: 0.8254 data: 0.0004 max mem: 8421 +[2024-12-06 00:18:16 root] (utils.py 283): INFO Epoch: [20] [1550/2502] eta: 0:14:01 lr: 0.000007 loss_cls: 3.7355 (3.8538) grad_norm: 4.2740 (4.4352) time: 0.8016 data: 0.0004 max mem: 8421 +[2024-12-06 00:18:24 root] (utils.py 283): INFO Epoch: [20] [1560/2502] eta: 0:13:52 lr: 0.000007 loss_cls: 3.7384 (3.8533) grad_norm: 4.2740 (4.4351) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 00:18:31 root] (utils.py 283): INFO Epoch: [20] [1570/2502] eta: 0:13:43 lr: 0.000007 loss_cls: 3.6898 (3.8520) grad_norm: 4.2975 (4.4350) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-06 00:18:39 root] (utils.py 283): INFO Epoch: [20] [1580/2502] eta: 0:13:33 lr: 0.000007 loss_cls: 3.6898 (3.8514) grad_norm: 4.3263 (4.4334) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-06 00:18:47 root] (utils.py 283): INFO Epoch: [20] [1590/2502] eta: 0:13:24 lr: 0.000007 loss_cls: 3.8941 (3.8510) grad_norm: 4.3569 (4.4337) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 00:18:55 root] (utils.py 283): INFO Epoch: [20] [1600/2502] eta: 0:13:14 lr: 0.000007 loss_cls: 3.7407 (3.8507) grad_norm: 4.2599 (4.4321) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-06 00:19:03 root] (utils.py 283): INFO Epoch: [20] [1610/2502] eta: 0:13:05 lr: 0.000007 loss_cls: 3.7160 (3.8499) grad_norm: 4.1072 (4.4306) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 00:19:10 root] (utils.py 283): INFO Epoch: [20] [1620/2502] eta: 0:12:56 lr: 0.000007 loss_cls: 3.6263 (3.8486) grad_norm: 4.2432 (4.4315) time: 0.7810 data: 0.0002 max mem: 8421 +[2024-12-06 00:19:18 root] (utils.py 283): INFO Epoch: [20] [1630/2502] eta: 0:12:46 lr: 0.000007 loss_cls: 3.6937 (3.8484) grad_norm: 4.3301 (4.4316) time: 0.7845 data: 0.0002 max mem: 8421 +[2024-12-06 00:19:26 root] (utils.py 283): INFO Epoch: [20] [1640/2502] eta: 0:12:37 lr: 0.000007 loss_cls: 4.0274 (3.8484) grad_norm: 4.2937 (4.4310) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-06 00:19:34 root] (utils.py 283): INFO Epoch: [20] [1650/2502] eta: 0:12:28 lr: 0.000007 loss_cls: 4.1396 (3.8491) grad_norm: 4.1785 (4.4293) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 00:19:42 root] (utils.py 283): INFO Epoch: [20] [1660/2502] eta: 0:12:18 lr: 0.000007 loss_cls: 4.1173 (3.8488) grad_norm: 4.1887 (4.4298) time: 0.7807 data: 0.0002 max mem: 8421 +[2024-12-06 00:19:50 root] (utils.py 283): INFO Epoch: [20] [1670/2502] eta: 0:12:09 lr: 0.000007 loss_cls: 3.8389 (3.8498) grad_norm: 4.2260 (4.4291) time: 0.7882 data: 0.0002 max mem: 8421 +[2024-12-06 00:19:58 root] (utils.py 283): INFO Epoch: [20] [1680/2502] eta: 0:12:00 lr: 0.000007 loss_cls: 3.8389 (3.8485) grad_norm: 4.1553 (4.4271) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 00:20:05 root] (utils.py 283): INFO Epoch: [20] [1690/2502] eta: 0:11:51 lr: 0.000007 loss_cls: 3.9504 (3.8490) grad_norm: 3.9975 (4.4255) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 00:20:13 root] (utils.py 283): INFO Epoch: [20] [1700/2502] eta: 0:11:42 lr: 0.000007 loss_cls: 3.9504 (3.8492) grad_norm: 4.0008 (4.4239) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 00:20:21 root] (utils.py 283): INFO Epoch: [20] [1710/2502] eta: 0:11:32 lr: 0.000007 loss_cls: 3.7663 (3.8478) grad_norm: 4.1575 (4.4234) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 00:20:29 root] (utils.py 283): INFO Epoch: [20] [1720/2502] eta: 0:11:23 lr: 0.000007 loss_cls: 3.5019 (3.8460) grad_norm: 4.3445 (4.4238) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 00:20:37 root] (utils.py 283): INFO Epoch: [20] [1730/2502] eta: 0:11:14 lr: 0.000007 loss_cls: 3.6556 (3.8453) grad_norm: 4.3445 (4.4232) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 00:20:44 root] (utils.py 283): INFO Epoch: [20] [1740/2502] eta: 0:11:05 lr: 0.000007 loss_cls: 3.8093 (3.8449) grad_norm: 4.2186 (4.4223) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 00:20:52 root] (utils.py 283): INFO Epoch: [20] [1750/2502] eta: 0:10:56 lr: 0.000007 loss_cls: 3.7100 (3.8435) grad_norm: 4.2550 (4.4217) time: 0.7842 data: 0.0002 max mem: 8421 +[2024-12-06 00:21:00 root] (utils.py 283): INFO Epoch: [20] [1760/2502] eta: 0:10:47 lr: 0.000007 loss_cls: 3.8421 (3.8440) grad_norm: 4.4151 (4.4253) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 00:21:08 root] (utils.py 283): INFO Epoch: [20] [1770/2502] eta: 0:10:38 lr: 0.000007 loss_cls: 4.0058 (3.8444) grad_norm: 4.4177 (4.4260) time: 0.7835 data: 0.0002 max mem: 8421 +[2024-12-06 00:21:16 root] (utils.py 283): INFO Epoch: [20] [1780/2502] eta: 0:10:29 lr: 0.000007 loss_cls: 4.0058 (3.8446) grad_norm: 4.2167 (4.4244) time: 0.7849 data: 0.0002 max mem: 8421 +[2024-12-06 00:21:24 root] (utils.py 283): INFO Epoch: [20] [1790/2502] eta: 0:10:19 lr: 0.000007 loss_cls: 3.7509 (3.8433) grad_norm: 4.1448 (4.4230) time: 0.7866 data: 0.0002 max mem: 8421 +[2024-12-06 00:21:32 root] (utils.py 283): INFO Epoch: [20] [1800/2502] eta: 0:10:10 lr: 0.000007 loss_cls: 3.7929 (3.8439) grad_norm: 4.2689 (4.4292) time: 0.7917 data: 0.0003 max mem: 8421 +[2024-12-06 00:21:39 root] (utils.py 283): INFO Epoch: [20] [1810/2502] eta: 0:10:01 lr: 0.000007 loss_cls: 4.1576 (3.8451) grad_norm: 4.3650 (4.4283) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-06 00:21:47 root] (utils.py 283): INFO Epoch: [20] [1820/2502] eta: 0:09:52 lr: 0.000007 loss_cls: 4.0189 (3.8444) grad_norm: 4.2506 (4.4282) time: 0.7917 data: 0.0003 max mem: 8421 +[2024-12-06 00:21:56 root] (utils.py 283): INFO Epoch: [20] [1830/2502] eta: 0:09:44 lr: 0.000007 loss_cls: 3.8813 (3.8440) grad_norm: 4.3571 (4.4288) time: 0.8180 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:04 root] (utils.py 283): INFO Epoch: [20] [1840/2502] eta: 0:09:35 lr: 0.000007 loss_cls: 3.9043 (3.8447) grad_norm: 4.3571 (4.4285) time: 0.8413 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:12 root] (utils.py 283): INFO Epoch: [20] [1850/2502] eta: 0:09:26 lr: 0.000007 loss_cls: 3.9928 (3.8440) grad_norm: 4.3363 (4.4292) time: 0.8143 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:20 root] (utils.py 283): INFO Epoch: [20] [1860/2502] eta: 0:09:17 lr: 0.000007 loss_cls: 3.9928 (3.8433) grad_norm: 4.2699 (4.4287) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:28 root] (utils.py 283): INFO Epoch: [20] [1870/2502] eta: 0:09:08 lr: 0.000007 loss_cls: 3.8011 (3.8433) grad_norm: 4.2699 (4.4287) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:36 root] (utils.py 283): INFO Epoch: [20] [1880/2502] eta: 0:08:59 lr: 0.000007 loss_cls: 3.4546 (3.8420) grad_norm: 4.2134 (4.4298) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:43 root] (utils.py 283): INFO Epoch: [20] [1890/2502] eta: 0:08:50 lr: 0.000007 loss_cls: 3.9465 (3.8435) grad_norm: 4.2134 (4.4288) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:51 root] (utils.py 283): INFO Epoch: [20] [1900/2502] eta: 0:08:41 lr: 0.000007 loss_cls: 3.9477 (3.8441) grad_norm: 4.3224 (4.4294) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 00:22:59 root] (utils.py 283): INFO Epoch: [20] [1910/2502] eta: 0:08:32 lr: 0.000007 loss_cls: 3.8777 (3.8446) grad_norm: 4.2531 (4.4282) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 00:23:07 root] (utils.py 283): INFO Epoch: [20] [1920/2502] eta: 0:08:23 lr: 0.000007 loss_cls: 3.8777 (3.8448) grad_norm: 4.1277 (4.4274) time: 0.7937 data: 0.0003 max mem: 8421 +[2024-12-06 00:23:15 root] (utils.py 283): INFO Epoch: [20] [1930/2502] eta: 0:08:15 lr: 0.000007 loss_cls: 3.7028 (3.8441) grad_norm: 4.3012 (4.4287) time: 0.8135 data: 0.0003 max mem: 8421 +[2024-12-06 00:23:24 root] (utils.py 283): INFO Epoch: [20] [1940/2502] eta: 0:08:06 lr: 0.000007 loss_cls: 3.9788 (3.8445) grad_norm: 4.4237 (4.4285) time: 0.8396 data: 0.0004 max mem: 8421 +[2024-12-06 00:23:32 root] (utils.py 283): INFO Epoch: [20] [1950/2502] eta: 0:07:57 lr: 0.000007 loss_cls: 4.0249 (3.8441) grad_norm: 4.3483 (4.4275) time: 0.8380 data: 0.0005 max mem: 8421 +[2024-12-06 00:23:40 root] (utils.py 283): INFO Epoch: [20] [1960/2502] eta: 0:07:48 lr: 0.000007 loss_cls: 4.0249 (3.8445) grad_norm: 4.3483 (4.4275) time: 0.8033 data: 0.0004 max mem: 8421 +[2024-12-06 00:23:48 root] (utils.py 283): INFO Epoch: [20] [1970/2502] eta: 0:07:39 lr: 0.000007 loss_cls: 3.9530 (3.8444) grad_norm: 4.3725 (4.4267) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 00:23:56 root] (utils.py 283): INFO Epoch: [20] [1980/2502] eta: 0:07:31 lr: 0.000007 loss_cls: 3.7056 (3.8439) grad_norm: 4.2516 (4.4262) time: 0.7931 data: 0.0003 max mem: 8421 +[2024-12-06 00:24:04 root] (utils.py 283): INFO Epoch: [20] [1990/2502] eta: 0:07:22 lr: 0.000007 loss_cls: 3.7557 (3.8430) grad_norm: 4.2984 (4.4257) time: 0.8010 data: 0.0003 max mem: 8421 +[2024-12-06 00:24:12 root] (utils.py 283): INFO Epoch: [20] [2000/2502] eta: 0:07:13 lr: 0.000007 loss_cls: 3.9643 (3.8437) grad_norm: 4.3444 (4.4300) time: 0.8088 data: 0.0003 max mem: 8421 +[2024-12-06 00:24:21 root] (utils.py 283): INFO Epoch: [20] [2010/2502] eta: 0:07:04 lr: 0.000007 loss_cls: 3.9643 (3.8433) grad_norm: 4.4224 (4.4296) time: 0.8382 data: 0.0004 max mem: 8421 +[2024-12-06 00:24:29 root] (utils.py 283): INFO Epoch: [20] [2020/2502] eta: 0:06:56 lr: 0.000007 loss_cls: 3.8664 (3.8431) grad_norm: 4.4180 (4.4308) time: 0.8467 data: 0.0004 max mem: 8421 +[2024-12-06 00:24:37 root] (utils.py 283): INFO Epoch: [20] [2030/2502] eta: 0:06:47 lr: 0.000007 loss_cls: 3.8295 (3.8437) grad_norm: 4.1712 (4.4295) time: 0.8273 data: 0.0004 max mem: 8421 +[2024-12-06 00:24:45 root] (utils.py 283): INFO Epoch: [20] [2040/2502] eta: 0:06:38 lr: 0.000007 loss_cls: 3.8295 (3.8432) grad_norm: 4.2177 (4.4295) time: 0.8065 data: 0.0003 max mem: 8421 +[2024-12-06 00:24:53 root] (utils.py 283): INFO Epoch: [20] [2050/2502] eta: 0:06:29 lr: 0.000007 loss_cls: 3.9666 (3.8442) grad_norm: 4.3134 (4.4280) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 00:25:01 root] (utils.py 283): INFO Epoch: [20] [2060/2502] eta: 0:06:21 lr: 0.000007 loss_cls: 3.9616 (3.8440) grad_norm: 4.2314 (4.4283) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 00:25:09 root] (utils.py 283): INFO Epoch: [20] [2070/2502] eta: 0:06:12 lr: 0.000007 loss_cls: 4.0517 (3.8448) grad_norm: 4.2419 (4.4275) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 00:25:17 root] (utils.py 283): INFO Epoch: [20] [2080/2502] eta: 0:06:03 lr: 0.000007 loss_cls: 4.0492 (3.8449) grad_norm: 4.2023 (4.4273) time: 0.7916 data: 0.0003 max mem: 8421 +[2024-12-06 00:25:24 root] (utils.py 283): INFO Epoch: [20] [2090/2502] eta: 0:05:54 lr: 0.000007 loss_cls: 3.6724 (3.8432) grad_norm: 4.2369 (4.4266) time: 0.7883 data: 0.0002 max mem: 8421 +[2024-12-06 00:25:32 root] (utils.py 283): INFO Epoch: [20] [2100/2502] eta: 0:05:45 lr: 0.000007 loss_cls: 3.5692 (3.8421) grad_norm: 4.2724 (4.4261) time: 0.7868 data: 0.0002 max mem: 8421 +[2024-12-06 00:25:40 root] (utils.py 283): INFO Epoch: [20] [2110/2502] eta: 0:05:37 lr: 0.000007 loss_cls: 3.5842 (3.8416) grad_norm: 4.2065 (4.4249) time: 0.7888 data: 0.0002 max mem: 8421 +[2024-12-06 00:25:48 root] (utils.py 283): INFO Epoch: [20] [2120/2502] eta: 0:05:28 lr: 0.000007 loss_cls: 3.4739 (3.8405) grad_norm: 4.2065 (4.4241) time: 0.7817 data: 0.0002 max mem: 8421 +[2024-12-06 00:25:56 root] (utils.py 283): INFO Epoch: [20] [2130/2502] eta: 0:05:19 lr: 0.000007 loss_cls: 3.4860 (3.8394) grad_norm: 4.2526 (4.4240) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-06 00:26:04 root] (utils.py 283): INFO Epoch: [20] [2140/2502] eta: 0:05:11 lr: 0.000007 loss_cls: 3.6061 (3.8394) grad_norm: 4.2029 (4.4231) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 00:26:12 root] (utils.py 283): INFO Epoch: [20] [2150/2502] eta: 0:05:02 lr: 0.000007 loss_cls: 3.7636 (3.8387) grad_norm: 4.1779 (4.4219) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 00:26:19 root] (utils.py 283): INFO Epoch: [20] [2160/2502] eta: 0:04:53 lr: 0.000007 loss_cls: 3.9533 (3.8394) grad_norm: 4.0814 (4.4202) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-06 00:26:27 root] (utils.py 283): INFO Epoch: [20] [2170/2502] eta: 0:04:44 lr: 0.000007 loss_cls: 3.8358 (3.8386) grad_norm: 4.1801 (4.4205) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 00:26:35 root] (utils.py 283): INFO Epoch: [20] [2180/2502] eta: 0:04:36 lr: 0.000007 loss_cls: 3.6253 (3.8373) grad_norm: 4.2013 (4.4193) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 00:26:43 root] (utils.py 283): INFO Epoch: [20] [2190/2502] eta: 0:04:27 lr: 0.000007 loss_cls: 3.5569 (3.8361) grad_norm: 4.2013 (4.4198) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-06 00:26:51 root] (utils.py 283): INFO Epoch: [20] [2200/2502] eta: 0:04:18 lr: 0.000007 loss_cls: 3.7985 (3.8354) grad_norm: 4.2266 (4.4195) time: 0.8023 data: 0.0003 max mem: 8421 +[2024-12-06 00:27:01 root] (utils.py 283): INFO Epoch: [20] [2210/2502] eta: 0:04:10 lr: 0.000007 loss_cls: 3.8003 (3.8351) grad_norm: 4.2215 (4.4185) time: 0.8879 data: 0.0005 max mem: 8421 +[2024-12-06 00:27:12 root] (utils.py 283): INFO Epoch: [20] [2220/2502] eta: 0:04:02 lr: 0.000007 loss_cls: 3.8003 (3.8348) grad_norm: 4.1420 (4.4189) time: 1.0587 data: 0.0010 max mem: 8421 +[2024-12-06 00:27:22 root] (utils.py 283): INFO Epoch: [20] [2230/2502] eta: 0:03:53 lr: 0.000007 loss_cls: 3.8658 (3.8354) grad_norm: 4.2413 (4.4181) time: 1.0718 data: 0.0013 max mem: 8421 +[2024-12-06 00:27:30 root] (utils.py 283): INFO Epoch: [20] [2240/2502] eta: 0:03:45 lr: 0.000007 loss_cls: 3.9755 (3.8354) grad_norm: 4.3061 (4.4189) time: 0.8969 data: 0.0008 max mem: 8421 +[2024-12-06 00:27:38 root] (utils.py 283): INFO Epoch: [20] [2250/2502] eta: 0:03:36 lr: 0.000007 loss_cls: 3.9691 (3.8358) grad_norm: 4.3954 (4.4191) time: 0.7887 data: 0.0003 max mem: 8421 +[2024-12-06 00:27:47 root] (utils.py 283): INFO Epoch: [20] [2260/2502] eta: 0:03:28 lr: 0.000007 loss_cls: 4.0119 (3.8366) grad_norm: 4.2014 (4.4183) time: 0.8645 data: 0.0004 max mem: 8421 +[2024-12-06 00:27:55 root] (utils.py 283): INFO Epoch: [20] [2270/2502] eta: 0:03:19 lr: 0.000007 loss_cls: 3.8397 (3.8352) grad_norm: 4.1693 (4.4178) time: 0.8735 data: 0.0004 max mem: 8421 +[2024-12-06 00:28:03 root] (utils.py 283): INFO Epoch: [20] [2280/2502] eta: 0:03:10 lr: 0.000007 loss_cls: 3.9812 (3.8362) grad_norm: 4.2316 (4.4171) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-06 00:28:11 root] (utils.py 283): INFO Epoch: [20] [2290/2502] eta: 0:03:02 lr: 0.000007 loss_cls: 4.0357 (3.8366) grad_norm: 4.2380 (4.4161) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 00:28:19 root] (utils.py 283): INFO Epoch: [20] [2300/2502] eta: 0:02:53 lr: 0.000007 loss_cls: 3.9717 (3.8364) grad_norm: 4.2943 (4.4168) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 00:28:27 root] (utils.py 283): INFO Epoch: [20] [2310/2502] eta: 0:02:44 lr: 0.000007 loss_cls: 3.7546 (3.8359) grad_norm: 4.2355 (4.4163) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-06 00:28:35 root] (utils.py 283): INFO Epoch: [20] [2320/2502] eta: 0:02:36 lr: 0.000007 loss_cls: 4.0271 (3.8364) grad_norm: 4.1551 (4.4170) time: 0.8101 data: 0.0003 max mem: 8421 +[2024-12-06 00:28:44 root] (utils.py 283): INFO Epoch: [20] [2330/2502] eta: 0:02:27 lr: 0.000007 loss_cls: 4.0876 (3.8358) grad_norm: 4.3631 (4.4182) time: 0.8315 data: 0.0003 max mem: 8421 +[2024-12-06 00:28:52 root] (utils.py 283): INFO Epoch: [20] [2340/2502] eta: 0:02:18 lr: 0.000007 loss_cls: 4.0876 (3.8365) grad_norm: 4.3645 (4.4195) time: 0.8186 data: 0.0003 max mem: 8421 +[2024-12-06 00:28:59 root] (utils.py 283): INFO Epoch: [20] [2350/2502] eta: 0:02:10 lr: 0.000007 loss_cls: 3.8953 (3.8357) grad_norm: 4.1729 (4.4182) time: 0.7932 data: 0.0002 max mem: 8421 +[2024-12-06 00:29:07 root] (utils.py 283): INFO Epoch: [20] [2360/2502] eta: 0:02:01 lr: 0.000007 loss_cls: 3.7252 (3.8343) grad_norm: 4.0995 (4.4184) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 00:29:15 root] (utils.py 283): INFO Epoch: [20] [2370/2502] eta: 0:01:53 lr: 0.000007 loss_cls: 3.7252 (3.8339) grad_norm: 4.1732 (4.4186) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 00:29:23 root] (utils.py 283): INFO Epoch: [20] [2380/2502] eta: 0:01:44 lr: 0.000007 loss_cls: 4.0790 (3.8340) grad_norm: 4.1800 (4.4181) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 00:29:31 root] (utils.py 283): INFO Epoch: [20] [2390/2502] eta: 0:01:35 lr: 0.000007 loss_cls: 3.8217 (3.8334) grad_norm: 4.1983 (4.4176) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 00:29:39 root] (utils.py 283): INFO Epoch: [20] [2400/2502] eta: 0:01:27 lr: 0.000007 loss_cls: 3.7319 (3.8336) grad_norm: 4.1764 (4.4170) time: 0.7905 data: 0.0003 max mem: 8421 +[2024-12-06 00:29:47 root] (utils.py 283): INFO Epoch: [20] [2410/2502] eta: 0:01:18 lr: 0.000007 loss_cls: 3.6723 (3.8326) grad_norm: 4.1735 (4.4175) time: 0.7986 data: 0.0003 max mem: 8421 +[2024-12-06 00:29:55 root] (utils.py 283): INFO Epoch: [20] [2420/2502] eta: 0:01:10 lr: 0.000007 loss_cls: 3.8260 (3.8325) grad_norm: 4.1924 (4.4169) time: 0.7913 data: 0.0003 max mem: 8421 +[2024-12-06 00:30:02 root] (utils.py 283): INFO Epoch: [20] [2430/2502] eta: 0:01:01 lr: 0.000007 loss_cls: 3.7820 (3.8314) grad_norm: 4.1948 (4.4169) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 00:30:10 root] (utils.py 283): INFO Epoch: [20] [2440/2502] eta: 0:00:52 lr: 0.000007 loss_cls: 3.7820 (3.8316) grad_norm: 4.2304 (4.4170) time: 0.7951 data: 0.0003 max mem: 8421 +[2024-12-06 00:30:19 root] (utils.py 283): INFO Epoch: [20] [2450/2502] eta: 0:00:44 lr: 0.000007 loss_cls: 3.8246 (3.8306) grad_norm: 4.1907 (4.4165) time: 0.8209 data: 0.0003 max mem: 8421 +[2024-12-06 00:30:27 root] (utils.py 283): INFO Epoch: [20] [2460/2502] eta: 0:00:35 lr: 0.000007 loss_cls: 3.8221 (3.8311) grad_norm: 4.1907 (4.4159) time: 0.8196 data: 0.0003 max mem: 8421 +[2024-12-06 00:30:35 root] (utils.py 283): INFO Epoch: [20] [2470/2502] eta: 0:00:27 lr: 0.000007 loss_cls: 3.8292 (3.8309) grad_norm: 4.1774 (4.4154) time: 0.8039 data: 0.0003 max mem: 8421 +[2024-12-06 00:30:43 root] (utils.py 283): INFO Epoch: [20] [2480/2502] eta: 0:00:18 lr: 0.000007 loss_cls: 3.7438 (3.8305) grad_norm: 4.1518 (4.4152) time: 0.8022 data: 0.0003 max mem: 8421 +[2024-12-06 00:30:51 root] (utils.py 283): INFO Epoch: [20] [2490/2502] eta: 0:00:10 lr: 0.000007 loss_cls: 3.9361 (3.8307) grad_norm: 4.3089 (4.4162) time: 0.8194 data: 0.0275 max mem: 8421 +[2024-12-06 00:30:59 root] (utils.py 283): INFO Epoch: [20] [2500/2502] eta: 0:00:01 lr: 0.000007 loss_cls: 4.0087 (3.8308) grad_norm: 4.4575 (4.4163) time: 0.8107 data: 0.0275 max mem: 8421 +[2024-12-06 00:31:00 root] (utils.py 283): INFO Epoch: [20] [2501/2502] eta: 0:00:00 lr: 0.000007 loss_cls: 4.0228 (3.8309) grad_norm: 4.4736 (4.4186) time: 0.8088 data: 0.0275 max mem: 8421 +[2024-12-06 00:31:00 root] (utils.py 297): INFO Epoch: [20] Total time: 0:35:36 (0.8538 s / it) +[2024-12-06 00:31:00 root] (engine.py 179): INFO Averaged stats:lr: 0.000007 loss_cls: 4.0228 (3.8420) grad_norm: 4.4736 (4.4186) +[2024-12-06 00:31:01 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7390 (0.7390) acc1: 85.9375 (85.9375) acc3: 96.0938 (96.0938) acc5: 97.6562 (97.6562) time: 0.1310 data: 0.0004 max mem: 8421 +[2024-12-06 00:31:02 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8104 (0.8612) acc1: 84.3750 (82.3153) acc3: 92.1875 (92.8977) acc5: 95.3125 (95.7386) time: 0.1314 data: 0.0004 max mem: 8421 +[2024-12-06 00:31:03 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8812 (0.9124) acc1: 78.9062 (80.9896) acc3: 92.1875 (92.4479) acc5: 95.3125 (95.2381) time: 0.1332 data: 0.0004 max mem: 8421 +[2024-12-06 00:31:05 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9518 (0.9177) acc1: 78.9062 (80.4940) acc3: 92.1875 (92.7671) acc5: 95.3125 (95.5141) time: 0.1345 data: 0.0005 max mem: 8421 +[2024-12-06 00:31:06 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8391 (0.9090) acc1: 80.4688 (80.6784) acc3: 94.5312 (93.0069) acc5: 96.0938 (95.5983) time: 0.1563 data: 0.0222 max mem: 8421 +[2024-12-06 00:31:08 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9987 (1.0025) acc1: 75.0000 (78.4467) acc3: 88.2812 (91.4216) acc5: 91.4062 (94.3627) time: 0.1629 data: 0.0280 max mem: 8421 +[2024-12-06 00:31:10 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3404 (1.0471) acc1: 69.5312 (77.4846) acc3: 85.9375 (90.6378) acc5: 89.0625 (93.6091) time: 0.1646 data: 0.0286 max mem: 8421 +[2024-12-06 00:31:11 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2722 (1.0898) acc1: 71.0938 (76.3644) acc3: 87.5000 (90.0858) acc5: 89.0625 (93.1778) time: 0.1649 data: 0.0265 max mem: 8421 +[2024-12-06 00:31:13 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3183 (1.1251) acc1: 71.0938 (75.6655) acc3: 85.9375 (89.4387) acc5: 89.8438 (92.6601) time: 0.1439 data: 0.0047 max mem: 8421 +[2024-12-06 00:31:14 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3205 (1.1557) acc1: 71.0938 (74.8970) acc3: 85.1562 (88.8994) acc5: 89.0625 (92.2390) time: 0.1378 data: 0.0010 max mem: 8421 +[2024-12-06 00:31:15 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1831 (1.1428) acc1: 74.2188 (75.0640) acc3: 87.5000 (89.0800) acc5: 91.4062 (92.4800) time: 0.1373 data: 0.0008 max mem: 8421 +[2024-12-06 00:31:15 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1473 s / it) +[2024-12-06 00:31:16 root] (engine.py 264): INFO * Acc@1 74.938 Acc@3 88.912 Acc@5 92.322 loss 1.143 flops 1.285 layer_flops 1.251 +[2024-12-06 00:31:16 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 74.9% +[2024-12-06 00:31:16 root] (main.py 551): INFO Max accuracy: 74.94% +[2024-12-06 00:31:17 root] (utils.py 283): INFO Epoch: [21] [ 0/2502] eta: 0:37:42 lr: 0.000006 loss_cls: 4.2767 (4.2767) grad_norm: 4.0497 (4.0497) time: 0.9044 data: 0.0005 max mem: 8421 +[2024-12-06 00:31:25 root] (utils.py 283): INFO Epoch: [21] [ 10/2502] eta: 0:33:06 lr: 0.000006 loss_cls: 4.1754 (4.1060) grad_norm: 4.3113 (4.3637) time: 0.7973 data: 0.0003 max mem: 8421 +[2024-12-06 00:31:33 root] (utils.py 283): INFO Epoch: [21] [ 20/2502] eta: 0:32:58 lr: 0.000006 loss_cls: 4.1289 (4.0777) grad_norm: 4.3260 (4.4656) time: 0.7916 data: 0.0003 max mem: 8421 +[2024-12-06 00:31:41 root] (utils.py 283): INFO Epoch: [21] [ 30/2502] eta: 0:32:37 lr: 0.000006 loss_cls: 3.9366 (3.9667) grad_norm: 4.3263 (4.4386) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-06 00:31:48 root] (utils.py 283): INFO Epoch: [21] [ 40/2502] eta: 0:32:23 lr: 0.000006 loss_cls: 3.7631 (3.9513) grad_norm: 4.3263 (4.4806) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 00:31:56 root] (utils.py 283): INFO Epoch: [21] [ 50/2502] eta: 0:32:09 lr: 0.000006 loss_cls: 3.9312 (3.9158) grad_norm: 4.3006 (4.4584) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-06 00:32:04 root] (utils.py 283): INFO Epoch: [21] [ 60/2502] eta: 0:31:59 lr: 0.000006 loss_cls: 3.9266 (3.8875) grad_norm: 4.2084 (4.4524) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 00:32:12 root] (utils.py 283): INFO Epoch: [21] [ 70/2502] eta: 0:31:49 lr: 0.000006 loss_cls: 4.0281 (3.9098) grad_norm: 4.2809 (4.4510) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 00:32:20 root] (utils.py 283): INFO Epoch: [21] [ 80/2502] eta: 0:31:39 lr: 0.000006 loss_cls: 4.0698 (3.9268) grad_norm: 4.5067 (4.4844) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 00:32:27 root] (utils.py 283): INFO Epoch: [21] [ 90/2502] eta: 0:31:31 lr: 0.000006 loss_cls: 3.7944 (3.9074) grad_norm: 4.3123 (4.4742) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 00:32:35 root] (utils.py 283): INFO Epoch: [21] [ 100/2502] eta: 0:31:22 lr: 0.000006 loss_cls: 3.7791 (3.8849) grad_norm: 4.2836 (4.4899) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 00:32:43 root] (utils.py 283): INFO Epoch: [21] [ 110/2502] eta: 0:31:12 lr: 0.000006 loss_cls: 4.0944 (3.8881) grad_norm: 4.3996 (4.4887) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-06 00:32:51 root] (utils.py 283): INFO Epoch: [21] [ 120/2502] eta: 0:31:04 lr: 0.000006 loss_cls: 4.1021 (3.8909) grad_norm: 4.2237 (4.4806) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 00:32:59 root] (utils.py 283): INFO Epoch: [21] [ 130/2502] eta: 0:30:55 lr: 0.000006 loss_cls: 3.9230 (3.8812) grad_norm: 4.2280 (4.4636) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 00:33:06 root] (utils.py 283): INFO Epoch: [21] [ 140/2502] eta: 0:30:48 lr: 0.000006 loss_cls: 3.8139 (3.8785) grad_norm: 4.2280 (4.4567) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 00:33:14 root] (utils.py 283): INFO Epoch: [21] [ 150/2502] eta: 0:30:40 lr: 0.000006 loss_cls: 3.9549 (3.8802) grad_norm: 4.1940 (4.4544) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 00:33:22 root] (utils.py 283): INFO Epoch: [21] [ 160/2502] eta: 0:30:32 lr: 0.000006 loss_cls: 4.0595 (3.8896) grad_norm: 4.3102 (4.4473) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 00:33:30 root] (utils.py 283): INFO Epoch: [21] [ 170/2502] eta: 0:30:24 lr: 0.000006 loss_cls: 4.0752 (3.8936) grad_norm: 4.2759 (4.4498) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 00:33:38 root] (utils.py 283): INFO Epoch: [21] [ 180/2502] eta: 0:30:15 lr: 0.000006 loss_cls: 4.0660 (3.8952) grad_norm: 4.2338 (4.4485) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-06 00:33:45 root] (utils.py 283): INFO Epoch: [21] [ 190/2502] eta: 0:30:07 lr: 0.000006 loss_cls: 4.0223 (3.8948) grad_norm: 4.1378 (4.4243) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-06 00:33:53 root] (utils.py 283): INFO Epoch: [21] [ 200/2502] eta: 0:29:58 lr: 0.000006 loss_cls: 4.0806 (3.8900) grad_norm: 4.1390 (4.4200) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:01 root] (utils.py 283): INFO Epoch: [21] [ 210/2502] eta: 0:29:50 lr: 0.000006 loss_cls: 4.0877 (3.8956) grad_norm: 4.3332 (4.4466) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:09 root] (utils.py 283): INFO Epoch: [21] [ 220/2502] eta: 0:29:42 lr: 0.000006 loss_cls: 4.1132 (3.8928) grad_norm: 4.3516 (4.4447) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:17 root] (utils.py 283): INFO Epoch: [21] [ 230/2502] eta: 0:29:34 lr: 0.000006 loss_cls: 4.0870 (3.8929) grad_norm: 4.3247 (4.4485) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:24 root] (utils.py 283): INFO Epoch: [21] [ 240/2502] eta: 0:29:27 lr: 0.000006 loss_cls: 3.6203 (3.8762) grad_norm: 4.2562 (4.4457) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:32 root] (utils.py 283): INFO Epoch: [21] [ 250/2502] eta: 0:29:19 lr: 0.000006 loss_cls: 3.6602 (3.8779) grad_norm: 4.3168 (4.4459) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:40 root] (utils.py 283): INFO Epoch: [21] [ 260/2502] eta: 0:29:11 lr: 0.000006 loss_cls: 4.2150 (3.8904) grad_norm: 4.4405 (4.4504) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:48 root] (utils.py 283): INFO Epoch: [21] [ 270/2502] eta: 0:29:03 lr: 0.000006 loss_cls: 3.9559 (3.8823) grad_norm: 4.2087 (4.4430) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 00:34:56 root] (utils.py 283): INFO Epoch: [21] [ 280/2502] eta: 0:28:55 lr: 0.000006 loss_cls: 3.6868 (3.8759) grad_norm: 4.1318 (4.4326) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 00:35:03 root] (utils.py 283): INFO Epoch: [21] [ 290/2502] eta: 0:28:48 lr: 0.000006 loss_cls: 3.7649 (3.8688) grad_norm: 4.3002 (4.4396) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 00:35:11 root] (utils.py 283): INFO Epoch: [21] [ 300/2502] eta: 0:28:40 lr: 0.000006 loss_cls: 3.9460 (3.8667) grad_norm: 4.4293 (4.4369) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 00:35:19 root] (utils.py 283): INFO Epoch: [21] [ 310/2502] eta: 0:28:33 lr: 0.000006 loss_cls: 4.1281 (3.8698) grad_norm: 4.3560 (4.4349) time: 0.7892 data: 0.0002 max mem: 8421 +[2024-12-06 00:35:27 root] (utils.py 283): INFO Epoch: [21] [ 320/2502] eta: 0:28:25 lr: 0.000006 loss_cls: 4.0166 (3.8704) grad_norm: 4.3178 (4.4325) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-06 00:35:35 root] (utils.py 283): INFO Epoch: [21] [ 330/2502] eta: 0:28:19 lr: 0.000006 loss_cls: 3.9058 (3.8657) grad_norm: 4.2539 (4.4307) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-06 00:35:43 root] (utils.py 283): INFO Epoch: [21] [ 340/2502] eta: 0:28:11 lr: 0.000006 loss_cls: 4.0147 (3.8706) grad_norm: 4.1956 (4.4605) time: 0.7951 data: 0.0003 max mem: 8421 +[2024-12-06 00:35:51 root] (utils.py 283): INFO Epoch: [21] [ 350/2502] eta: 0:28:03 lr: 0.000006 loss_cls: 3.9624 (3.8648) grad_norm: 4.2882 (4.4595) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 00:35:59 root] (utils.py 283): INFO Epoch: [21] [ 360/2502] eta: 0:27:56 lr: 0.000006 loss_cls: 3.7645 (3.8697) grad_norm: 4.2894 (4.4542) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 00:36:06 root] (utils.py 283): INFO Epoch: [21] [ 370/2502] eta: 0:27:48 lr: 0.000006 loss_cls: 3.9413 (3.8686) grad_norm: 4.2439 (4.4616) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 00:36:14 root] (utils.py 283): INFO Epoch: [21] [ 380/2502] eta: 0:27:41 lr: 0.000006 loss_cls: 3.9413 (3.8670) grad_norm: 4.2019 (4.4732) time: 0.7868 data: 0.0003 max mem: 8421 +[2024-12-06 00:36:22 root] (utils.py 283): INFO Epoch: [21] [ 390/2502] eta: 0:27:33 lr: 0.000006 loss_cls: 4.1066 (3.8705) grad_norm: 4.1745 (4.4727) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-06 00:36:30 root] (utils.py 283): INFO Epoch: [21] [ 400/2502] eta: 0:27:25 lr: 0.000006 loss_cls: 4.1140 (3.8740) grad_norm: 4.4999 (4.4846) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 00:36:38 root] (utils.py 283): INFO Epoch: [21] [ 410/2502] eta: 0:27:17 lr: 0.000006 loss_cls: 4.1283 (3.8811) grad_norm: 4.4999 (4.4829) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-06 00:36:46 root] (utils.py 283): INFO Epoch: [21] [ 420/2502] eta: 0:27:09 lr: 0.000006 loss_cls: 4.0263 (3.8781) grad_norm: 4.2273 (4.4788) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 00:36:54 root] (utils.py 283): INFO Epoch: [21] [ 430/2502] eta: 0:27:02 lr: 0.000006 loss_cls: 3.8882 (3.8774) grad_norm: 4.3590 (4.4821) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:01 root] (utils.py 283): INFO Epoch: [21] [ 440/2502] eta: 0:26:54 lr: 0.000006 loss_cls: 4.1536 (3.8762) grad_norm: 4.1750 (4.4790) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:09 root] (utils.py 283): INFO Epoch: [21] [ 450/2502] eta: 0:26:47 lr: 0.000006 loss_cls: 3.9042 (3.8770) grad_norm: 4.2556 (4.4755) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:17 root] (utils.py 283): INFO Epoch: [21] [ 460/2502] eta: 0:26:39 lr: 0.000006 loss_cls: 3.9042 (3.8776) grad_norm: 4.2556 (4.4723) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:25 root] (utils.py 283): INFO Epoch: [21] [ 470/2502] eta: 0:26:30 lr: 0.000006 loss_cls: 3.8098 (3.8728) grad_norm: 3.9910 (4.4716) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:33 root] (utils.py 283): INFO Epoch: [21] [ 480/2502] eta: 0:26:22 lr: 0.000006 loss_cls: 3.6693 (3.8656) grad_norm: 4.1028 (4.4713) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:41 root] (utils.py 283): INFO Epoch: [21] [ 490/2502] eta: 0:26:15 lr: 0.000006 loss_cls: 3.5607 (3.8609) grad_norm: 4.2773 (4.4703) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:48 root] (utils.py 283): INFO Epoch: [21] [ 500/2502] eta: 0:26:07 lr: 0.000006 loss_cls: 4.0467 (3.8644) grad_norm: 4.2147 (4.4658) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 00:37:56 root] (utils.py 283): INFO Epoch: [21] [ 510/2502] eta: 0:25:59 lr: 0.000006 loss_cls: 4.2358 (3.8705) grad_norm: 4.2401 (4.4665) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 00:38:04 root] (utils.py 283): INFO Epoch: [21] [ 520/2502] eta: 0:25:51 lr: 0.000006 loss_cls: 4.1627 (3.8704) grad_norm: 4.3776 (4.4722) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-06 00:38:12 root] (utils.py 283): INFO Epoch: [21] [ 530/2502] eta: 0:25:43 lr: 0.000006 loss_cls: 3.9528 (3.8713) grad_norm: 4.4145 (4.4695) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 00:38:19 root] (utils.py 283): INFO Epoch: [21] [ 540/2502] eta: 0:25:35 lr: 0.000006 loss_cls: 4.0360 (3.8729) grad_norm: 4.2522 (4.4654) time: 0.7804 data: 0.0002 max mem: 8421 +[2024-12-06 00:38:27 root] (utils.py 283): INFO Epoch: [21] [ 550/2502] eta: 0:25:27 lr: 0.000006 loss_cls: 4.0381 (3.8709) grad_norm: 4.1625 (4.4625) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 00:38:35 root] (utils.py 283): INFO Epoch: [21] [ 560/2502] eta: 0:25:19 lr: 0.000006 loss_cls: 3.6674 (3.8675) grad_norm: 4.3191 (4.4618) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 00:38:43 root] (utils.py 283): INFO Epoch: [21] [ 570/2502] eta: 0:25:11 lr: 0.000006 loss_cls: 3.6674 (3.8622) grad_norm: 4.2648 (4.4665) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 00:38:51 root] (utils.py 283): INFO Epoch: [21] [ 580/2502] eta: 0:25:04 lr: 0.000006 loss_cls: 4.0974 (3.8658) grad_norm: 4.1748 (4.4628) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-06 00:38:59 root] (utils.py 283): INFO Epoch: [21] [ 590/2502] eta: 0:24:57 lr: 0.000006 loss_cls: 4.2432 (3.8692) grad_norm: 4.3280 (4.4708) time: 0.8000 data: 0.0003 max mem: 8421 +[2024-12-06 00:39:07 root] (utils.py 283): INFO Epoch: [21] [ 600/2502] eta: 0:24:49 lr: 0.000006 loss_cls: 4.0324 (3.8706) grad_norm: 4.2864 (4.4673) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-06 00:39:15 root] (utils.py 283): INFO Epoch: [21] [ 610/2502] eta: 0:24:41 lr: 0.000006 loss_cls: 3.8321 (3.8676) grad_norm: 4.1780 (4.4690) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-06 00:39:22 root] (utils.py 283): INFO Epoch: [21] [ 620/2502] eta: 0:24:33 lr: 0.000006 loss_cls: 3.7222 (3.8667) grad_norm: 4.3019 (4.4692) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 00:39:30 root] (utils.py 283): INFO Epoch: [21] [ 630/2502] eta: 0:24:25 lr: 0.000006 loss_cls: 3.8317 (3.8653) grad_norm: 4.3188 (4.4732) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 00:39:38 root] (utils.py 283): INFO Epoch: [21] [ 640/2502] eta: 0:24:18 lr: 0.000006 loss_cls: 4.2095 (3.8699) grad_norm: 4.3401 (4.4727) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 00:39:46 root] (utils.py 283): INFO Epoch: [21] [ 650/2502] eta: 0:24:10 lr: 0.000006 loss_cls: 4.0336 (3.8674) grad_norm: 4.2503 (4.4701) time: 0.7910 data: 0.0003 max mem: 8421 +[2024-12-06 00:39:54 root] (utils.py 283): INFO Epoch: [21] [ 660/2502] eta: 0:24:02 lr: 0.000006 loss_cls: 3.6625 (3.8645) grad_norm: 4.2234 (4.4716) time: 0.7918 data: 0.0003 max mem: 8421 +[2024-12-06 00:40:02 root] (utils.py 283): INFO Epoch: [21] [ 670/2502] eta: 0:23:55 lr: 0.000006 loss_cls: 4.0070 (3.8669) grad_norm: 4.2561 (4.4705) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 00:40:10 root] (utils.py 283): INFO Epoch: [21] [ 680/2502] eta: 0:23:47 lr: 0.000006 loss_cls: 3.8049 (3.8636) grad_norm: 4.4237 (4.4708) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 00:40:17 root] (utils.py 283): INFO Epoch: [21] [ 690/2502] eta: 0:23:39 lr: 0.000006 loss_cls: 3.7169 (3.8630) grad_norm: 4.3006 (4.4659) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 00:40:25 root] (utils.py 283): INFO Epoch: [21] [ 700/2502] eta: 0:23:31 lr: 0.000006 loss_cls: 3.9469 (3.8657) grad_norm: 4.2521 (4.4652) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 00:40:33 root] (utils.py 283): INFO Epoch: [21] [ 710/2502] eta: 0:23:23 lr: 0.000006 loss_cls: 4.0079 (3.8682) grad_norm: 4.2521 (4.4634) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 00:40:41 root] (utils.py 283): INFO Epoch: [21] [ 720/2502] eta: 0:23:15 lr: 0.000006 loss_cls: 3.9535 (3.8687) grad_norm: 4.3032 (4.4641) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 00:40:49 root] (utils.py 283): INFO Epoch: [21] [ 730/2502] eta: 0:23:07 lr: 0.000006 loss_cls: 3.8021 (3.8650) grad_norm: 4.4525 (4.4664) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-06 00:40:56 root] (utils.py 283): INFO Epoch: [21] [ 740/2502] eta: 0:22:59 lr: 0.000006 loss_cls: 3.7031 (3.8606) grad_norm: 4.2421 (4.4638) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 00:41:04 root] (utils.py 283): INFO Epoch: [21] [ 750/2502] eta: 0:22:51 lr: 0.000006 loss_cls: 3.7445 (3.8598) grad_norm: 4.2560 (4.4625) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 00:41:12 root] (utils.py 283): INFO Epoch: [21] [ 760/2502] eta: 0:22:44 lr: 0.000006 loss_cls: 3.7814 (3.8590) grad_norm: 4.2700 (4.4677) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-06 00:41:20 root] (utils.py 283): INFO Epoch: [21] [ 770/2502] eta: 0:22:36 lr: 0.000006 loss_cls: 3.7227 (3.8579) grad_norm: 4.3116 (4.4670) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 00:41:28 root] (utils.py 283): INFO Epoch: [21] [ 780/2502] eta: 0:22:28 lr: 0.000006 loss_cls: 3.9039 (3.8593) grad_norm: 4.3931 (4.4712) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 00:41:36 root] (utils.py 283): INFO Epoch: [21] [ 790/2502] eta: 0:22:20 lr: 0.000006 loss_cls: 3.9440 (3.8570) grad_norm: 4.3685 (4.4706) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 00:41:43 root] (utils.py 283): INFO Epoch: [21] [ 800/2502] eta: 0:22:12 lr: 0.000006 loss_cls: 3.4778 (3.8513) grad_norm: 4.2432 (4.4688) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-06 00:41:51 root] (utils.py 283): INFO Epoch: [21] [ 810/2502] eta: 0:22:04 lr: 0.000006 loss_cls: 3.3464 (3.8495) grad_norm: 4.0946 (4.4649) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 00:41:59 root] (utils.py 283): INFO Epoch: [21] [ 820/2502] eta: 0:21:57 lr: 0.000006 loss_cls: 3.7808 (3.8489) grad_norm: 4.0928 (4.4632) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 00:42:07 root] (utils.py 283): INFO Epoch: [21] [ 830/2502] eta: 0:21:49 lr: 0.000006 loss_cls: 3.7808 (3.8476) grad_norm: 4.1873 (4.4599) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 00:42:15 root] (utils.py 283): INFO Epoch: [21] [ 840/2502] eta: 0:21:41 lr: 0.000006 loss_cls: 4.0248 (3.8488) grad_norm: 4.2297 (4.4597) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 00:42:22 root] (utils.py 283): INFO Epoch: [21] [ 850/2502] eta: 0:21:33 lr: 0.000006 loss_cls: 4.0338 (3.8489) grad_norm: 4.2902 (4.4608) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 00:42:30 root] (utils.py 283): INFO Epoch: [21] [ 860/2502] eta: 0:21:25 lr: 0.000006 loss_cls: 4.0891 (3.8518) grad_norm: 4.2658 (4.4611) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 00:42:38 root] (utils.py 283): INFO Epoch: [21] [ 870/2502] eta: 0:21:17 lr: 0.000006 loss_cls: 4.1309 (3.8549) grad_norm: 4.0943 (4.4576) time: 0.7784 data: 0.0002 max mem: 8421 +[2024-12-06 00:42:46 root] (utils.py 283): INFO Epoch: [21] [ 880/2502] eta: 0:21:09 lr: 0.000006 loss_cls: 3.8310 (3.8530) grad_norm: 4.0695 (4.4575) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 00:42:54 root] (utils.py 283): INFO Epoch: [21] [ 890/2502] eta: 0:21:01 lr: 0.000006 loss_cls: 3.5988 (3.8475) grad_norm: 4.1904 (4.4543) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:01 root] (utils.py 283): INFO Epoch: [21] [ 900/2502] eta: 0:20:53 lr: 0.000006 loss_cls: 3.1738 (3.8417) grad_norm: 4.2569 (4.4534) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:09 root] (utils.py 283): INFO Epoch: [21] [ 910/2502] eta: 0:20:46 lr: 0.000006 loss_cls: 3.5491 (3.8414) grad_norm: 4.3493 (4.4521) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:17 root] (utils.py 283): INFO Epoch: [21] [ 920/2502] eta: 0:20:38 lr: 0.000006 loss_cls: 3.9040 (3.8425) grad_norm: 4.1669 (4.4504) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:25 root] (utils.py 283): INFO Epoch: [21] [ 930/2502] eta: 0:20:30 lr: 0.000006 loss_cls: 4.1468 (3.8464) grad_norm: 4.1767 (4.4504) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:33 root] (utils.py 283): INFO Epoch: [21] [ 940/2502] eta: 0:20:22 lr: 0.000006 loss_cls: 4.0941 (3.8475) grad_norm: 4.2625 (4.4492) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:40 root] (utils.py 283): INFO Epoch: [21] [ 950/2502] eta: 0:20:14 lr: 0.000006 loss_cls: 3.7628 (3.8459) grad_norm: 4.2152 (4.4494) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:48 root] (utils.py 283): INFO Epoch: [21] [ 960/2502] eta: 0:20:06 lr: 0.000006 loss_cls: 3.9105 (3.8464) grad_norm: 4.1733 (4.4492) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-06 00:43:56 root] (utils.py 283): INFO Epoch: [21] [ 970/2502] eta: 0:19:58 lr: 0.000006 loss_cls: 3.9347 (3.8452) grad_norm: 4.1850 (4.4479) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 00:44:04 root] (utils.py 283): INFO Epoch: [21] [ 980/2502] eta: 0:19:50 lr: 0.000006 loss_cls: 3.8020 (3.8450) grad_norm: 4.3196 (4.4493) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-06 00:44:12 root] (utils.py 283): INFO Epoch: [21] [ 990/2502] eta: 0:19:43 lr: 0.000006 loss_cls: 3.9297 (3.8444) grad_norm: 4.2239 (4.4465) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 00:44:19 root] (utils.py 283): INFO Epoch: [21] [1000/2502] eta: 0:19:35 lr: 0.000006 loss_cls: 3.8931 (3.8455) grad_norm: 4.1646 (4.4455) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 00:44:27 root] (utils.py 283): INFO Epoch: [21] [1010/2502] eta: 0:19:27 lr: 0.000006 loss_cls: 3.8497 (3.8416) grad_norm: 4.1646 (4.4416) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-06 00:44:35 root] (utils.py 283): INFO Epoch: [21] [1020/2502] eta: 0:19:19 lr: 0.000006 loss_cls: 3.5789 (3.8395) grad_norm: 4.1730 (4.4398) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 00:44:43 root] (utils.py 283): INFO Epoch: [21] [1030/2502] eta: 0:19:12 lr: 0.000006 loss_cls: 3.6899 (3.8393) grad_norm: 4.2675 (4.4379) time: 0.7900 data: 0.0002 max mem: 8421 +[2024-12-06 00:44:51 root] (utils.py 283): INFO Epoch: [21] [1040/2502] eta: 0:19:04 lr: 0.000006 loss_cls: 3.9192 (3.8379) grad_norm: 4.2411 (4.4370) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 00:44:59 root] (utils.py 283): INFO Epoch: [21] [1050/2502] eta: 0:18:56 lr: 0.000006 loss_cls: 3.9192 (3.8382) grad_norm: 4.2381 (4.4343) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 00:45:07 root] (utils.py 283): INFO Epoch: [21] [1060/2502] eta: 0:18:48 lr: 0.000006 loss_cls: 3.7224 (3.8362) grad_norm: 4.0800 (4.4320) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-06 00:45:14 root] (utils.py 283): INFO Epoch: [21] [1070/2502] eta: 0:18:40 lr: 0.000006 loss_cls: 3.6426 (3.8350) grad_norm: 4.1131 (4.4312) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 00:45:22 root] (utils.py 283): INFO Epoch: [21] [1080/2502] eta: 0:18:33 lr: 0.000006 loss_cls: 3.7818 (3.8345) grad_norm: 4.1767 (4.4296) time: 0.7917 data: 0.0002 max mem: 8421 +[2024-12-06 00:45:30 root] (utils.py 283): INFO Epoch: [21] [1090/2502] eta: 0:18:25 lr: 0.000006 loss_cls: 3.6101 (3.8320) grad_norm: 4.1525 (4.4270) time: 0.7891 data: 0.0002 max mem: 8421 +[2024-12-06 00:45:38 root] (utils.py 283): INFO Epoch: [21] [1100/2502] eta: 0:18:17 lr: 0.000006 loss_cls: 3.6101 (3.8307) grad_norm: 4.1097 (4.4237) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 00:45:46 root] (utils.py 283): INFO Epoch: [21] [1110/2502] eta: 0:18:09 lr: 0.000006 loss_cls: 3.9466 (3.8307) grad_norm: 4.1104 (4.4225) time: 0.7916 data: 0.0003 max mem: 8421 +[2024-12-06 00:45:54 root] (utils.py 283): INFO Epoch: [21] [1120/2502] eta: 0:18:02 lr: 0.000006 loss_cls: 4.1735 (3.8333) grad_norm: 4.1499 (4.4212) time: 0.7935 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:02 root] (utils.py 283): INFO Epoch: [21] [1130/2502] eta: 0:17:54 lr: 0.000006 loss_cls: 4.0684 (3.8300) grad_norm: 4.2452 (4.4201) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:10 root] (utils.py 283): INFO Epoch: [21] [1140/2502] eta: 0:17:46 lr: 0.000006 loss_cls: 3.4070 (3.8263) grad_norm: 4.2452 (4.4181) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:17 root] (utils.py 283): INFO Epoch: [21] [1150/2502] eta: 0:17:38 lr: 0.000006 loss_cls: 3.3952 (3.8239) grad_norm: 4.1749 (4.4172) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:25 root] (utils.py 283): INFO Epoch: [21] [1160/2502] eta: 0:17:30 lr: 0.000006 loss_cls: 3.8540 (3.8249) grad_norm: 4.1932 (4.4182) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:33 root] (utils.py 283): INFO Epoch: [21] [1170/2502] eta: 0:17:22 lr: 0.000006 loss_cls: 4.0282 (3.8261) grad_norm: 4.2222 (4.4164) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:41 root] (utils.py 283): INFO Epoch: [21] [1180/2502] eta: 0:17:14 lr: 0.000006 loss_cls: 4.0193 (3.8249) grad_norm: 4.2432 (4.4159) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:49 root] (utils.py 283): INFO Epoch: [21] [1190/2502] eta: 0:17:07 lr: 0.000006 loss_cls: 3.9651 (3.8255) grad_norm: 4.2947 (4.4179) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 00:46:57 root] (utils.py 283): INFO Epoch: [21] [1200/2502] eta: 0:16:59 lr: 0.000006 loss_cls: 3.9738 (3.8259) grad_norm: 4.2709 (4.4200) time: 0.7920 data: 0.0003 max mem: 8421 +[2024-12-06 00:47:05 root] (utils.py 283): INFO Epoch: [21] [1210/2502] eta: 0:16:51 lr: 0.000006 loss_cls: 3.9289 (3.8260) grad_norm: 4.3550 (4.4206) time: 0.7973 data: 0.0002 max mem: 8421 +[2024-12-06 00:47:12 root] (utils.py 283): INFO Epoch: [21] [1220/2502] eta: 0:16:43 lr: 0.000006 loss_cls: 3.8955 (3.8255) grad_norm: 4.3761 (4.4199) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-06 00:47:20 root] (utils.py 283): INFO Epoch: [21] [1230/2502] eta: 0:16:36 lr: 0.000006 loss_cls: 3.7827 (3.8252) grad_norm: 4.3761 (4.4205) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 00:47:28 root] (utils.py 283): INFO Epoch: [21] [1240/2502] eta: 0:16:28 lr: 0.000006 loss_cls: 4.0097 (3.8268) grad_norm: 4.3850 (4.4203) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 00:47:36 root] (utils.py 283): INFO Epoch: [21] [1250/2502] eta: 0:16:20 lr: 0.000006 loss_cls: 4.0097 (3.8282) grad_norm: 4.2510 (4.4184) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 00:47:44 root] (utils.py 283): INFO Epoch: [21] [1260/2502] eta: 0:16:12 lr: 0.000006 loss_cls: 3.8183 (3.8285) grad_norm: 4.2510 (4.4309) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 00:47:52 root] (utils.py 283): INFO Epoch: [21] [1270/2502] eta: 0:16:04 lr: 0.000006 loss_cls: 4.1381 (3.8318) grad_norm: 4.5712 (4.4340) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 00:47:59 root] (utils.py 283): INFO Epoch: [21] [1280/2502] eta: 0:15:56 lr: 0.000006 loss_cls: 4.1302 (3.8307) grad_norm: 4.4158 (4.4340) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 00:48:07 root] (utils.py 283): INFO Epoch: [21] [1290/2502] eta: 0:15:49 lr: 0.000006 loss_cls: 3.6893 (3.8305) grad_norm: 4.2951 (4.4356) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 00:48:15 root] (utils.py 283): INFO Epoch: [21] [1300/2502] eta: 0:15:41 lr: 0.000006 loss_cls: 3.6495 (3.8287) grad_norm: 4.4290 (4.4369) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 00:48:23 root] (utils.py 283): INFO Epoch: [21] [1310/2502] eta: 0:15:33 lr: 0.000006 loss_cls: 3.6662 (3.8271) grad_norm: 4.3365 (4.4376) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 00:48:31 root] (utils.py 283): INFO Epoch: [21] [1320/2502] eta: 0:15:25 lr: 0.000006 loss_cls: 3.7406 (3.8272) grad_norm: 4.2927 (4.4391) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-06 00:48:39 root] (utils.py 283): INFO Epoch: [21] [1330/2502] eta: 0:15:17 lr: 0.000006 loss_cls: 4.0797 (3.8282) grad_norm: 4.2909 (4.4384) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-06 00:48:46 root] (utils.py 283): INFO Epoch: [21] [1340/2502] eta: 0:15:10 lr: 0.000006 loss_cls: 4.1644 (3.8281) grad_norm: 4.5312 (4.4404) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 00:48:54 root] (utils.py 283): INFO Epoch: [21] [1350/2502] eta: 0:15:02 lr: 0.000006 loss_cls: 4.0566 (3.8286) grad_norm: 4.5312 (4.4392) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:02 root] (utils.py 283): INFO Epoch: [21] [1360/2502] eta: 0:14:54 lr: 0.000006 loss_cls: 3.8685 (3.8292) grad_norm: 4.2735 (4.4395) time: 0.7948 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:10 root] (utils.py 283): INFO Epoch: [21] [1370/2502] eta: 0:14:46 lr: 0.000006 loss_cls: 3.7855 (3.8273) grad_norm: 4.1777 (4.4385) time: 0.7975 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:18 root] (utils.py 283): INFO Epoch: [21] [1380/2502] eta: 0:14:39 lr: 0.000006 loss_cls: 3.8897 (3.8284) grad_norm: 4.2972 (4.4393) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:26 root] (utils.py 283): INFO Epoch: [21] [1390/2502] eta: 0:14:31 lr: 0.000006 loss_cls: 4.0079 (3.8286) grad_norm: 4.3562 (4.4398) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:34 root] (utils.py 283): INFO Epoch: [21] [1400/2502] eta: 0:14:23 lr: 0.000006 loss_cls: 3.8554 (3.8287) grad_norm: 4.2865 (4.4403) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:42 root] (utils.py 283): INFO Epoch: [21] [1410/2502] eta: 0:14:15 lr: 0.000006 loss_cls: 3.8365 (3.8272) grad_norm: 4.1980 (4.4393) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:49 root] (utils.py 283): INFO Epoch: [21] [1420/2502] eta: 0:14:07 lr: 0.000006 loss_cls: 3.9406 (3.8265) grad_norm: 4.1980 (4.4375) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 00:49:57 root] (utils.py 283): INFO Epoch: [21] [1430/2502] eta: 0:13:59 lr: 0.000006 loss_cls: 3.9406 (3.8265) grad_norm: 4.1037 (4.4358) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-06 00:50:05 root] (utils.py 283): INFO Epoch: [21] [1440/2502] eta: 0:13:51 lr: 0.000006 loss_cls: 3.8952 (3.8247) grad_norm: 4.1037 (4.4344) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 00:50:13 root] (utils.py 283): INFO Epoch: [21] [1450/2502] eta: 0:13:44 lr: 0.000006 loss_cls: 3.9166 (3.8245) grad_norm: 4.1772 (4.4336) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 00:50:21 root] (utils.py 283): INFO Epoch: [21] [1460/2502] eta: 0:13:36 lr: 0.000006 loss_cls: 3.9264 (3.8249) grad_norm: 4.3406 (4.4363) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 00:50:28 root] (utils.py 283): INFO Epoch: [21] [1470/2502] eta: 0:13:28 lr: 0.000006 loss_cls: 4.0140 (3.8243) grad_norm: 4.2669 (4.4346) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 00:50:36 root] (utils.py 283): INFO Epoch: [21] [1480/2502] eta: 0:13:20 lr: 0.000006 loss_cls: 4.0195 (3.8261) grad_norm: 4.0590 (4.4327) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-06 00:50:44 root] (utils.py 283): INFO Epoch: [21] [1490/2502] eta: 0:13:12 lr: 0.000006 loss_cls: 4.1500 (3.8278) grad_norm: 4.0582 (4.4320) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-06 00:50:52 root] (utils.py 283): INFO Epoch: [21] [1500/2502] eta: 0:13:04 lr: 0.000006 loss_cls: 4.0424 (3.8294) grad_norm: 4.2074 (4.4334) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:00 root] (utils.py 283): INFO Epoch: [21] [1510/2502] eta: 0:12:56 lr: 0.000006 loss_cls: 3.9093 (3.8291) grad_norm: 4.4798 (4.4330) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:08 root] (utils.py 283): INFO Epoch: [21] [1520/2502] eta: 0:12:49 lr: 0.000006 loss_cls: 3.8510 (3.8293) grad_norm: 4.2984 (4.4368) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:15 root] (utils.py 283): INFO Epoch: [21] [1530/2502] eta: 0:12:41 lr: 0.000006 loss_cls: 3.7529 (3.8288) grad_norm: 4.2262 (4.4385) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:23 root] (utils.py 283): INFO Epoch: [21] [1540/2502] eta: 0:12:33 lr: 0.000006 loss_cls: 3.9271 (3.8298) grad_norm: 4.2646 (4.4379) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:31 root] (utils.py 283): INFO Epoch: [21] [1550/2502] eta: 0:12:25 lr: 0.000006 loss_cls: 3.9271 (3.8288) grad_norm: 4.2276 (4.4363) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:39 root] (utils.py 283): INFO Epoch: [21] [1560/2502] eta: 0:12:17 lr: 0.000006 loss_cls: 3.9914 (3.8304) grad_norm: 4.2859 (4.4354) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:47 root] (utils.py 283): INFO Epoch: [21] [1570/2502] eta: 0:12:09 lr: 0.000006 loss_cls: 4.1138 (3.8315) grad_norm: 4.2665 (4.4344) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 00:51:55 root] (utils.py 283): INFO Epoch: [21] [1580/2502] eta: 0:12:02 lr: 0.000006 loss_cls: 3.9909 (3.8332) grad_norm: 4.2665 (4.4349) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 00:52:02 root] (utils.py 283): INFO Epoch: [21] [1590/2502] eta: 0:11:54 lr: 0.000006 loss_cls: 3.9787 (3.8336) grad_norm: 4.3506 (4.4343) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 00:52:10 root] (utils.py 283): INFO Epoch: [21] [1600/2502] eta: 0:11:46 lr: 0.000006 loss_cls: 3.7405 (3.8326) grad_norm: 4.1752 (4.4331) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 00:52:18 root] (utils.py 283): INFO Epoch: [21] [1610/2502] eta: 0:11:38 lr: 0.000006 loss_cls: 3.5398 (3.8317) grad_norm: 4.2526 (4.4324) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 00:52:26 root] (utils.py 283): INFO Epoch: [21] [1620/2502] eta: 0:11:30 lr: 0.000006 loss_cls: 3.8452 (3.8320) grad_norm: 4.2935 (4.4331) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 00:52:34 root] (utils.py 283): INFO Epoch: [21] [1630/2502] eta: 0:11:23 lr: 0.000006 loss_cls: 4.0257 (3.8323) grad_norm: 4.2382 (4.4322) time: 0.7970 data: 0.0003 max mem: 8421 +[2024-12-06 00:52:42 root] (utils.py 283): INFO Epoch: [21] [1640/2502] eta: 0:11:15 lr: 0.000006 loss_cls: 4.0119 (3.8334) grad_norm: 4.1354 (4.4305) time: 0.7989 data: 0.0002 max mem: 8421 +[2024-12-06 00:52:50 root] (utils.py 283): INFO Epoch: [21] [1650/2502] eta: 0:11:07 lr: 0.000006 loss_cls: 4.0197 (3.8334) grad_norm: 4.2305 (4.4297) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-06 00:52:58 root] (utils.py 283): INFO Epoch: [21] [1660/2502] eta: 0:10:59 lr: 0.000006 loss_cls: 3.9867 (3.8327) grad_norm: 4.3526 (4.4306) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-06 00:53:06 root] (utils.py 283): INFO Epoch: [21] [1670/2502] eta: 0:10:51 lr: 0.000006 loss_cls: 3.7041 (3.8319) grad_norm: 4.3852 (4.4306) time: 0.7884 data: 0.0002 max mem: 8421 +[2024-12-06 00:53:14 root] (utils.py 283): INFO Epoch: [21] [1680/2502] eta: 0:10:44 lr: 0.000006 loss_cls: 3.9919 (3.8334) grad_norm: 4.3327 (4.4313) time: 0.7929 data: 0.0002 max mem: 8421 +[2024-12-06 00:53:22 root] (utils.py 283): INFO Epoch: [21] [1690/2502] eta: 0:10:36 lr: 0.000006 loss_cls: 4.0896 (3.8350) grad_norm: 4.4051 (4.4335) time: 0.7970 data: 0.0003 max mem: 8421 +[2024-12-06 00:53:29 root] (utils.py 283): INFO Epoch: [21] [1700/2502] eta: 0:10:28 lr: 0.000006 loss_cls: 3.7818 (3.8324) grad_norm: 4.4051 (4.4356) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-06 00:53:37 root] (utils.py 283): INFO Epoch: [21] [1710/2502] eta: 0:10:20 lr: 0.000006 loss_cls: 3.5098 (3.8319) grad_norm: 4.3585 (4.4352) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 00:53:45 root] (utils.py 283): INFO Epoch: [21] [1720/2502] eta: 0:10:12 lr: 0.000006 loss_cls: 3.8996 (3.8311) grad_norm: 4.2576 (4.4341) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 00:53:53 root] (utils.py 283): INFO Epoch: [21] [1730/2502] eta: 0:10:04 lr: 0.000006 loss_cls: 4.0046 (3.8318) grad_norm: 4.2896 (4.4365) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:01 root] (utils.py 283): INFO Epoch: [21] [1740/2502] eta: 0:09:57 lr: 0.000006 loss_cls: 4.0123 (3.8324) grad_norm: 4.4006 (4.4364) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:08 root] (utils.py 283): INFO Epoch: [21] [1750/2502] eta: 0:09:49 lr: 0.000006 loss_cls: 3.8870 (3.8321) grad_norm: 4.2976 (4.4350) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:16 root] (utils.py 283): INFO Epoch: [21] [1760/2502] eta: 0:09:41 lr: 0.000006 loss_cls: 3.7086 (3.8319) grad_norm: 4.3143 (4.4359) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:24 root] (utils.py 283): INFO Epoch: [21] [1770/2502] eta: 0:09:33 lr: 0.000006 loss_cls: 3.5816 (3.8315) grad_norm: 4.2731 (4.4360) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:32 root] (utils.py 283): INFO Epoch: [21] [1780/2502] eta: 0:09:25 lr: 0.000006 loss_cls: 3.8488 (3.8324) grad_norm: 4.2397 (4.4350) time: 0.7881 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:40 root] (utils.py 283): INFO Epoch: [21] [1790/2502] eta: 0:09:17 lr: 0.000006 loss_cls: 3.8488 (3.8325) grad_norm: 4.1618 (4.4333) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:48 root] (utils.py 283): INFO Epoch: [21] [1800/2502] eta: 0:09:10 lr: 0.000006 loss_cls: 3.7952 (3.8312) grad_norm: 4.1732 (4.4323) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 00:54:56 root] (utils.py 283): INFO Epoch: [21] [1810/2502] eta: 0:09:02 lr: 0.000006 loss_cls: 3.9113 (3.8324) grad_norm: 4.2944 (4.4322) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:03 root] (utils.py 283): INFO Epoch: [21] [1820/2502] eta: 0:08:54 lr: 0.000006 loss_cls: 3.9373 (3.8325) grad_norm: 4.3680 (4.4315) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:11 root] (utils.py 283): INFO Epoch: [21] [1830/2502] eta: 0:08:46 lr: 0.000006 loss_cls: 3.8884 (3.8326) grad_norm: 4.2389 (4.4302) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:19 root] (utils.py 283): INFO Epoch: [21] [1840/2502] eta: 0:08:38 lr: 0.000006 loss_cls: 3.8430 (3.8334) grad_norm: 4.1476 (4.4299) time: 0.7925 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:27 root] (utils.py 283): INFO Epoch: [21] [1850/2502] eta: 0:08:31 lr: 0.000006 loss_cls: 3.9237 (3.8327) grad_norm: 4.1442 (4.4284) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:35 root] (utils.py 283): INFO Epoch: [21] [1860/2502] eta: 0:08:23 lr: 0.000006 loss_cls: 3.9874 (3.8340) grad_norm: 4.2215 (4.4285) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:43 root] (utils.py 283): INFO Epoch: [21] [1870/2502] eta: 0:08:15 lr: 0.000006 loss_cls: 4.0065 (3.8344) grad_norm: 4.3059 (4.4282) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:51 root] (utils.py 283): INFO Epoch: [21] [1880/2502] eta: 0:08:07 lr: 0.000006 loss_cls: 3.8913 (3.8343) grad_norm: 4.4551 (4.4292) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 00:55:58 root] (utils.py 283): INFO Epoch: [21] [1890/2502] eta: 0:07:59 lr: 0.000006 loss_cls: 3.8404 (3.8354) grad_norm: 4.4551 (4.4292) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 00:56:06 root] (utils.py 283): INFO Epoch: [21] [1900/2502] eta: 0:07:51 lr: 0.000006 loss_cls: 4.0706 (3.8363) grad_norm: 4.3396 (4.4289) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-06 00:56:14 root] (utils.py 283): INFO Epoch: [21] [1910/2502] eta: 0:07:44 lr: 0.000006 loss_cls: 3.9749 (3.8358) grad_norm: 4.3396 (4.4297) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 00:56:22 root] (utils.py 283): INFO Epoch: [21] [1920/2502] eta: 0:07:36 lr: 0.000006 loss_cls: 4.1205 (3.8376) grad_norm: 4.3984 (4.4303) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 00:56:30 root] (utils.py 283): INFO Epoch: [21] [1930/2502] eta: 0:07:28 lr: 0.000006 loss_cls: 4.1170 (3.8369) grad_norm: 4.3984 (4.4319) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 00:56:38 root] (utils.py 283): INFO Epoch: [21] [1940/2502] eta: 0:07:20 lr: 0.000006 loss_cls: 3.8258 (3.8374) grad_norm: 4.3012 (4.4318) time: 0.7892 data: 0.0003 max mem: 8421 +[2024-12-06 00:56:46 root] (utils.py 283): INFO Epoch: [21] [1950/2502] eta: 0:07:12 lr: 0.000006 loss_cls: 4.0737 (3.8377) grad_norm: 4.3610 (4.4327) time: 0.7967 data: 0.0002 max mem: 8421 +[2024-12-06 00:56:54 root] (utils.py 283): INFO Epoch: [21] [1960/2502] eta: 0:07:04 lr: 0.000006 loss_cls: 4.1329 (3.8398) grad_norm: 4.3866 (4.4343) time: 0.7959 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:02 root] (utils.py 283): INFO Epoch: [21] [1970/2502] eta: 0:06:57 lr: 0.000006 loss_cls: 4.1343 (3.8400) grad_norm: 4.3802 (4.4339) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:10 root] (utils.py 283): INFO Epoch: [21] [1980/2502] eta: 0:06:49 lr: 0.000006 loss_cls: 4.0175 (3.8413) grad_norm: 4.2761 (4.4343) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:17 root] (utils.py 283): INFO Epoch: [21] [1990/2502] eta: 0:06:41 lr: 0.000006 loss_cls: 3.9982 (3.8415) grad_norm: 4.6384 (4.4477) time: 0.7929 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:25 root] (utils.py 283): INFO Epoch: [21] [2000/2502] eta: 0:06:33 lr: 0.000006 loss_cls: 3.7985 (3.8412) grad_norm: 4.6504 (4.4473) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:33 root] (utils.py 283): INFO Epoch: [21] [2010/2502] eta: 0:06:25 lr: 0.000006 loss_cls: 4.1155 (3.8430) grad_norm: 4.1129 (4.4468) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:41 root] (utils.py 283): INFO Epoch: [21] [2020/2502] eta: 0:06:17 lr: 0.000006 loss_cls: 4.1155 (3.8428) grad_norm: 4.2635 (4.4461) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:49 root] (utils.py 283): INFO Epoch: [21] [2030/2502] eta: 0:06:10 lr: 0.000006 loss_cls: 4.0058 (3.8433) grad_norm: 4.3065 (4.4456) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 00:57:57 root] (utils.py 283): INFO Epoch: [21] [2040/2502] eta: 0:06:02 lr: 0.000006 loss_cls: 3.9032 (3.8425) grad_norm: 4.2072 (4.4447) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:04 root] (utils.py 283): INFO Epoch: [21] [2050/2502] eta: 0:05:54 lr: 0.000006 loss_cls: 3.9415 (3.8435) grad_norm: 4.1318 (4.4436) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:12 root] (utils.py 283): INFO Epoch: [21] [2060/2502] eta: 0:05:46 lr: 0.000006 loss_cls: 3.9628 (3.8428) grad_norm: 4.2352 (4.4433) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:20 root] (utils.py 283): INFO Epoch: [21] [2070/2502] eta: 0:05:38 lr: 0.000006 loss_cls: 3.9490 (3.8426) grad_norm: 4.3065 (4.4426) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:28 root] (utils.py 283): INFO Epoch: [21] [2080/2502] eta: 0:05:30 lr: 0.000006 loss_cls: 4.1348 (3.8432) grad_norm: 4.3044 (4.4422) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:36 root] (utils.py 283): INFO Epoch: [21] [2090/2502] eta: 0:05:23 lr: 0.000006 loss_cls: 4.1371 (3.8443) grad_norm: 4.3044 (4.4421) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:44 root] (utils.py 283): INFO Epoch: [21] [2100/2502] eta: 0:05:15 lr: 0.000006 loss_cls: 4.1371 (3.8457) grad_norm: 4.3040 (4.4414) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:51 root] (utils.py 283): INFO Epoch: [21] [2110/2502] eta: 0:05:07 lr: 0.000006 loss_cls: 4.0837 (3.8463) grad_norm: 4.3720 (4.4417) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 00:58:59 root] (utils.py 283): INFO Epoch: [21] [2120/2502] eta: 0:04:59 lr: 0.000006 loss_cls: 4.0417 (3.8463) grad_norm: 4.3862 (4.4418) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 00:59:07 root] (utils.py 283): INFO Epoch: [21] [2130/2502] eta: 0:04:51 lr: 0.000006 loss_cls: 3.7901 (3.8447) grad_norm: 4.3439 (4.4418) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 00:59:15 root] (utils.py 283): INFO Epoch: [21] [2140/2502] eta: 0:04:43 lr: 0.000006 loss_cls: 3.8090 (3.8443) grad_norm: 4.3676 (4.4420) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 00:59:23 root] (utils.py 283): INFO Epoch: [21] [2150/2502] eta: 0:04:35 lr: 0.000006 loss_cls: 4.0077 (3.8448) grad_norm: 4.2820 (4.4405) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 00:59:31 root] (utils.py 283): INFO Epoch: [21] [2160/2502] eta: 0:04:28 lr: 0.000006 loss_cls: 4.0418 (3.8439) grad_norm: 4.2669 (4.4396) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-06 00:59:38 root] (utils.py 283): INFO Epoch: [21] [2170/2502] eta: 0:04:20 lr: 0.000006 loss_cls: 3.5555 (3.8429) grad_norm: 4.2520 (4.4389) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 00:59:46 root] (utils.py 283): INFO Epoch: [21] [2180/2502] eta: 0:04:12 lr: 0.000006 loss_cls: 3.8126 (3.8426) grad_norm: 4.2103 (4.4382) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 00:59:54 root] (utils.py 283): INFO Epoch: [21] [2190/2502] eta: 0:04:04 lr: 0.000006 loss_cls: 3.5522 (3.8396) grad_norm: 4.2866 (4.4398) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 01:00:02 root] (utils.py 283): INFO Epoch: [21] [2200/2502] eta: 0:03:56 lr: 0.000006 loss_cls: 3.3525 (3.8394) grad_norm: 4.3090 (4.4397) time: 0.7862 data: 0.0002 max mem: 8421 +[2024-12-06 01:00:10 root] (utils.py 283): INFO Epoch: [21] [2210/2502] eta: 0:03:48 lr: 0.000006 loss_cls: 3.8936 (3.8391) grad_norm: 4.2878 (4.4397) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 01:00:17 root] (utils.py 283): INFO Epoch: [21] [2220/2502] eta: 0:03:41 lr: 0.000006 loss_cls: 3.8936 (3.8393) grad_norm: 4.2841 (4.4395) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 01:00:25 root] (utils.py 283): INFO Epoch: [21] [2230/2502] eta: 0:03:33 lr: 0.000006 loss_cls: 4.0960 (3.8400) grad_norm: 4.2984 (4.4395) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 01:00:33 root] (utils.py 283): INFO Epoch: [21] [2240/2502] eta: 0:03:25 lr: 0.000006 loss_cls: 3.9750 (3.8398) grad_norm: 4.2984 (4.4390) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 01:00:41 root] (utils.py 283): INFO Epoch: [21] [2250/2502] eta: 0:03:17 lr: 0.000006 loss_cls: 3.6582 (3.8387) grad_norm: 4.3085 (4.4393) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 01:00:49 root] (utils.py 283): INFO Epoch: [21] [2260/2502] eta: 0:03:09 lr: 0.000006 loss_cls: 3.5224 (3.8382) grad_norm: 4.3644 (4.4395) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 01:00:57 root] (utils.py 283): INFO Epoch: [21] [2270/2502] eta: 0:03:01 lr: 0.000006 loss_cls: 3.5224 (3.8367) grad_norm: 4.4148 (4.4426) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 01:01:05 root] (utils.py 283): INFO Epoch: [21] [2280/2502] eta: 0:02:54 lr: 0.000006 loss_cls: 3.7714 (3.8377) grad_norm: 4.4809 (4.4453) time: 0.7919 data: 0.0003 max mem: 8421 +[2024-12-06 01:01:14 root] (utils.py 283): INFO Epoch: [21] [2290/2502] eta: 0:02:46 lr: 0.000006 loss_cls: 4.1116 (3.8384) grad_norm: 4.4556 (4.4447) time: 0.8538 data: 0.0004 max mem: 8421 +[2024-12-06 01:01:22 root] (utils.py 283): INFO Epoch: [21] [2300/2502] eta: 0:02:38 lr: 0.000006 loss_cls: 4.0915 (3.8384) grad_norm: 4.2684 (4.4445) time: 0.8658 data: 0.0004 max mem: 8421 +[2024-12-06 01:01:30 root] (utils.py 283): INFO Epoch: [21] [2310/2502] eta: 0:02:30 lr: 0.000006 loss_cls: 3.6845 (3.8377) grad_norm: 4.4135 (4.4447) time: 0.8147 data: 0.0003 max mem: 8421 +[2024-12-06 01:01:38 root] (utils.py 283): INFO Epoch: [21] [2320/2502] eta: 0:02:22 lr: 0.000006 loss_cls: 3.5392 (3.8369) grad_norm: 4.5011 (4.4453) time: 0.8027 data: 0.0003 max mem: 8421 +[2024-12-06 01:01:46 root] (utils.py 283): INFO Epoch: [21] [2330/2502] eta: 0:02:15 lr: 0.000006 loss_cls: 3.9145 (3.8374) grad_norm: 4.4619 (4.4458) time: 0.7990 data: 0.0003 max mem: 8421 +[2024-12-06 01:01:54 root] (utils.py 283): INFO Epoch: [21] [2340/2502] eta: 0:02:07 lr: 0.000006 loss_cls: 3.9484 (3.8381) grad_norm: 4.4160 (4.4458) time: 0.7928 data: 0.0003 max mem: 8421 +[2024-12-06 01:02:02 root] (utils.py 283): INFO Epoch: [21] [2350/2502] eta: 0:01:59 lr: 0.000006 loss_cls: 3.8927 (3.8377) grad_norm: 4.4884 (4.4475) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 01:02:10 root] (utils.py 283): INFO Epoch: [21] [2360/2502] eta: 0:01:51 lr: 0.000006 loss_cls: 3.8131 (3.8377) grad_norm: 4.3808 (4.4475) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 01:02:17 root] (utils.py 283): INFO Epoch: [21] [2370/2502] eta: 0:01:43 lr: 0.000006 loss_cls: 4.0409 (3.8375) grad_norm: 4.1744 (4.4467) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-06 01:02:25 root] (utils.py 283): INFO Epoch: [21] [2380/2502] eta: 0:01:35 lr: 0.000006 loss_cls: 4.0585 (3.8373) grad_norm: 4.0920 (4.4453) time: 0.7942 data: 0.0003 max mem: 8421 +[2024-12-06 01:02:33 root] (utils.py 283): INFO Epoch: [21] [2390/2502] eta: 0:01:27 lr: 0.000006 loss_cls: 3.9526 (3.8371) grad_norm: 4.1664 (4.4449) time: 0.7887 data: 0.0002 max mem: 8421 +[2024-12-06 01:02:41 root] (utils.py 283): INFO Epoch: [21] [2400/2502] eta: 0:01:20 lr: 0.000006 loss_cls: 3.8840 (3.8370) grad_norm: 4.3442 (4.4449) time: 0.7870 data: 0.0002 max mem: 8421 +[2024-12-06 01:02:49 root] (utils.py 283): INFO Epoch: [21] [2410/2502] eta: 0:01:12 lr: 0.000006 loss_cls: 4.0078 (3.8373) grad_norm: 4.4785 (4.4453) time: 0.7866 data: 0.0002 max mem: 8421 +[2024-12-06 01:02:57 root] (utils.py 283): INFO Epoch: [21] [2420/2502] eta: 0:01:04 lr: 0.000006 loss_cls: 3.9531 (3.8359) grad_norm: 4.3335 (4.4444) time: 0.7828 data: 0.0002 max mem: 8421 +[2024-12-06 01:03:05 root] (utils.py 283): INFO Epoch: [21] [2430/2502] eta: 0:00:56 lr: 0.000006 loss_cls: 4.0004 (3.8367) grad_norm: 4.2199 (4.4441) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 01:03:13 root] (utils.py 283): INFO Epoch: [21] [2440/2502] eta: 0:00:48 lr: 0.000006 loss_cls: 3.9908 (3.8364) grad_norm: 4.2029 (4.4432) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-06 01:03:20 root] (utils.py 283): INFO Epoch: [21] [2450/2502] eta: 0:00:40 lr: 0.000006 loss_cls: 3.8056 (3.8362) grad_norm: 4.1917 (4.4435) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 01:03:28 root] (utils.py 283): INFO Epoch: [21] [2460/2502] eta: 0:00:32 lr: 0.000006 loss_cls: 3.7857 (3.8360) grad_norm: 4.2955 (4.4442) time: 0.7855 data: 0.0002 max mem: 8421 +[2024-12-06 01:03:36 root] (utils.py 283): INFO Epoch: [21] [2470/2502] eta: 0:00:25 lr: 0.000006 loss_cls: 3.7857 (3.8355) grad_norm: 4.3501 (4.4448) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 01:03:44 root] (utils.py 283): INFO Epoch: [21] [2480/2502] eta: 0:00:17 lr: 0.000006 loss_cls: 3.8491 (3.8359) grad_norm: 4.3501 (4.4445) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 01:03:52 root] (utils.py 283): INFO Epoch: [21] [2490/2502] eta: 0:00:09 lr: 0.000006 loss_cls: 4.2192 (3.8369) grad_norm: 4.2864 (4.4445) time: 0.8120 data: 0.0262 max mem: 8421 +[2024-12-06 01:04:00 root] (utils.py 283): INFO Epoch: [21] [2500/2502] eta: 0:00:01 lr: 0.000006 loss_cls: 4.2192 (3.8373) grad_norm: 4.4187 (4.4450) time: 0.8121 data: 0.0262 max mem: 8421 +[2024-12-06 01:04:01 root] (utils.py 283): INFO Epoch: [21] [2501/2502] eta: 0:00:00 lr: 0.000006 loss_cls: 4.2192 (3.8374) grad_norm: 4.5103 (4.4452) time: 0.8114 data: 0.0262 max mem: 8421 +[2024-12-06 01:04:01 root] (utils.py 297): INFO Epoch: [21] Total time: 0:32:45 (0.7854 s / it) +[2024-12-06 01:04:01 root] (engine.py 179): INFO Averaged stats:lr: 0.000006 loss_cls: 4.2192 (3.8285) grad_norm: 4.5103 (4.4452) +[2024-12-06 01:04:02 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7218 (0.7218) acc1: 87.5000 (87.5000) acc3: 94.5312 (94.5312) acc5: 98.4375 (98.4375) time: 0.1308 data: 0.0004 max mem: 8421 +[2024-12-06 01:04:03 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8121 (0.8643) acc1: 83.5938 (82.2443) acc3: 92.1875 (92.6136) acc5: 94.5312 (95.3835) time: 0.1326 data: 0.0004 max mem: 8421 +[2024-12-06 01:04:04 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8665 (0.9033) acc1: 78.9062 (81.0640) acc3: 92.1875 (92.3735) acc5: 94.5312 (95.1265) time: 0.1334 data: 0.0005 max mem: 8421 +[2024-12-06 01:04:06 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9398 (0.9090) acc1: 78.9062 (80.3175) acc3: 92.1875 (92.7923) acc5: 96.0938 (95.3629) time: 0.1360 data: 0.0005 max mem: 8421 +[2024-12-06 01:04:07 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8269 (0.8990) acc1: 80.4688 (80.5831) acc3: 93.7500 (92.9688) acc5: 96.0938 (95.4268) time: 0.1365 data: 0.0015 max mem: 8421 +[2024-12-06 01:04:09 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9717 (0.9927) acc1: 74.2188 (78.4161) acc3: 88.2812 (91.3756) acc5: 91.4062 (94.3321) time: 0.1588 data: 0.0253 max mem: 8421 +[2024-12-06 01:04:11 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2727 (1.0356) acc1: 71.0938 (77.5102) acc3: 86.7188 (90.6506) acc5: 90.6250 (93.6603) time: 0.1797 data: 0.0447 max mem: 8421 +[2024-12-06 01:04:12 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2688 (1.0788) acc1: 71.8750 (76.4415) acc3: 86.7188 (90.0418) acc5: 89.8438 (93.2658) time: 0.1691 data: 0.0302 max mem: 8421 +[2024-12-06 01:04:14 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2908 (1.1159) acc1: 69.5312 (75.5015) acc3: 85.1562 (89.4676) acc5: 89.0625 (92.7276) time: 0.1612 data: 0.0213 max mem: 8421 +[2024-12-06 01:04:15 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3204 (1.1465) acc1: 68.7500 (74.7510) acc3: 84.3750 (89.0024) acc5: 89.0625 (92.2734) time: 0.1613 data: 0.0238 max mem: 8421 +[2024-12-06 01:04:16 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2305 (1.1351) acc1: 71.8750 (74.9200) acc3: 87.5000 (89.1760) acc5: 91.4062 (92.4720) time: 0.1504 data: 0.0127 max mem: 8421 +[2024-12-06 01:04:16 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1527 s / it) +[2024-12-06 01:04:19 root] (engine.py 264): INFO * Acc@1 74.874 Acc@3 89.082 Acc@5 92.350 loss 1.136 flops 1.285 layer_flops 1.251 +[2024-12-06 01:04:19 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 74.9% +[2024-12-06 01:04:19 root] (main.py 551): INFO Max accuracy: 74.94% +[2024-12-06 01:04:19 root] (utils.py 283): INFO Epoch: [22] [ 0/2502] eta: 0:32:37 lr: 0.000005 loss_cls: 4.4044 (4.4044) grad_norm: 4.3479 (4.3479) time: 0.7824 data: 0.0004 max mem: 8421 +[2024-12-06 01:04:27 root] (utils.py 283): INFO Epoch: [22] [ 10/2502] eta: 0:32:27 lr: 0.000005 loss_cls: 3.9312 (3.7538) grad_norm: 4.3492 (4.4027) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 01:04:35 root] (utils.py 283): INFO Epoch: [22] [ 20/2502] eta: 0:32:15 lr: 0.000005 loss_cls: 3.9015 (3.7281) grad_norm: 4.3130 (4.3906) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 01:04:43 root] (utils.py 283): INFO Epoch: [22] [ 30/2502] eta: 0:32:08 lr: 0.000005 loss_cls: 4.0031 (3.7862) grad_norm: 4.2634 (4.6650) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 01:04:51 root] (utils.py 283): INFO Epoch: [22] [ 40/2502] eta: 0:32:02 lr: 0.000005 loss_cls: 3.8392 (3.7606) grad_norm: 4.4537 (4.6858) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 01:04:59 root] (utils.py 283): INFO Epoch: [22] [ 50/2502] eta: 0:31:56 lr: 0.000005 loss_cls: 3.7619 (3.7288) grad_norm: 4.4594 (4.6319) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 01:05:06 root] (utils.py 283): INFO Epoch: [22] [ 60/2502] eta: 0:31:48 lr: 0.000005 loss_cls: 3.7619 (3.6938) grad_norm: 4.3421 (4.6001) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 01:05:14 root] (utils.py 283): INFO Epoch: [22] [ 70/2502] eta: 0:31:39 lr: 0.000005 loss_cls: 3.9377 (3.7315) grad_norm: 4.4474 (4.6514) time: 0.7801 data: 0.0002 max mem: 8421 +[2024-12-06 01:05:22 root] (utils.py 283): INFO Epoch: [22] [ 80/2502] eta: 0:31:33 lr: 0.000005 loss_cls: 4.0067 (3.7459) grad_norm: 4.5713 (4.6209) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 01:05:30 root] (utils.py 283): INFO Epoch: [22] [ 90/2502] eta: 0:31:28 lr: 0.000005 loss_cls: 4.0014 (3.7516) grad_norm: 4.3674 (4.5900) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-06 01:05:38 root] (utils.py 283): INFO Epoch: [22] [ 100/2502] eta: 0:31:26 lr: 0.000005 loss_cls: 4.0502 (3.7551) grad_norm: 4.1002 (4.5416) time: 0.8000 data: 0.0002 max mem: 8421 +[2024-12-06 01:05:46 root] (utils.py 283): INFO Epoch: [22] [ 110/2502] eta: 0:31:17 lr: 0.000005 loss_cls: 4.0931 (3.7720) grad_norm: 4.2352 (4.5348) time: 0.7937 data: 0.0003 max mem: 8421 +[2024-12-06 01:05:54 root] (utils.py 283): INFO Epoch: [22] [ 120/2502] eta: 0:31:09 lr: 0.000005 loss_cls: 3.8430 (3.7677) grad_norm: 4.2671 (4.5202) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 01:06:01 root] (utils.py 283): INFO Epoch: [22] [ 130/2502] eta: 0:30:59 lr: 0.000005 loss_cls: 3.8321 (3.7701) grad_norm: 4.3287 (4.5609) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 01:06:09 root] (utils.py 283): INFO Epoch: [22] [ 140/2502] eta: 0:30:51 lr: 0.000005 loss_cls: 4.0511 (3.8046) grad_norm: 4.3870 (4.5403) time: 0.7773 data: 0.0002 max mem: 8421 +[2024-12-06 01:06:17 root] (utils.py 283): INFO Epoch: [22] [ 150/2502] eta: 0:30:42 lr: 0.000005 loss_cls: 3.9748 (3.8009) grad_norm: 4.1825 (4.5156) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 01:06:25 root] (utils.py 283): INFO Epoch: [22] [ 160/2502] eta: 0:30:35 lr: 0.000005 loss_cls: 3.9701 (3.8101) grad_norm: 4.2783 (4.5322) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 01:06:33 root] (utils.py 283): INFO Epoch: [22] [ 170/2502] eta: 0:30:27 lr: 0.000005 loss_cls: 4.0423 (3.8213) grad_norm: 4.3185 (4.5166) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 01:06:40 root] (utils.py 283): INFO Epoch: [22] [ 180/2502] eta: 0:30:19 lr: 0.000005 loss_cls: 4.0142 (3.8112) grad_norm: 4.3133 (4.5188) time: 0.7821 data: 0.0002 max mem: 8421 +[2024-12-06 01:06:48 root] (utils.py 283): INFO Epoch: [22] [ 190/2502] eta: 0:30:11 lr: 0.000005 loss_cls: 3.6986 (3.8061) grad_norm: 4.1030 (4.5009) time: 0.7849 data: 0.0002 max mem: 8421 +[2024-12-06 01:06:56 root] (utils.py 283): INFO Epoch: [22] [ 200/2502] eta: 0:30:03 lr: 0.000005 loss_cls: 3.9013 (3.8152) grad_norm: 4.0450 (4.4808) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 01:07:04 root] (utils.py 283): INFO Epoch: [22] [ 210/2502] eta: 0:29:55 lr: 0.000005 loss_cls: 3.9555 (3.8167) grad_norm: 4.1481 (4.4765) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 01:07:12 root] (utils.py 283): INFO Epoch: [22] [ 220/2502] eta: 0:29:49 lr: 0.000005 loss_cls: 3.8978 (3.8198) grad_norm: 4.3852 (4.4815) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 01:07:20 root] (utils.py 283): INFO Epoch: [22] [ 230/2502] eta: 0:29:41 lr: 0.000005 loss_cls: 3.6412 (3.8137) grad_norm: 4.2706 (4.4839) time: 0.7903 data: 0.0003 max mem: 8421 +[2024-12-06 01:07:28 root] (utils.py 283): INFO Epoch: [22] [ 240/2502] eta: 0:29:33 lr: 0.000005 loss_cls: 3.9422 (3.8204) grad_norm: 4.2690 (4.4844) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 01:07:36 root] (utils.py 283): INFO Epoch: [22] [ 250/2502] eta: 0:29:27 lr: 0.000005 loss_cls: 4.0117 (3.8198) grad_norm: 4.2338 (4.4791) time: 0.7922 data: 0.0003 max mem: 8421 +[2024-12-06 01:07:43 root] (utils.py 283): INFO Epoch: [22] [ 260/2502] eta: 0:29:18 lr: 0.000005 loss_cls: 3.9409 (3.8224) grad_norm: 4.2577 (4.4762) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-06 01:07:51 root] (utils.py 283): INFO Epoch: [22] [ 270/2502] eta: 0:29:10 lr: 0.000005 loss_cls: 4.1649 (3.8340) grad_norm: 4.3340 (4.4809) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-06 01:07:59 root] (utils.py 283): INFO Epoch: [22] [ 280/2502] eta: 0:29:02 lr: 0.000005 loss_cls: 4.2003 (3.8372) grad_norm: 4.4279 (4.4902) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:08:07 root] (utils.py 283): INFO Epoch: [22] [ 290/2502] eta: 0:28:54 lr: 0.000005 loss_cls: 3.8966 (3.8337) grad_norm: 4.2209 (4.4773) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 01:08:15 root] (utils.py 283): INFO Epoch: [22] [ 300/2502] eta: 0:28:46 lr: 0.000005 loss_cls: 3.8966 (3.8320) grad_norm: 4.1488 (4.4760) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 01:08:22 root] (utils.py 283): INFO Epoch: [22] [ 310/2502] eta: 0:28:37 lr: 0.000005 loss_cls: 4.0267 (3.8255) grad_norm: 4.2548 (4.4685) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 01:08:30 root] (utils.py 283): INFO Epoch: [22] [ 320/2502] eta: 0:28:30 lr: 0.000005 loss_cls: 3.8130 (3.8204) grad_norm: 4.1966 (4.4622) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 01:08:38 root] (utils.py 283): INFO Epoch: [22] [ 330/2502] eta: 0:28:22 lr: 0.000005 loss_cls: 3.7635 (3.8188) grad_norm: 4.0472 (4.4580) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 01:08:46 root] (utils.py 283): INFO Epoch: [22] [ 340/2502] eta: 0:28:14 lr: 0.000005 loss_cls: 3.7635 (3.8237) grad_norm: 4.3744 (4.5067) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-06 01:08:54 root] (utils.py 283): INFO Epoch: [22] [ 350/2502] eta: 0:28:06 lr: 0.000005 loss_cls: 4.0801 (3.8275) grad_norm: 4.4076 (4.5028) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:02 root] (utils.py 283): INFO Epoch: [22] [ 360/2502] eta: 0:27:58 lr: 0.000005 loss_cls: 3.9354 (3.8300) grad_norm: 4.3184 (4.5027) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:09 root] (utils.py 283): INFO Epoch: [22] [ 370/2502] eta: 0:27:50 lr: 0.000005 loss_cls: 3.9354 (3.8339) grad_norm: 4.3034 (4.4999) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:17 root] (utils.py 283): INFO Epoch: [22] [ 380/2502] eta: 0:27:42 lr: 0.000005 loss_cls: 3.6400 (3.8229) grad_norm: 4.3034 (4.4925) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:25 root] (utils.py 283): INFO Epoch: [22] [ 390/2502] eta: 0:27:34 lr: 0.000005 loss_cls: 3.3916 (3.8227) grad_norm: 4.2660 (4.4906) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:33 root] (utils.py 283): INFO Epoch: [22] [ 400/2502] eta: 0:27:26 lr: 0.000005 loss_cls: 3.8787 (3.8228) grad_norm: 4.2660 (4.4858) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:41 root] (utils.py 283): INFO Epoch: [22] [ 410/2502] eta: 0:27:18 lr: 0.000005 loss_cls: 3.8787 (3.8195) grad_norm: 4.2411 (4.4820) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:49 root] (utils.py 283): INFO Epoch: [22] [ 420/2502] eta: 0:27:10 lr: 0.000005 loss_cls: 3.5600 (3.8149) grad_norm: 4.1125 (4.4725) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 01:09:56 root] (utils.py 283): INFO Epoch: [22] [ 430/2502] eta: 0:27:03 lr: 0.000005 loss_cls: 3.7277 (3.8182) grad_norm: 4.0986 (4.4951) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:04 root] (utils.py 283): INFO Epoch: [22] [ 440/2502] eta: 0:26:55 lr: 0.000005 loss_cls: 3.8598 (3.8170) grad_norm: 4.4427 (4.4986) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:12 root] (utils.py 283): INFO Epoch: [22] [ 450/2502] eta: 0:26:47 lr: 0.000005 loss_cls: 3.8598 (3.8156) grad_norm: 4.5515 (4.4982) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:20 root] (utils.py 283): INFO Epoch: [22] [ 460/2502] eta: 0:26:39 lr: 0.000005 loss_cls: 3.4776 (3.8073) grad_norm: 4.3605 (4.4949) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:28 root] (utils.py 283): INFO Epoch: [22] [ 470/2502] eta: 0:26:31 lr: 0.000005 loss_cls: 3.6394 (3.8069) grad_norm: 4.3299 (4.5124) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:35 root] (utils.py 283): INFO Epoch: [22] [ 480/2502] eta: 0:26:23 lr: 0.000005 loss_cls: 4.1498 (3.8157) grad_norm: 4.3685 (4.5152) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:43 root] (utils.py 283): INFO Epoch: [22] [ 490/2502] eta: 0:26:16 lr: 0.000005 loss_cls: 4.1498 (3.8159) grad_norm: 4.4376 (4.5130) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:51 root] (utils.py 283): INFO Epoch: [22] [ 500/2502] eta: 0:26:08 lr: 0.000005 loss_cls: 3.9752 (3.8162) grad_norm: 4.3552 (4.5091) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 01:10:59 root] (utils.py 283): INFO Epoch: [22] [ 510/2502] eta: 0:26:00 lr: 0.000005 loss_cls: 4.0017 (3.8182) grad_norm: 4.2581 (4.5050) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 01:11:07 root] (utils.py 283): INFO Epoch: [22] [ 520/2502] eta: 0:25:52 lr: 0.000005 loss_cls: 3.9724 (3.8172) grad_norm: 4.3336 (4.5090) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 01:11:15 root] (utils.py 283): INFO Epoch: [22] [ 530/2502] eta: 0:25:45 lr: 0.000005 loss_cls: 3.9724 (3.8203) grad_norm: 4.5653 (4.5121) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-06 01:11:23 root] (utils.py 283): INFO Epoch: [22] [ 540/2502] eta: 0:25:37 lr: 0.000005 loss_cls: 4.0660 (3.8204) grad_norm: 4.5412 (4.5123) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 01:11:30 root] (utils.py 283): INFO Epoch: [22] [ 550/2502] eta: 0:25:29 lr: 0.000005 loss_cls: 4.0376 (3.8217) grad_norm: 4.4126 (4.5138) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:11:38 root] (utils.py 283): INFO Epoch: [22] [ 560/2502] eta: 0:25:21 lr: 0.000005 loss_cls: 4.0821 (3.8274) grad_norm: 4.4148 (4.5118) time: 0.7911 data: 0.0003 max mem: 8421 +[2024-12-06 01:11:46 root] (utils.py 283): INFO Epoch: [22] [ 570/2502] eta: 0:25:14 lr: 0.000005 loss_cls: 4.1051 (3.8253) grad_norm: 4.2961 (4.5081) time: 0.7924 data: 0.0003 max mem: 8421 +[2024-12-06 01:11:54 root] (utils.py 283): INFO Epoch: [22] [ 580/2502] eta: 0:25:06 lr: 0.000005 loss_cls: 3.6345 (3.8235) grad_norm: 4.2274 (4.5122) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:02 root] (utils.py 283): INFO Epoch: [22] [ 590/2502] eta: 0:24:58 lr: 0.000005 loss_cls: 3.8599 (3.8227) grad_norm: 4.2614 (4.5079) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:10 root] (utils.py 283): INFO Epoch: [22] [ 600/2502] eta: 0:24:50 lr: 0.000005 loss_cls: 4.0286 (3.8247) grad_norm: 4.3333 (4.5121) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:17 root] (utils.py 283): INFO Epoch: [22] [ 610/2502] eta: 0:24:42 lr: 0.000005 loss_cls: 4.0286 (3.8265) grad_norm: 4.4564 (4.5126) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:25 root] (utils.py 283): INFO Epoch: [22] [ 620/2502] eta: 0:24:34 lr: 0.000005 loss_cls: 3.9712 (3.8288) grad_norm: 4.4185 (4.5136) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:33 root] (utils.py 283): INFO Epoch: [22] [ 630/2502] eta: 0:24:26 lr: 0.000005 loss_cls: 3.7711 (3.8259) grad_norm: 4.4927 (4.5154) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:41 root] (utils.py 283): INFO Epoch: [22] [ 640/2502] eta: 0:24:18 lr: 0.000005 loss_cls: 3.7045 (3.8230) grad_norm: 4.3927 (4.5139) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:49 root] (utils.py 283): INFO Epoch: [22] [ 650/2502] eta: 0:24:10 lr: 0.000005 loss_cls: 3.8724 (3.8257) grad_norm: 4.3619 (4.5125) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 01:12:57 root] (utils.py 283): INFO Epoch: [22] [ 660/2502] eta: 0:24:02 lr: 0.000005 loss_cls: 4.1625 (3.8280) grad_norm: 4.3824 (4.5150) time: 0.7881 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:04 root] (utils.py 283): INFO Epoch: [22] [ 670/2502] eta: 0:23:54 lr: 0.000005 loss_cls: 4.0588 (3.8275) grad_norm: 4.3354 (4.5142) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:12 root] (utils.py 283): INFO Epoch: [22] [ 680/2502] eta: 0:23:47 lr: 0.000005 loss_cls: 3.6612 (3.8252) grad_norm: 4.3012 (4.5119) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:20 root] (utils.py 283): INFO Epoch: [22] [ 690/2502] eta: 0:23:39 lr: 0.000005 loss_cls: 3.9220 (3.8261) grad_norm: 4.2669 (4.5084) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:28 root] (utils.py 283): INFO Epoch: [22] [ 700/2502] eta: 0:23:31 lr: 0.000005 loss_cls: 3.9295 (3.8251) grad_norm: 4.1990 (4.5080) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:36 root] (utils.py 283): INFO Epoch: [22] [ 710/2502] eta: 0:23:23 lr: 0.000005 loss_cls: 3.8693 (3.8245) grad_norm: 4.2659 (4.5127) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:43 root] (utils.py 283): INFO Epoch: [22] [ 720/2502] eta: 0:23:15 lr: 0.000005 loss_cls: 3.9997 (3.8254) grad_norm: 4.4381 (4.5111) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:51 root] (utils.py 283): INFO Epoch: [22] [ 730/2502] eta: 0:23:07 lr: 0.000005 loss_cls: 4.0051 (3.8250) grad_norm: 4.4064 (4.5147) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 01:13:59 root] (utils.py 283): INFO Epoch: [22] [ 740/2502] eta: 0:23:00 lr: 0.000005 loss_cls: 3.9575 (3.8256) grad_norm: 4.3721 (4.5192) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 01:14:07 root] (utils.py 283): INFO Epoch: [22] [ 750/2502] eta: 0:22:52 lr: 0.000005 loss_cls: 3.9575 (3.8269) grad_norm: 4.2299 (4.5166) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 01:14:15 root] (utils.py 283): INFO Epoch: [22] [ 760/2502] eta: 0:22:44 lr: 0.000005 loss_cls: 3.8441 (3.8253) grad_norm: 4.3781 (4.5207) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 01:14:23 root] (utils.py 283): INFO Epoch: [22] [ 770/2502] eta: 0:22:36 lr: 0.000005 loss_cls: 3.5489 (3.8228) grad_norm: 4.1940 (4.5244) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 01:14:30 root] (utils.py 283): INFO Epoch: [22] [ 780/2502] eta: 0:22:28 lr: 0.000005 loss_cls: 3.9906 (3.8252) grad_norm: 4.4072 (4.5276) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 01:14:38 root] (utils.py 283): INFO Epoch: [22] [ 790/2502] eta: 0:22:20 lr: 0.000005 loss_cls: 4.0165 (3.8256) grad_norm: 4.4072 (4.5242) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 01:14:46 root] (utils.py 283): INFO Epoch: [22] [ 800/2502] eta: 0:22:12 lr: 0.000005 loss_cls: 3.9392 (3.8242) grad_norm: 4.2693 (4.5226) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 01:14:54 root] (utils.py 283): INFO Epoch: [22] [ 810/2502] eta: 0:22:04 lr: 0.000005 loss_cls: 3.9396 (3.8283) grad_norm: 4.3771 (4.5494) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:02 root] (utils.py 283): INFO Epoch: [22] [ 820/2502] eta: 0:21:56 lr: 0.000005 loss_cls: 4.3065 (3.8310) grad_norm: 4.3305 (4.5458) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:09 root] (utils.py 283): INFO Epoch: [22] [ 830/2502] eta: 0:21:49 lr: 0.000005 loss_cls: 4.1096 (3.8335) grad_norm: 4.3305 (4.5463) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:17 root] (utils.py 283): INFO Epoch: [22] [ 840/2502] eta: 0:21:41 lr: 0.000005 loss_cls: 3.9337 (3.8308) grad_norm: 4.5082 (4.5461) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:25 root] (utils.py 283): INFO Epoch: [22] [ 850/2502] eta: 0:21:33 lr: 0.000005 loss_cls: 4.0791 (3.8351) grad_norm: 4.4576 (4.5452) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:33 root] (utils.py 283): INFO Epoch: [22] [ 860/2502] eta: 0:21:25 lr: 0.000005 loss_cls: 4.1236 (3.8343) grad_norm: 4.3245 (4.5480) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:41 root] (utils.py 283): INFO Epoch: [22] [ 870/2502] eta: 0:21:17 lr: 0.000005 loss_cls: 4.0483 (3.8381) grad_norm: 4.3245 (4.5483) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:48 root] (utils.py 283): INFO Epoch: [22] [ 880/2502] eta: 0:21:09 lr: 0.000005 loss_cls: 4.1770 (3.8395) grad_norm: 4.2819 (4.5456) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:15:56 root] (utils.py 283): INFO Epoch: [22] [ 890/2502] eta: 0:21:01 lr: 0.000005 loss_cls: 3.9157 (3.8372) grad_norm: 4.2819 (4.5450) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:04 root] (utils.py 283): INFO Epoch: [22] [ 900/2502] eta: 0:20:54 lr: 0.000005 loss_cls: 3.5682 (3.8372) grad_norm: 4.6086 (4.5454) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:12 root] (utils.py 283): INFO Epoch: [22] [ 910/2502] eta: 0:20:46 lr: 0.000005 loss_cls: 3.6818 (3.8357) grad_norm: 4.6086 (4.5459) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:20 root] (utils.py 283): INFO Epoch: [22] [ 920/2502] eta: 0:20:38 lr: 0.000005 loss_cls: 3.2278 (3.8296) grad_norm: 4.3813 (4.5419) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:28 root] (utils.py 283): INFO Epoch: [22] [ 930/2502] eta: 0:20:30 lr: 0.000005 loss_cls: 3.6753 (3.8301) grad_norm: 4.1762 (4.5408) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:35 root] (utils.py 283): INFO Epoch: [22] [ 940/2502] eta: 0:20:22 lr: 0.000005 loss_cls: 3.8326 (3.8279) grad_norm: 4.2810 (4.5389) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:43 root] (utils.py 283): INFO Epoch: [22] [ 950/2502] eta: 0:20:15 lr: 0.000005 loss_cls: 3.8326 (3.8280) grad_norm: 4.4611 (4.5389) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:51 root] (utils.py 283): INFO Epoch: [22] [ 960/2502] eta: 0:20:07 lr: 0.000005 loss_cls: 4.1157 (3.8265) grad_norm: 4.2872 (4.5367) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 01:16:59 root] (utils.py 283): INFO Epoch: [22] [ 970/2502] eta: 0:19:59 lr: 0.000005 loss_cls: 3.9123 (3.8251) grad_norm: 4.2872 (4.5347) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-06 01:17:07 root] (utils.py 283): INFO Epoch: [22] [ 980/2502] eta: 0:19:51 lr: 0.000005 loss_cls: 3.6023 (3.8222) grad_norm: 4.3004 (4.5356) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 01:17:15 root] (utils.py 283): INFO Epoch: [22] [ 990/2502] eta: 0:19:43 lr: 0.000005 loss_cls: 3.7843 (3.8247) grad_norm: 4.3001 (4.5330) time: 0.7935 data: 0.0003 max mem: 8421 +[2024-12-06 01:17:22 root] (utils.py 283): INFO Epoch: [22] [1000/2502] eta: 0:19:35 lr: 0.000005 loss_cls: 4.0182 (3.8223) grad_norm: 4.3001 (4.5336) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 01:17:30 root] (utils.py 283): INFO Epoch: [22] [1010/2502] eta: 0:19:28 lr: 0.000005 loss_cls: 3.7928 (3.8233) grad_norm: 4.2984 (4.5344) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-06 01:17:38 root] (utils.py 283): INFO Epoch: [22] [1020/2502] eta: 0:19:20 lr: 0.000005 loss_cls: 3.7928 (3.8227) grad_norm: 4.2984 (4.5324) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 01:17:46 root] (utils.py 283): INFO Epoch: [22] [1030/2502] eta: 0:19:12 lr: 0.000005 loss_cls: 4.0212 (3.8224) grad_norm: 4.2468 (4.5312) time: 0.7792 data: 0.0003 max mem: 8421 +[2024-12-06 01:17:54 root] (utils.py 283): INFO Epoch: [22] [1040/2502] eta: 0:19:04 lr: 0.000005 loss_cls: 3.6894 (3.8210) grad_norm: 4.2022 (4.5302) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:01 root] (utils.py 283): INFO Epoch: [22] [1050/2502] eta: 0:18:56 lr: 0.000005 loss_cls: 3.6894 (3.8192) grad_norm: 4.3204 (4.5287) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:09 root] (utils.py 283): INFO Epoch: [22] [1060/2502] eta: 0:18:48 lr: 0.000005 loss_cls: 4.0505 (3.8195) grad_norm: 4.3204 (4.5274) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:17 root] (utils.py 283): INFO Epoch: [22] [1070/2502] eta: 0:18:41 lr: 0.000005 loss_cls: 4.0813 (3.8211) grad_norm: 4.2891 (4.5266) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:25 root] (utils.py 283): INFO Epoch: [22] [1080/2502] eta: 0:18:33 lr: 0.000005 loss_cls: 3.9248 (3.8201) grad_norm: 4.2260 (4.5238) time: 0.7930 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:33 root] (utils.py 283): INFO Epoch: [22] [1090/2502] eta: 0:18:25 lr: 0.000005 loss_cls: 3.8279 (3.8187) grad_norm: 4.2410 (4.5224) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:41 root] (utils.py 283): INFO Epoch: [22] [1100/2502] eta: 0:18:17 lr: 0.000005 loss_cls: 3.8688 (3.8188) grad_norm: 4.3156 (4.5205) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:49 root] (utils.py 283): INFO Epoch: [22] [1110/2502] eta: 0:18:09 lr: 0.000005 loss_cls: 3.9809 (3.8188) grad_norm: 4.2290 (4.5190) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-06 01:18:57 root] (utils.py 283): INFO Epoch: [22] [1120/2502] eta: 0:18:02 lr: 0.000005 loss_cls: 3.8701 (3.8189) grad_norm: 4.2290 (4.5175) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:04 root] (utils.py 283): INFO Epoch: [22] [1130/2502] eta: 0:17:54 lr: 0.000005 loss_cls: 4.0585 (3.8209) grad_norm: 4.2341 (4.5150) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:12 root] (utils.py 283): INFO Epoch: [22] [1140/2502] eta: 0:17:46 lr: 0.000005 loss_cls: 4.1660 (3.8231) grad_norm: 4.2341 (4.5149) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:20 root] (utils.py 283): INFO Epoch: [22] [1150/2502] eta: 0:17:38 lr: 0.000005 loss_cls: 4.0412 (3.8237) grad_norm: 4.4293 (4.5180) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:28 root] (utils.py 283): INFO Epoch: [22] [1160/2502] eta: 0:17:30 lr: 0.000005 loss_cls: 4.0564 (3.8237) grad_norm: 4.2719 (4.5171) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:36 root] (utils.py 283): INFO Epoch: [22] [1170/2502] eta: 0:17:22 lr: 0.000005 loss_cls: 3.5951 (3.8209) grad_norm: 4.2771 (4.5160) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:43 root] (utils.py 283): INFO Epoch: [22] [1180/2502] eta: 0:17:14 lr: 0.000005 loss_cls: 3.7230 (3.8218) grad_norm: 4.2810 (4.5141) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:51 root] (utils.py 283): INFO Epoch: [22] [1190/2502] eta: 0:17:07 lr: 0.000005 loss_cls: 4.1191 (3.8239) grad_norm: 4.2493 (4.5144) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 01:19:59 root] (utils.py 283): INFO Epoch: [22] [1200/2502] eta: 0:16:59 lr: 0.000005 loss_cls: 4.0901 (3.8245) grad_norm: 4.5015 (4.5151) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 01:20:07 root] (utils.py 283): INFO Epoch: [22] [1210/2502] eta: 0:16:51 lr: 0.000005 loss_cls: 4.0501 (3.8240) grad_norm: 4.6055 (4.5175) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 01:20:15 root] (utils.py 283): INFO Epoch: [22] [1220/2502] eta: 0:16:43 lr: 0.000005 loss_cls: 3.9199 (3.8251) grad_norm: 4.4498 (4.5168) time: 0.7830 data: 0.0002 max mem: 8421 +[2024-12-06 01:20:22 root] (utils.py 283): INFO Epoch: [22] [1230/2502] eta: 0:16:35 lr: 0.000005 loss_cls: 3.7335 (3.8215) grad_norm: 4.2278 (4.5145) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 01:20:30 root] (utils.py 283): INFO Epoch: [22] [1240/2502] eta: 0:16:27 lr: 0.000005 loss_cls: 3.6906 (3.8218) grad_norm: 4.3149 (4.5156) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 01:20:38 root] (utils.py 283): INFO Epoch: [22] [1250/2502] eta: 0:16:20 lr: 0.000005 loss_cls: 3.7382 (3.8231) grad_norm: 4.4879 (4.5157) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 01:20:46 root] (utils.py 283): INFO Epoch: [22] [1260/2502] eta: 0:16:12 lr: 0.000005 loss_cls: 3.7382 (3.8217) grad_norm: 4.3739 (4.5140) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 01:20:54 root] (utils.py 283): INFO Epoch: [22] [1270/2502] eta: 0:16:04 lr: 0.000005 loss_cls: 3.9143 (3.8209) grad_norm: 4.2651 (4.5116) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:02 root] (utils.py 283): INFO Epoch: [22] [1280/2502] eta: 0:15:56 lr: 0.000005 loss_cls: 3.7774 (3.8209) grad_norm: 4.3149 (4.5112) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:09 root] (utils.py 283): INFO Epoch: [22] [1290/2502] eta: 0:15:48 lr: 0.000005 loss_cls: 3.7158 (3.8199) grad_norm: 4.2858 (4.5110) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:17 root] (utils.py 283): INFO Epoch: [22] [1300/2502] eta: 0:15:40 lr: 0.000005 loss_cls: 3.8557 (3.8196) grad_norm: 4.1930 (4.5101) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:25 root] (utils.py 283): INFO Epoch: [22] [1310/2502] eta: 0:15:33 lr: 0.000005 loss_cls: 3.8734 (3.8204) grad_norm: 4.1436 (4.5104) time: 0.7926 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:33 root] (utils.py 283): INFO Epoch: [22] [1320/2502] eta: 0:15:25 lr: 0.000005 loss_cls: 3.9464 (3.8203) grad_norm: 4.1789 (4.5101) time: 0.7943 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:41 root] (utils.py 283): INFO Epoch: [22] [1330/2502] eta: 0:15:17 lr: 0.000005 loss_cls: 3.9471 (3.8208) grad_norm: 4.1789 (4.5087) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:49 root] (utils.py 283): INFO Epoch: [22] [1340/2502] eta: 0:15:09 lr: 0.000005 loss_cls: 3.8401 (3.8186) grad_norm: 4.1770 (4.5066) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:21:57 root] (utils.py 283): INFO Epoch: [22] [1350/2502] eta: 0:15:01 lr: 0.000005 loss_cls: 3.3944 (3.8174) grad_norm: 4.3344 (4.5084) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 01:22:04 root] (utils.py 283): INFO Epoch: [22] [1360/2502] eta: 0:14:54 lr: 0.000005 loss_cls: 3.9051 (3.8165) grad_norm: 4.1953 (4.5068) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 01:22:12 root] (utils.py 283): INFO Epoch: [22] [1370/2502] eta: 0:14:46 lr: 0.000005 loss_cls: 3.6665 (3.8155) grad_norm: 4.2789 (4.5061) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 01:22:20 root] (utils.py 283): INFO Epoch: [22] [1380/2502] eta: 0:14:38 lr: 0.000005 loss_cls: 3.9062 (3.8159) grad_norm: 4.4155 (4.5050) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 01:22:28 root] (utils.py 283): INFO Epoch: [22] [1390/2502] eta: 0:14:30 lr: 0.000005 loss_cls: 3.8791 (3.8157) grad_norm: 4.2425 (4.5031) time: 0.7918 data: 0.0003 max mem: 8421 +[2024-12-06 01:22:36 root] (utils.py 283): INFO Epoch: [22] [1400/2502] eta: 0:14:23 lr: 0.000005 loss_cls: 3.8608 (3.8169) grad_norm: 4.1544 (4.5018) time: 0.7978 data: 0.0003 max mem: 8421 +[2024-12-06 01:22:44 root] (utils.py 283): INFO Epoch: [22] [1410/2502] eta: 0:14:15 lr: 0.000005 loss_cls: 3.8380 (3.8164) grad_norm: 4.3234 (4.5032) time: 0.7974 data: 0.0003 max mem: 8421 +[2024-12-06 01:22:52 root] (utils.py 283): INFO Epoch: [22] [1420/2502] eta: 0:14:07 lr: 0.000005 loss_cls: 3.6954 (3.8161) grad_norm: 4.0973 (4.5006) time: 0.7927 data: 0.0003 max mem: 8421 +[2024-12-06 01:23:00 root] (utils.py 283): INFO Epoch: [22] [1430/2502] eta: 0:13:59 lr: 0.000005 loss_cls: 4.0708 (3.8186) grad_norm: 4.1907 (4.4986) time: 0.7908 data: 0.0003 max mem: 8421 +[2024-12-06 01:23:08 root] (utils.py 283): INFO Epoch: [22] [1440/2502] eta: 0:13:52 lr: 0.000005 loss_cls: 4.0134 (3.8185) grad_norm: 4.2992 (4.4989) time: 0.7915 data: 0.0003 max mem: 8421 +[2024-12-06 01:23:16 root] (utils.py 283): INFO Epoch: [22] [1450/2502] eta: 0:13:44 lr: 0.000005 loss_cls: 3.6931 (3.8176) grad_norm: 4.3204 (4.4998) time: 0.7922 data: 0.0003 max mem: 8421 +[2024-12-06 01:23:23 root] (utils.py 283): INFO Epoch: [22] [1460/2502] eta: 0:13:36 lr: 0.000005 loss_cls: 3.8396 (3.8180) grad_norm: 4.2664 (4.4989) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 01:23:31 root] (utils.py 283): INFO Epoch: [22] [1470/2502] eta: 0:13:28 lr: 0.000005 loss_cls: 3.8396 (3.8177) grad_norm: 4.1642 (4.4968) time: 0.7819 data: 0.0002 max mem: 8421 +[2024-12-06 01:23:39 root] (utils.py 283): INFO Epoch: [22] [1480/2502] eta: 0:13:20 lr: 0.000005 loss_cls: 3.8331 (3.8169) grad_norm: 4.1642 (4.4962) time: 0.7864 data: 0.0002 max mem: 8421 +[2024-12-06 01:23:47 root] (utils.py 283): INFO Epoch: [22] [1490/2502] eta: 0:13:13 lr: 0.000005 loss_cls: 4.0897 (3.8193) grad_norm: 4.5611 (4.4967) time: 0.7954 data: 0.0002 max mem: 8421 +[2024-12-06 01:23:55 root] (utils.py 283): INFO Epoch: [22] [1500/2502] eta: 0:13:05 lr: 0.000005 loss_cls: 4.2150 (3.8207) grad_norm: 4.4271 (4.4955) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:03 root] (utils.py 283): INFO Epoch: [22] [1510/2502] eta: 0:12:57 lr: 0.000005 loss_cls: 4.0037 (3.8198) grad_norm: 4.2970 (4.4943) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:11 root] (utils.py 283): INFO Epoch: [22] [1520/2502] eta: 0:12:49 lr: 0.000005 loss_cls: 3.8146 (3.8197) grad_norm: 4.4050 (4.4951) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:19 root] (utils.py 283): INFO Epoch: [22] [1530/2502] eta: 0:12:41 lr: 0.000005 loss_cls: 3.8146 (3.8186) grad_norm: 4.4050 (4.4943) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:27 root] (utils.py 283): INFO Epoch: [22] [1540/2502] eta: 0:12:33 lr: 0.000005 loss_cls: 3.7643 (3.8174) grad_norm: 4.3355 (4.4937) time: 0.7954 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:35 root] (utils.py 283): INFO Epoch: [22] [1550/2502] eta: 0:12:26 lr: 0.000005 loss_cls: 3.7643 (3.8165) grad_norm: 4.3087 (4.4952) time: 0.7908 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:42 root] (utils.py 283): INFO Epoch: [22] [1560/2502] eta: 0:12:18 lr: 0.000005 loss_cls: 3.9066 (3.8166) grad_norm: 4.3087 (4.4949) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:50 root] (utils.py 283): INFO Epoch: [22] [1570/2502] eta: 0:12:10 lr: 0.000005 loss_cls: 3.9066 (3.8171) grad_norm: 4.1615 (4.4924) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 01:24:58 root] (utils.py 283): INFO Epoch: [22] [1580/2502] eta: 0:12:02 lr: 0.000005 loss_cls: 4.0339 (3.8183) grad_norm: 4.2110 (4.4940) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 01:25:06 root] (utils.py 283): INFO Epoch: [22] [1590/2502] eta: 0:11:54 lr: 0.000005 loss_cls: 3.6602 (3.8170) grad_norm: 4.5864 (4.4943) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 01:25:14 root] (utils.py 283): INFO Epoch: [22] [1600/2502] eta: 0:11:47 lr: 0.000005 loss_cls: 3.6400 (3.8172) grad_norm: 4.4084 (4.4927) time: 0.7913 data: 0.0003 max mem: 8421 +[2024-12-06 01:25:22 root] (utils.py 283): INFO Epoch: [22] [1610/2502] eta: 0:11:39 lr: 0.000005 loss_cls: 3.9875 (3.8166) grad_norm: 4.2491 (4.4935) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-06 01:25:29 root] (utils.py 283): INFO Epoch: [22] [1620/2502] eta: 0:11:31 lr: 0.000005 loss_cls: 3.9971 (3.8182) grad_norm: 4.3126 (4.4941) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 01:25:37 root] (utils.py 283): INFO Epoch: [22] [1630/2502] eta: 0:11:23 lr: 0.000005 loss_cls: 3.9971 (3.8177) grad_norm: 4.4664 (4.4936) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 01:25:45 root] (utils.py 283): INFO Epoch: [22] [1640/2502] eta: 0:11:15 lr: 0.000005 loss_cls: 3.9697 (3.8167) grad_norm: 4.4212 (4.4938) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 01:25:53 root] (utils.py 283): INFO Epoch: [22] [1650/2502] eta: 0:11:07 lr: 0.000005 loss_cls: 3.9958 (3.8170) grad_norm: 4.3245 (4.4933) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-06 01:26:01 root] (utils.py 283): INFO Epoch: [22] [1660/2502] eta: 0:10:59 lr: 0.000005 loss_cls: 3.9394 (3.8169) grad_norm: 4.3691 (4.4926) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:26:09 root] (utils.py 283): INFO Epoch: [22] [1670/2502] eta: 0:10:52 lr: 0.000005 loss_cls: 3.9273 (3.8169) grad_norm: 4.3791 (4.4915) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-06 01:26:16 root] (utils.py 283): INFO Epoch: [22] [1680/2502] eta: 0:10:44 lr: 0.000005 loss_cls: 3.8877 (3.8171) grad_norm: 4.3507 (4.4908) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 01:26:24 root] (utils.py 283): INFO Epoch: [22] [1690/2502] eta: 0:10:36 lr: 0.000005 loss_cls: 3.9311 (3.8183) grad_norm: 4.3507 (4.4914) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-06 01:26:32 root] (utils.py 283): INFO Epoch: [22] [1700/2502] eta: 0:10:28 lr: 0.000005 loss_cls: 3.9779 (3.8196) grad_norm: 4.5357 (4.4915) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-06 01:26:40 root] (utils.py 283): INFO Epoch: [22] [1710/2502] eta: 0:10:20 lr: 0.000005 loss_cls: 3.8996 (3.8179) grad_norm: 4.4469 (4.4903) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 01:26:48 root] (utils.py 283): INFO Epoch: [22] [1720/2502] eta: 0:10:12 lr: 0.000005 loss_cls: 3.5617 (3.8170) grad_norm: 4.3357 (4.4921) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 01:26:55 root] (utils.py 283): INFO Epoch: [22] [1730/2502] eta: 0:10:05 lr: 0.000005 loss_cls: 3.8724 (3.8172) grad_norm: 4.2259 (4.4905) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:27:03 root] (utils.py 283): INFO Epoch: [22] [1740/2502] eta: 0:09:57 lr: 0.000005 loss_cls: 4.0557 (3.8187) grad_norm: 4.2759 (4.4909) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 01:27:11 root] (utils.py 283): INFO Epoch: [22] [1750/2502] eta: 0:09:49 lr: 0.000005 loss_cls: 4.0924 (3.8195) grad_norm: 4.3403 (4.4893) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 01:27:19 root] (utils.py 283): INFO Epoch: [22] [1760/2502] eta: 0:09:41 lr: 0.000005 loss_cls: 4.1723 (3.8217) grad_norm: 4.3178 (4.4899) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 01:27:27 root] (utils.py 283): INFO Epoch: [22] [1770/2502] eta: 0:09:33 lr: 0.000005 loss_cls: 4.0833 (3.8213) grad_norm: 4.3178 (4.4886) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:27:35 root] (utils.py 283): INFO Epoch: [22] [1780/2502] eta: 0:09:25 lr: 0.000005 loss_cls: 3.5978 (3.8193) grad_norm: 4.1799 (4.4872) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 01:27:42 root] (utils.py 283): INFO Epoch: [22] [1790/2502] eta: 0:09:17 lr: 0.000005 loss_cls: 3.5978 (3.8187) grad_norm: 4.1919 (4.4872) time: 0.7752 data: 0.0003 max mem: 8421 +[2024-12-06 01:27:50 root] (utils.py 283): INFO Epoch: [22] [1800/2502] eta: 0:09:10 lr: 0.000005 loss_cls: 3.8232 (3.8176) grad_norm: 4.3238 (4.4866) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-06 01:27:58 root] (utils.py 283): INFO Epoch: [22] [1810/2502] eta: 0:09:02 lr: 0.000005 loss_cls: 3.7338 (3.8172) grad_norm: 4.2772 (4.4858) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-06 01:28:06 root] (utils.py 283): INFO Epoch: [22] [1820/2502] eta: 0:08:54 lr: 0.000005 loss_cls: 3.8701 (3.8176) grad_norm: 4.2550 (4.4862) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-06 01:28:13 root] (utils.py 283): INFO Epoch: [22] [1830/2502] eta: 0:08:46 lr: 0.000005 loss_cls: 3.9653 (3.8184) grad_norm: 4.2470 (4.4866) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 01:28:21 root] (utils.py 283): INFO Epoch: [22] [1840/2502] eta: 0:08:38 lr: 0.000005 loss_cls: 3.9653 (3.8194) grad_norm: 4.4072 (4.4866) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 01:28:29 root] (utils.py 283): INFO Epoch: [22] [1850/2502] eta: 0:08:30 lr: 0.000005 loss_cls: 3.9605 (3.8183) grad_norm: 4.4816 (4.4869) time: 0.7940 data: 0.0003 max mem: 8421 +[2024-12-06 01:28:37 root] (utils.py 283): INFO Epoch: [22] [1860/2502] eta: 0:08:23 lr: 0.000005 loss_cls: 4.0356 (3.8194) grad_norm: 4.6372 (4.4882) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-06 01:28:45 root] (utils.py 283): INFO Epoch: [22] [1870/2502] eta: 0:08:15 lr: 0.000005 loss_cls: 4.0818 (3.8202) grad_norm: 4.3084 (4.4900) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-06 01:28:53 root] (utils.py 283): INFO Epoch: [22] [1880/2502] eta: 0:08:07 lr: 0.000005 loss_cls: 3.9572 (3.8204) grad_norm: 4.3084 (4.4910) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 01:29:01 root] (utils.py 283): INFO Epoch: [22] [1890/2502] eta: 0:07:59 lr: 0.000005 loss_cls: 3.7149 (3.8188) grad_norm: 4.2918 (4.4900) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-06 01:29:08 root] (utils.py 283): INFO Epoch: [22] [1900/2502] eta: 0:07:51 lr: 0.000005 loss_cls: 3.5162 (3.8173) grad_norm: 4.2918 (4.4895) time: 0.7805 data: 0.0002 max mem: 8421 +[2024-12-06 01:29:16 root] (utils.py 283): INFO Epoch: [22] [1910/2502] eta: 0:07:43 lr: 0.000005 loss_cls: 3.9272 (3.8186) grad_norm: 4.3673 (4.4892) time: 0.7781 data: 0.0002 max mem: 8421 +[2024-12-06 01:29:24 root] (utils.py 283): INFO Epoch: [22] [1920/2502] eta: 0:07:35 lr: 0.000005 loss_cls: 3.9323 (3.8180) grad_norm: 4.2723 (4.4893) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-06 01:29:32 root] (utils.py 283): INFO Epoch: [22] [1930/2502] eta: 0:07:28 lr: 0.000005 loss_cls: 3.8599 (3.8175) grad_norm: 4.3229 (4.4892) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 01:29:40 root] (utils.py 283): INFO Epoch: [22] [1940/2502] eta: 0:07:20 lr: 0.000005 loss_cls: 4.0765 (3.8194) grad_norm: 4.3450 (4.4913) time: 0.7944 data: 0.0003 max mem: 8421 +[2024-12-06 01:29:48 root] (utils.py 283): INFO Epoch: [22] [1950/2502] eta: 0:07:12 lr: 0.000005 loss_cls: 4.0765 (3.8186) grad_norm: 4.3319 (4.4904) time: 0.7875 data: 0.0003 max mem: 8421 +[2024-12-06 01:29:55 root] (utils.py 283): INFO Epoch: [22] [1960/2502] eta: 0:07:04 lr: 0.000005 loss_cls: 3.8564 (3.8182) grad_norm: 4.2873 (4.4892) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:03 root] (utils.py 283): INFO Epoch: [22] [1970/2502] eta: 0:06:56 lr: 0.000005 loss_cls: 3.8771 (3.8182) grad_norm: 4.1892 (4.4877) time: 0.7931 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:12 root] (utils.py 283): INFO Epoch: [22] [1980/2502] eta: 0:06:49 lr: 0.000005 loss_cls: 4.0409 (3.8179) grad_norm: 4.1321 (4.4863) time: 0.8125 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:19 root] (utils.py 283): INFO Epoch: [22] [1990/2502] eta: 0:06:41 lr: 0.000005 loss_cls: 3.9594 (3.8184) grad_norm: 4.1321 (4.4859) time: 0.8001 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:27 root] (utils.py 283): INFO Epoch: [22] [2000/2502] eta: 0:06:33 lr: 0.000005 loss_cls: 3.8233 (3.8182) grad_norm: 4.2473 (4.4882) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:35 root] (utils.py 283): INFO Epoch: [22] [2010/2502] eta: 0:06:25 lr: 0.000005 loss_cls: 3.5369 (3.8163) grad_norm: 4.1573 (4.4867) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:43 root] (utils.py 283): INFO Epoch: [22] [2020/2502] eta: 0:06:17 lr: 0.000005 loss_cls: 3.6205 (3.8163) grad_norm: 4.1697 (4.4860) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:51 root] (utils.py 283): INFO Epoch: [22] [2030/2502] eta: 0:06:09 lr: 0.000005 loss_cls: 3.7011 (3.8158) grad_norm: 4.3431 (4.4867) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 01:30:59 root] (utils.py 283): INFO Epoch: [22] [2040/2502] eta: 0:06:02 lr: 0.000005 loss_cls: 3.6217 (3.8150) grad_norm: 4.3868 (4.4866) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 01:31:06 root] (utils.py 283): INFO Epoch: [22] [2050/2502] eta: 0:05:54 lr: 0.000005 loss_cls: 3.9103 (3.8154) grad_norm: 4.2303 (4.4878) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 01:31:14 root] (utils.py 283): INFO Epoch: [22] [2060/2502] eta: 0:05:46 lr: 0.000005 loss_cls: 3.9103 (3.8148) grad_norm: 4.1071 (4.4869) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 01:31:22 root] (utils.py 283): INFO Epoch: [22] [2070/2502] eta: 0:05:38 lr: 0.000005 loss_cls: 3.9574 (3.8151) grad_norm: 4.1362 (4.4878) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 01:31:30 root] (utils.py 283): INFO Epoch: [22] [2080/2502] eta: 0:05:30 lr: 0.000005 loss_cls: 3.9240 (3.8152) grad_norm: 4.5093 (4.4895) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-06 01:31:38 root] (utils.py 283): INFO Epoch: [22] [2090/2502] eta: 0:05:22 lr: 0.000005 loss_cls: 4.0889 (3.8162) grad_norm: 4.3374 (4.4905) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 01:31:46 root] (utils.py 283): INFO Epoch: [22] [2100/2502] eta: 0:05:15 lr: 0.000005 loss_cls: 3.8326 (3.8154) grad_norm: 4.1899 (4.4888) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 01:31:54 root] (utils.py 283): INFO Epoch: [22] [2110/2502] eta: 0:05:07 lr: 0.000005 loss_cls: 3.9200 (3.8171) grad_norm: 4.2798 (4.4893) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:01 root] (utils.py 283): INFO Epoch: [22] [2120/2502] eta: 0:04:59 lr: 0.000005 loss_cls: 4.1847 (3.8165) grad_norm: 4.4056 (4.4888) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:09 root] (utils.py 283): INFO Epoch: [22] [2130/2502] eta: 0:04:51 lr: 0.000005 loss_cls: 4.0534 (3.8164) grad_norm: 4.2216 (4.4878) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:17 root] (utils.py 283): INFO Epoch: [22] [2140/2502] eta: 0:04:43 lr: 0.000005 loss_cls: 3.9934 (3.8168) grad_norm: 4.2999 (4.4886) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:25 root] (utils.py 283): INFO Epoch: [22] [2150/2502] eta: 0:04:35 lr: 0.000005 loss_cls: 4.2300 (3.8180) grad_norm: 4.4766 (4.4883) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:33 root] (utils.py 283): INFO Epoch: [22] [2160/2502] eta: 0:04:28 lr: 0.000005 loss_cls: 4.0241 (3.8181) grad_norm: 4.2569 (4.4873) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:40 root] (utils.py 283): INFO Epoch: [22] [2170/2502] eta: 0:04:20 lr: 0.000005 loss_cls: 3.7355 (3.8168) grad_norm: 4.4032 (4.4879) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:48 root] (utils.py 283): INFO Epoch: [22] [2180/2502] eta: 0:04:12 lr: 0.000005 loss_cls: 3.2647 (3.8152) grad_norm: 4.5074 (4.4881) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 01:32:56 root] (utils.py 283): INFO Epoch: [22] [2190/2502] eta: 0:04:04 lr: 0.000005 loss_cls: 3.8838 (3.8158) grad_norm: 4.3841 (4.4874) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-06 01:33:04 root] (utils.py 283): INFO Epoch: [22] [2200/2502] eta: 0:03:56 lr: 0.000005 loss_cls: 3.9118 (3.8159) grad_norm: 4.1469 (4.4865) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:33:12 root] (utils.py 283): INFO Epoch: [22] [2210/2502] eta: 0:03:48 lr: 0.000005 loss_cls: 3.7626 (3.8150) grad_norm: 4.2839 (4.4874) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 01:33:20 root] (utils.py 283): INFO Epoch: [22] [2220/2502] eta: 0:03:41 lr: 0.000005 loss_cls: 3.7626 (3.8153) grad_norm: 4.2582 (4.4861) time: 0.7852 data: 0.0002 max mem: 8421 +[2024-12-06 01:33:28 root] (utils.py 283): INFO Epoch: [22] [2230/2502] eta: 0:03:33 lr: 0.000005 loss_cls: 4.0175 (3.8158) grad_norm: 4.3324 (4.4883) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-06 01:33:35 root] (utils.py 283): INFO Epoch: [22] [2240/2502] eta: 0:03:25 lr: 0.000005 loss_cls: 4.0811 (3.8162) grad_norm: 4.4184 (4.4891) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 01:33:43 root] (utils.py 283): INFO Epoch: [22] [2250/2502] eta: 0:03:17 lr: 0.000005 loss_cls: 3.9908 (3.8166) grad_norm: 4.2875 (4.4889) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 01:33:51 root] (utils.py 283): INFO Epoch: [22] [2260/2502] eta: 0:03:09 lr: 0.000005 loss_cls: 3.9611 (3.8161) grad_norm: 4.2834 (4.4888) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-06 01:33:59 root] (utils.py 283): INFO Epoch: [22] [2270/2502] eta: 0:03:01 lr: 0.000005 loss_cls: 3.7986 (3.8160) grad_norm: 4.3816 (4.4891) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 01:34:07 root] (utils.py 283): INFO Epoch: [22] [2280/2502] eta: 0:02:53 lr: 0.000005 loss_cls: 4.0283 (3.8168) grad_norm: 4.2760 (4.4885) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 01:34:15 root] (utils.py 283): INFO Epoch: [22] [2290/2502] eta: 0:02:46 lr: 0.000005 loss_cls: 3.8951 (3.8170) grad_norm: 4.2627 (4.4912) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 01:34:23 root] (utils.py 283): INFO Epoch: [22] [2300/2502] eta: 0:02:38 lr: 0.000005 loss_cls: 3.8913 (3.8171) grad_norm: 4.2454 (4.4905) time: 0.8012 data: 0.0003 max mem: 8421 +[2024-12-06 01:34:32 root] (utils.py 283): INFO Epoch: [22] [2310/2502] eta: 0:02:30 lr: 0.000005 loss_cls: 3.8633 (3.8168) grad_norm: 4.3223 (4.4917) time: 0.8696 data: 0.0005 max mem: 8421 +[2024-12-06 01:34:40 root] (utils.py 283): INFO Epoch: [22] [2320/2502] eta: 0:02:22 lr: 0.000005 loss_cls: 3.9261 (3.8178) grad_norm: 4.4978 (4.4926) time: 0.8817 data: 0.0005 max mem: 8421 +[2024-12-06 01:34:48 root] (utils.py 283): INFO Epoch: [22] [2330/2502] eta: 0:02:14 lr: 0.000005 loss_cls: 3.9261 (3.8166) grad_norm: 4.3187 (4.4919) time: 0.8146 data: 0.0004 max mem: 8421 +[2024-12-06 01:34:56 root] (utils.py 283): INFO Epoch: [22] [2340/2502] eta: 0:02:07 lr: 0.000005 loss_cls: 3.7803 (3.8170) grad_norm: 4.3187 (4.4918) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:04 root] (utils.py 283): INFO Epoch: [22] [2350/2502] eta: 0:01:59 lr: 0.000005 loss_cls: 3.8546 (3.8161) grad_norm: 4.3332 (4.4912) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:12 root] (utils.py 283): INFO Epoch: [22] [2360/2502] eta: 0:01:51 lr: 0.000005 loss_cls: 3.8596 (3.8157) grad_norm: 4.1163 (4.4899) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:20 root] (utils.py 283): INFO Epoch: [22] [2370/2502] eta: 0:01:43 lr: 0.000005 loss_cls: 3.5228 (3.8148) grad_norm: 4.1941 (4.4906) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:27 root] (utils.py 283): INFO Epoch: [22] [2380/2502] eta: 0:01:35 lr: 0.000005 loss_cls: 3.8090 (3.8153) grad_norm: 4.4679 (4.4905) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:35 root] (utils.py 283): INFO Epoch: [22] [2390/2502] eta: 0:01:27 lr: 0.000005 loss_cls: 3.9839 (3.8158) grad_norm: 4.2447 (4.4893) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:43 root] (utils.py 283): INFO Epoch: [22] [2400/2502] eta: 0:01:20 lr: 0.000005 loss_cls: 3.7807 (3.8158) grad_norm: 4.1449 (4.4900) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:51 root] (utils.py 283): INFO Epoch: [22] [2410/2502] eta: 0:01:12 lr: 0.000005 loss_cls: 3.9843 (3.8170) grad_norm: 4.0928 (4.4884) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 01:35:59 root] (utils.py 283): INFO Epoch: [22] [2420/2502] eta: 0:01:04 lr: 0.000005 loss_cls: 4.0129 (3.8168) grad_norm: 4.2060 (4.4878) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 01:36:06 root] (utils.py 283): INFO Epoch: [22] [2430/2502] eta: 0:00:56 lr: 0.000005 loss_cls: 3.7233 (3.8163) grad_norm: 4.2767 (4.4878) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 01:36:14 root] (utils.py 283): INFO Epoch: [22] [2440/2502] eta: 0:00:48 lr: 0.000005 loss_cls: 3.6275 (3.8152) grad_norm: 4.3389 (4.4874) time: 0.7784 data: 0.0002 max mem: 8421 +[2024-12-06 01:36:22 root] (utils.py 283): INFO Epoch: [22] [2450/2502] eta: 0:00:40 lr: 0.000005 loss_cls: 3.8520 (3.8147) grad_norm: 4.2920 (4.4871) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 01:36:30 root] (utils.py 283): INFO Epoch: [22] [2460/2502] eta: 0:00:32 lr: 0.000005 loss_cls: 3.9242 (3.8148) grad_norm: 4.2263 (4.4870) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:36:38 root] (utils.py 283): INFO Epoch: [22] [2470/2502] eta: 0:00:25 lr: 0.000005 loss_cls: 4.0577 (3.8150) grad_norm: 4.4061 (4.4873) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 01:36:45 root] (utils.py 283): INFO Epoch: [22] [2480/2502] eta: 0:00:17 lr: 0.000005 loss_cls: 4.0819 (3.8152) grad_norm: 4.3303 (4.4866) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 01:36:54 root] (utils.py 283): INFO Epoch: [22] [2490/2502] eta: 0:00:09 lr: 0.000005 loss_cls: 3.7963 (3.8139) grad_norm: 4.5803 (4.4873) time: 0.8096 data: 0.0287 max mem: 8421 +[2024-12-06 01:37:02 root] (utils.py 283): INFO Epoch: [22] [2500/2502] eta: 0:00:01 lr: 0.000005 loss_cls: 3.7657 (3.8143) grad_norm: 4.5411 (4.4874) time: 0.8103 data: 0.0287 max mem: 8421 +[2024-12-06 01:37:02 root] (utils.py 283): INFO Epoch: [22] [2501/2502] eta: 0:00:00 lr: 0.000005 loss_cls: 3.7657 (3.8143) grad_norm: 4.5411 (4.4878) time: 0.8108 data: 0.0287 max mem: 8421 +[2024-12-06 01:37:02 root] (utils.py 297): INFO Epoch: [22] Total time: 0:32:43 (0.7849 s / it) +[2024-12-06 01:37:02 root] (engine.py 179): INFO Averaged stats:lr: 0.000005 loss_cls: 3.7657 (3.8267) grad_norm: 4.5411 (4.4878) +[2024-12-06 01:37:03 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7012 (0.7012) acc1: 87.5000 (87.5000) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1306 data: 0.0003 max mem: 8421 +[2024-12-06 01:37:04 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7663 (0.8429) acc1: 83.5938 (82.3153) acc3: 93.7500 (93.2528) acc5: 96.8750 (95.8807) time: 0.1319 data: 0.0004 max mem: 8421 +[2024-12-06 01:37:06 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8754 (0.8981) acc1: 80.4688 (81.2500) acc3: 92.1875 (92.5595) acc5: 95.3125 (95.2753) time: 0.1348 data: 0.0005 max mem: 8421 +[2024-12-06 01:37:07 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9456 (0.9039) acc1: 80.4688 (80.5444) acc3: 92.1875 (92.8931) acc5: 96.0938 (95.5393) time: 0.1366 data: 0.0005 max mem: 8421 +[2024-12-06 01:37:09 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8199 (0.8949) acc1: 81.2500 (80.8117) acc3: 93.7500 (93.0259) acc5: 96.8750 (95.6745) time: 0.1738 data: 0.0377 max mem: 8421 +[2024-12-06 01:37:11 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0113 (0.9884) acc1: 75.0000 (78.5999) acc3: 87.5000 (91.3909) acc5: 92.1875 (94.5159) time: 0.2002 data: 0.0610 max mem: 8421 +[2024-12-06 01:37:13 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2736 (1.0336) acc1: 71.0938 (77.6895) acc3: 85.9375 (90.6634) acc5: 90.6250 (93.8140) time: 0.1734 data: 0.0326 max mem: 8421 +[2024-12-06 01:37:14 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2830 (1.0775) acc1: 71.0938 (76.5625) acc3: 87.5000 (90.1078) acc5: 90.6250 (93.2768) time: 0.1596 data: 0.0199 max mem: 8421 +[2024-12-06 01:37:16 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2903 (1.1152) acc1: 70.3125 (75.8005) acc3: 85.1562 (89.4676) acc5: 89.0625 (92.7180) time: 0.1507 data: 0.0114 max mem: 8421 +[2024-12-06 01:37:17 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3178 (1.1449) acc1: 69.5312 (75.1288) acc3: 84.3750 (88.9681) acc5: 89.0625 (92.3249) time: 0.1398 data: 0.0009 max mem: 8421 +[2024-12-06 01:37:18 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1999 (1.1338) acc1: 74.2188 (75.2960) acc3: 87.5000 (89.1680) acc5: 90.6250 (92.5120) time: 0.1402 data: 0.0008 max mem: 8421 +[2024-12-06 01:37:18 root] (utils.py 297): INFO Test: Total time: 0:00:15 (0.1549 s / it) +[2024-12-06 01:37:19 root] (engine.py 264): INFO * Acc@1 75.040 Acc@3 89.092 Acc@5 92.406 loss 1.137 flops 1.285 layer_flops 1.251 +[2024-12-06 01:37:19 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.0% +[2024-12-06 01:37:19 root] (main.py 551): INFO Max accuracy: 75.04% +[2024-12-06 01:37:20 root] (utils.py 283): INFO Epoch: [23] [ 0/2502] eta: 0:35:07 lr: 0.000004 loss_cls: 4.2905 (4.2905) grad_norm: 4.1690 (4.1690) time: 0.8425 data: 0.0004 max mem: 8421 +[2024-12-06 01:37:28 root] (utils.py 283): INFO Epoch: [23] [ 10/2502] eta: 0:32:49 lr: 0.000004 loss_cls: 4.1550 (4.0403) grad_norm: 4.4300 (4.5932) time: 0.7903 data: 0.0003 max mem: 8421 +[2024-12-06 01:37:36 root] (utils.py 283): INFO Epoch: [23] [ 20/2502] eta: 0:32:31 lr: 0.000004 loss_cls: 4.0491 (3.8455) grad_norm: 4.4300 (4.4567) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 01:37:44 root] (utils.py 283): INFO Epoch: [23] [ 30/2502] eta: 0:32:20 lr: 0.000004 loss_cls: 4.0491 (3.8772) grad_norm: 4.4328 (4.5465) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 01:37:51 root] (utils.py 283): INFO Epoch: [23] [ 40/2502] eta: 0:32:10 lr: 0.000004 loss_cls: 3.9053 (3.8042) grad_norm: 4.3914 (4.5000) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 01:37:59 root] (utils.py 283): INFO Epoch: [23] [ 50/2502] eta: 0:32:02 lr: 0.000004 loss_cls: 3.7200 (3.7908) grad_norm: 4.2331 (4.4337) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 01:38:07 root] (utils.py 283): INFO Epoch: [23] [ 60/2502] eta: 0:32:00 lr: 0.000004 loss_cls: 4.0875 (3.8269) grad_norm: 4.1894 (4.4044) time: 0.7911 data: 0.0003 max mem: 8421 +[2024-12-06 01:38:15 root] (utils.py 283): INFO Epoch: [23] [ 70/2502] eta: 0:31:55 lr: 0.000004 loss_cls: 4.0766 (3.8133) grad_norm: 4.4109 (4.5151) time: 0.7965 data: 0.0003 max mem: 8421 +[2024-12-06 01:38:23 root] (utils.py 283): INFO Epoch: [23] [ 80/2502] eta: 0:31:49 lr: 0.000004 loss_cls: 3.8513 (3.8268) grad_norm: 4.3909 (4.4839) time: 0.7952 data: 0.0003 max mem: 8421 +[2024-12-06 01:38:31 root] (utils.py 283): INFO Epoch: [23] [ 90/2502] eta: 0:31:40 lr: 0.000004 loss_cls: 3.9604 (3.8358) grad_norm: 4.0508 (4.4906) time: 0.7891 data: 0.0003 max mem: 8421 +[2024-12-06 01:38:39 root] (utils.py 283): INFO Epoch: [23] [ 100/2502] eta: 0:31:30 lr: 0.000004 loss_cls: 3.9213 (3.8388) grad_norm: 4.2756 (4.4720) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 01:38:47 root] (utils.py 283): INFO Epoch: [23] [ 110/2502] eta: 0:31:20 lr: 0.000004 loss_cls: 3.9213 (3.8329) grad_norm: 4.2468 (4.4548) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 01:38:54 root] (utils.py 283): INFO Epoch: [23] [ 120/2502] eta: 0:31:12 lr: 0.000004 loss_cls: 3.7595 (3.8099) grad_norm: 4.1393 (4.4244) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:02 root] (utils.py 283): INFO Epoch: [23] [ 130/2502] eta: 0:31:04 lr: 0.000004 loss_cls: 3.8773 (3.8188) grad_norm: 4.1393 (4.4154) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:10 root] (utils.py 283): INFO Epoch: [23] [ 140/2502] eta: 0:30:56 lr: 0.000004 loss_cls: 4.0370 (3.8126) grad_norm: 4.1906 (4.4001) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:18 root] (utils.py 283): INFO Epoch: [23] [ 150/2502] eta: 0:30:48 lr: 0.000004 loss_cls: 3.8852 (3.8083) grad_norm: 4.2574 (4.3985) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:26 root] (utils.py 283): INFO Epoch: [23] [ 160/2502] eta: 0:30:39 lr: 0.000004 loss_cls: 3.9638 (3.8258) grad_norm: 4.3102 (4.4001) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:34 root] (utils.py 283): INFO Epoch: [23] [ 170/2502] eta: 0:30:31 lr: 0.000004 loss_cls: 3.9769 (3.8106) grad_norm: 4.4911 (4.4078) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:41 root] (utils.py 283): INFO Epoch: [23] [ 180/2502] eta: 0:30:24 lr: 0.000004 loss_cls: 3.0089 (3.7810) grad_norm: 4.5703 (4.4033) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:49 root] (utils.py 283): INFO Epoch: [23] [ 190/2502] eta: 0:30:15 lr: 0.000004 loss_cls: 3.8797 (3.7954) grad_norm: 4.3349 (4.4096) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 01:39:57 root] (utils.py 283): INFO Epoch: [23] [ 200/2502] eta: 0:30:07 lr: 0.000004 loss_cls: 4.1133 (3.7955) grad_norm: 4.4488 (4.4179) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:40:05 root] (utils.py 283): INFO Epoch: [23] [ 210/2502] eta: 0:30:00 lr: 0.000004 loss_cls: 3.7333 (3.7913) grad_norm: 4.4082 (4.4153) time: 0.7897 data: 0.0002 max mem: 8421 +[2024-12-06 01:40:13 root] (utils.py 283): INFO Epoch: [23] [ 220/2502] eta: 0:29:53 lr: 0.000004 loss_cls: 3.6681 (3.7846) grad_norm: 4.3321 (4.4153) time: 0.7924 data: 0.0003 max mem: 8421 +[2024-12-06 01:40:21 root] (utils.py 283): INFO Epoch: [23] [ 230/2502] eta: 0:29:45 lr: 0.000004 loss_cls: 3.9295 (3.7903) grad_norm: 4.4107 (4.4204) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-06 01:40:29 root] (utils.py 283): INFO Epoch: [23] [ 240/2502] eta: 0:29:37 lr: 0.000004 loss_cls: 4.0898 (3.8014) grad_norm: 4.4274 (4.4225) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 01:40:36 root] (utils.py 283): INFO Epoch: [23] [ 250/2502] eta: 0:29:29 lr: 0.000004 loss_cls: 3.9956 (3.8036) grad_norm: 4.2155 (4.4193) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 01:40:44 root] (utils.py 283): INFO Epoch: [23] [ 260/2502] eta: 0:29:21 lr: 0.000004 loss_cls: 3.9655 (3.8010) grad_norm: 4.1981 (4.4222) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 01:40:52 root] (utils.py 283): INFO Epoch: [23] [ 270/2502] eta: 0:29:13 lr: 0.000004 loss_cls: 3.7141 (3.7943) grad_norm: 4.1682 (4.4174) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 01:41:00 root] (utils.py 283): INFO Epoch: [23] [ 280/2502] eta: 0:29:05 lr: 0.000004 loss_cls: 3.7928 (3.7949) grad_norm: 4.1682 (4.4205) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 01:41:08 root] (utils.py 283): INFO Epoch: [23] [ 290/2502] eta: 0:28:57 lr: 0.000004 loss_cls: 3.7928 (3.7911) grad_norm: 4.3096 (4.4208) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 01:41:16 root] (utils.py 283): INFO Epoch: [23] [ 300/2502] eta: 0:28:49 lr: 0.000004 loss_cls: 3.9198 (3.7973) grad_norm: 4.4249 (4.4278) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 01:41:23 root] (utils.py 283): INFO Epoch: [23] [ 310/2502] eta: 0:28:41 lr: 0.000004 loss_cls: 3.6342 (3.7899) grad_norm: 4.3674 (4.4225) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 01:41:31 root] (utils.py 283): INFO Epoch: [23] [ 320/2502] eta: 0:28:32 lr: 0.000004 loss_cls: 3.5564 (3.7868) grad_norm: 4.2528 (4.4201) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-06 01:41:39 root] (utils.py 283): INFO Epoch: [23] [ 330/2502] eta: 0:28:25 lr: 0.000004 loss_cls: 4.0050 (3.7927) grad_norm: 4.3656 (4.4218) time: 0.7825 data: 0.0002 max mem: 8421 +[2024-12-06 01:41:47 root] (utils.py 283): INFO Epoch: [23] [ 340/2502] eta: 0:28:16 lr: 0.000004 loss_cls: 3.9525 (3.7884) grad_norm: 4.3606 (4.4301) time: 0.7825 data: 0.0002 max mem: 8421 +[2024-12-06 01:41:55 root] (utils.py 283): INFO Epoch: [23] [ 350/2502] eta: 0:28:08 lr: 0.000004 loss_cls: 3.6920 (3.7901) grad_norm: 4.1933 (4.4291) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 01:42:03 root] (utils.py 283): INFO Epoch: [23] [ 360/2502] eta: 0:28:00 lr: 0.000004 loss_cls: 3.9478 (3.7928) grad_norm: 4.5009 (4.4450) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 01:42:10 root] (utils.py 283): INFO Epoch: [23] [ 370/2502] eta: 0:27:53 lr: 0.000004 loss_cls: 3.8779 (3.7860) grad_norm: 4.3339 (4.4408) time: 0.7872 data: 0.0003 max mem: 8421 +[2024-12-06 01:42:18 root] (utils.py 283): INFO Epoch: [23] [ 380/2502] eta: 0:27:45 lr: 0.000004 loss_cls: 3.8206 (3.7863) grad_norm: 4.2366 (4.4369) time: 0.7887 data: 0.0002 max mem: 8421 +[2024-12-06 01:42:26 root] (utils.py 283): INFO Epoch: [23] [ 390/2502] eta: 0:27:37 lr: 0.000004 loss_cls: 3.9281 (3.7891) grad_norm: 4.3133 (4.4389) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 01:42:34 root] (utils.py 283): INFO Epoch: [23] [ 400/2502] eta: 0:27:29 lr: 0.000004 loss_cls: 4.1012 (3.7936) grad_norm: 4.3133 (4.4343) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 01:42:42 root] (utils.py 283): INFO Epoch: [23] [ 410/2502] eta: 0:27:21 lr: 0.000004 loss_cls: 4.1012 (3.7966) grad_norm: 4.4157 (4.4397) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 01:42:50 root] (utils.py 283): INFO Epoch: [23] [ 420/2502] eta: 0:27:13 lr: 0.000004 loss_cls: 3.7623 (3.7920) grad_norm: 4.4168 (4.4474) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 01:42:57 root] (utils.py 283): INFO Epoch: [23] [ 430/2502] eta: 0:27:05 lr: 0.000004 loss_cls: 3.7587 (3.7943) grad_norm: 4.4168 (4.4504) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 01:43:05 root] (utils.py 283): INFO Epoch: [23] [ 440/2502] eta: 0:26:57 lr: 0.000004 loss_cls: 3.6019 (3.7878) grad_norm: 4.3203 (4.4471) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 01:43:13 root] (utils.py 283): INFO Epoch: [23] [ 450/2502] eta: 0:26:49 lr: 0.000004 loss_cls: 3.3960 (3.7870) grad_norm: 4.2578 (4.4487) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 01:43:21 root] (utils.py 283): INFO Epoch: [23] [ 460/2502] eta: 0:26:41 lr: 0.000004 loss_cls: 4.0024 (3.7907) grad_norm: 4.2676 (4.4491) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-06 01:43:29 root] (utils.py 283): INFO Epoch: [23] [ 470/2502] eta: 0:26:33 lr: 0.000004 loss_cls: 3.9558 (3.7832) grad_norm: 4.1592 (4.4450) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 01:43:37 root] (utils.py 283): INFO Epoch: [23] [ 480/2502] eta: 0:26:26 lr: 0.000004 loss_cls: 3.5518 (3.7793) grad_norm: 4.1592 (4.4441) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 01:43:44 root] (utils.py 283): INFO Epoch: [23] [ 490/2502] eta: 0:26:18 lr: 0.000004 loss_cls: 3.8679 (3.7819) grad_norm: 4.1922 (4.4432) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-06 01:43:52 root] (utils.py 283): INFO Epoch: [23] [ 500/2502] eta: 0:26:10 lr: 0.000004 loss_cls: 4.0291 (3.7842) grad_norm: 4.1866 (4.4438) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:00 root] (utils.py 283): INFO Epoch: [23] [ 510/2502] eta: 0:26:02 lr: 0.000004 loss_cls: 4.0275 (3.7860) grad_norm: 4.1458 (4.4400) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:08 root] (utils.py 283): INFO Epoch: [23] [ 520/2502] eta: 0:25:54 lr: 0.000004 loss_cls: 3.7404 (3.7837) grad_norm: 4.2452 (4.4401) time: 0.7917 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:16 root] (utils.py 283): INFO Epoch: [23] [ 530/2502] eta: 0:25:47 lr: 0.000004 loss_cls: 3.8377 (3.7848) grad_norm: 4.3155 (4.4381) time: 0.7920 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:24 root] (utils.py 283): INFO Epoch: [23] [ 540/2502] eta: 0:25:39 lr: 0.000004 loss_cls: 3.9448 (3.7821) grad_norm: 4.2124 (4.4369) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:32 root] (utils.py 283): INFO Epoch: [23] [ 550/2502] eta: 0:25:31 lr: 0.000004 loss_cls: 3.9448 (3.7824) grad_norm: 4.1299 (4.4372) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:39 root] (utils.py 283): INFO Epoch: [23] [ 560/2502] eta: 0:25:23 lr: 0.000004 loss_cls: 4.0934 (3.7865) grad_norm: 4.2116 (4.4412) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:47 root] (utils.py 283): INFO Epoch: [23] [ 570/2502] eta: 0:25:15 lr: 0.000004 loss_cls: 4.1707 (3.7880) grad_norm: 4.3477 (4.4450) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:44:55 root] (utils.py 283): INFO Epoch: [23] [ 580/2502] eta: 0:25:07 lr: 0.000004 loss_cls: 4.1707 (3.7935) grad_norm: 4.4024 (4.4456) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 01:45:03 root] (utils.py 283): INFO Epoch: [23] [ 590/2502] eta: 0:24:59 lr: 0.000004 loss_cls: 4.2265 (3.7986) grad_norm: 4.4024 (4.4513) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 01:45:11 root] (utils.py 283): INFO Epoch: [23] [ 600/2502] eta: 0:24:51 lr: 0.000004 loss_cls: 4.1661 (3.7993) grad_norm: 4.4197 (4.4519) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 01:45:18 root] (utils.py 283): INFO Epoch: [23] [ 610/2502] eta: 0:24:43 lr: 0.000004 loss_cls: 3.8849 (3.7994) grad_norm: 4.3707 (4.4488) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 01:45:26 root] (utils.py 283): INFO Epoch: [23] [ 620/2502] eta: 0:24:35 lr: 0.000004 loss_cls: 3.7795 (3.7973) grad_norm: 4.2787 (4.4458) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 01:45:34 root] (utils.py 283): INFO Epoch: [23] [ 630/2502] eta: 0:24:28 lr: 0.000004 loss_cls: 3.8485 (3.7997) grad_norm: 4.1926 (4.4414) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-06 01:45:42 root] (utils.py 283): INFO Epoch: [23] [ 640/2502] eta: 0:24:20 lr: 0.000004 loss_cls: 3.9614 (3.7997) grad_norm: 4.2848 (4.4403) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 01:45:50 root] (utils.py 283): INFO Epoch: [23] [ 650/2502] eta: 0:24:12 lr: 0.000004 loss_cls: 3.8161 (3.7961) grad_norm: 4.3138 (4.4404) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-06 01:45:58 root] (utils.py 283): INFO Epoch: [23] [ 660/2502] eta: 0:24:04 lr: 0.000004 loss_cls: 3.9557 (3.8015) grad_norm: 4.2479 (4.4418) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 01:46:05 root] (utils.py 283): INFO Epoch: [23] [ 670/2502] eta: 0:23:56 lr: 0.000004 loss_cls: 4.1285 (3.8005) grad_norm: 4.4394 (4.4411) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 01:46:13 root] (utils.py 283): INFO Epoch: [23] [ 680/2502] eta: 0:23:48 lr: 0.000004 loss_cls: 3.8340 (3.8016) grad_norm: 4.3261 (4.4385) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 01:46:21 root] (utils.py 283): INFO Epoch: [23] [ 690/2502] eta: 0:23:40 lr: 0.000004 loss_cls: 3.9783 (3.8035) grad_norm: 4.2167 (4.4370) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 01:46:29 root] (utils.py 283): INFO Epoch: [23] [ 700/2502] eta: 0:23:32 lr: 0.000004 loss_cls: 3.9347 (3.8053) grad_norm: 4.2167 (4.4370) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 01:46:37 root] (utils.py 283): INFO Epoch: [23] [ 710/2502] eta: 0:23:25 lr: 0.000004 loss_cls: 3.9877 (3.8062) grad_norm: 4.4133 (4.4457) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 01:46:45 root] (utils.py 283): INFO Epoch: [23] [ 720/2502] eta: 0:23:17 lr: 0.000004 loss_cls: 4.0949 (3.8094) grad_norm: 4.2914 (4.4433) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 01:46:53 root] (utils.py 283): INFO Epoch: [23] [ 730/2502] eta: 0:23:09 lr: 0.000004 loss_cls: 4.1002 (3.8111) grad_norm: 4.2914 (4.4434) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:00 root] (utils.py 283): INFO Epoch: [23] [ 740/2502] eta: 0:23:01 lr: 0.000004 loss_cls: 4.1049 (3.8129) grad_norm: 4.2462 (4.4403) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:08 root] (utils.py 283): INFO Epoch: [23] [ 750/2502] eta: 0:22:54 lr: 0.000004 loss_cls: 4.1049 (3.8163) grad_norm: 4.1918 (4.4425) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:16 root] (utils.py 283): INFO Epoch: [23] [ 760/2502] eta: 0:22:46 lr: 0.000004 loss_cls: 4.1708 (3.8192) grad_norm: 4.2471 (4.4433) time: 0.7908 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:24 root] (utils.py 283): INFO Epoch: [23] [ 770/2502] eta: 0:22:38 lr: 0.000004 loss_cls: 4.1708 (3.8228) grad_norm: 4.4116 (4.4464) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:32 root] (utils.py 283): INFO Epoch: [23] [ 780/2502] eta: 0:22:30 lr: 0.000004 loss_cls: 4.0167 (3.8196) grad_norm: 4.3370 (4.4469) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:40 root] (utils.py 283): INFO Epoch: [23] [ 790/2502] eta: 0:22:22 lr: 0.000004 loss_cls: 3.7071 (3.8221) grad_norm: 4.4342 (4.4533) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:48 root] (utils.py 283): INFO Epoch: [23] [ 800/2502] eta: 0:22:14 lr: 0.000004 loss_cls: 3.7927 (3.8195) grad_norm: 4.5447 (4.4573) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 01:47:55 root] (utils.py 283): INFO Epoch: [23] [ 810/2502] eta: 0:22:07 lr: 0.000004 loss_cls: 3.8117 (3.8223) grad_norm: 4.4065 (4.4567) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:03 root] (utils.py 283): INFO Epoch: [23] [ 820/2502] eta: 0:21:59 lr: 0.000004 loss_cls: 4.0621 (3.8222) grad_norm: 4.3049 (4.4532) time: 0.7934 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:11 root] (utils.py 283): INFO Epoch: [23] [ 830/2502] eta: 0:21:51 lr: 0.000004 loss_cls: 3.8096 (3.8203) grad_norm: 4.2389 (4.4513) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:19 root] (utils.py 283): INFO Epoch: [23] [ 840/2502] eta: 0:21:43 lr: 0.000004 loss_cls: 3.4730 (3.8175) grad_norm: 4.3304 (4.4515) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:27 root] (utils.py 283): INFO Epoch: [23] [ 850/2502] eta: 0:21:35 lr: 0.000004 loss_cls: 3.7665 (3.8172) grad_norm: 4.4219 (4.4506) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:35 root] (utils.py 283): INFO Epoch: [23] [ 860/2502] eta: 0:21:27 lr: 0.000004 loss_cls: 3.8827 (3.8183) grad_norm: 4.4219 (4.4531) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:43 root] (utils.py 283): INFO Epoch: [23] [ 870/2502] eta: 0:21:20 lr: 0.000004 loss_cls: 4.1086 (3.8212) grad_norm: 4.4288 (4.4564) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:50 root] (utils.py 283): INFO Epoch: [23] [ 880/2502] eta: 0:21:12 lr: 0.000004 loss_cls: 4.1086 (3.8213) grad_norm: 4.2692 (4.4534) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 01:48:58 root] (utils.py 283): INFO Epoch: [23] [ 890/2502] eta: 0:21:04 lr: 0.000004 loss_cls: 4.0986 (3.8244) grad_norm: 4.2105 (4.4515) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 01:49:06 root] (utils.py 283): INFO Epoch: [23] [ 900/2502] eta: 0:20:56 lr: 0.000004 loss_cls: 4.0337 (3.8260) grad_norm: 4.2051 (4.4497) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 01:49:14 root] (utils.py 283): INFO Epoch: [23] [ 910/2502] eta: 0:20:48 lr: 0.000004 loss_cls: 3.8573 (3.8261) grad_norm: 4.2402 (4.4495) time: 0.7860 data: 0.0003 max mem: 8421 +[2024-12-06 01:49:22 root] (utils.py 283): INFO Epoch: [23] [ 920/2502] eta: 0:20:40 lr: 0.000004 loss_cls: 3.7427 (3.8247) grad_norm: 4.3850 (4.4499) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 01:49:29 root] (utils.py 283): INFO Epoch: [23] [ 930/2502] eta: 0:20:32 lr: 0.000004 loss_cls: 3.5933 (3.8227) grad_norm: 4.4929 (4.4509) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 01:49:37 root] (utils.py 283): INFO Epoch: [23] [ 940/2502] eta: 0:20:24 lr: 0.000004 loss_cls: 3.5462 (3.8214) grad_norm: 4.6605 (4.4519) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 01:49:45 root] (utils.py 283): INFO Epoch: [23] [ 950/2502] eta: 0:20:17 lr: 0.000004 loss_cls: 3.5822 (3.8199) grad_norm: 4.4372 (4.4534) time: 0.7880 data: 0.0003 max mem: 8421 +[2024-12-06 01:49:53 root] (utils.py 283): INFO Epoch: [23] [ 960/2502] eta: 0:20:09 lr: 0.000004 loss_cls: 3.9135 (3.8224) grad_norm: 4.4228 (4.4519) time: 0.7979 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:01 root] (utils.py 283): INFO Epoch: [23] [ 970/2502] eta: 0:20:01 lr: 0.000004 loss_cls: 4.1412 (3.8219) grad_norm: 4.4154 (4.4513) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:09 root] (utils.py 283): INFO Epoch: [23] [ 980/2502] eta: 0:19:54 lr: 0.000004 loss_cls: 3.9023 (3.8230) grad_norm: 4.4133 (4.4524) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:17 root] (utils.py 283): INFO Epoch: [23] [ 990/2502] eta: 0:19:46 lr: 0.000004 loss_cls: 4.1111 (3.8243) grad_norm: 4.3960 (4.4531) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:25 root] (utils.py 283): INFO Epoch: [23] [1000/2502] eta: 0:19:38 lr: 0.000004 loss_cls: 3.9791 (3.8238) grad_norm: 4.3960 (4.4519) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:33 root] (utils.py 283): INFO Epoch: [23] [1010/2502] eta: 0:19:30 lr: 0.000004 loss_cls: 3.9201 (3.8252) grad_norm: 4.3202 (4.4513) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:40 root] (utils.py 283): INFO Epoch: [23] [1020/2502] eta: 0:19:22 lr: 0.000004 loss_cls: 4.0606 (3.8253) grad_norm: 4.3942 (4.4568) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:48 root] (utils.py 283): INFO Epoch: [23] [1030/2502] eta: 0:19:14 lr: 0.000004 loss_cls: 3.4382 (3.8208) grad_norm: 4.3942 (4.4565) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 01:50:56 root] (utils.py 283): INFO Epoch: [23] [1040/2502] eta: 0:19:06 lr: 0.000004 loss_cls: 3.7662 (3.8229) grad_norm: 4.3012 (4.4563) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:04 root] (utils.py 283): INFO Epoch: [23] [1050/2502] eta: 0:18:59 lr: 0.000004 loss_cls: 4.0403 (3.8218) grad_norm: 4.3012 (4.4550) time: 0.7908 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:12 root] (utils.py 283): INFO Epoch: [23] [1060/2502] eta: 0:18:51 lr: 0.000004 loss_cls: 3.7362 (3.8213) grad_norm: 4.3006 (4.4543) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:20 root] (utils.py 283): INFO Epoch: [23] [1070/2502] eta: 0:18:43 lr: 0.000004 loss_cls: 3.8144 (3.8210) grad_norm: 4.2997 (4.4518) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:27 root] (utils.py 283): INFO Epoch: [23] [1080/2502] eta: 0:18:35 lr: 0.000004 loss_cls: 3.8190 (3.8205) grad_norm: 4.2127 (4.4511) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:35 root] (utils.py 283): INFO Epoch: [23] [1090/2502] eta: 0:18:27 lr: 0.000004 loss_cls: 3.8967 (3.8206) grad_norm: 4.3061 (4.4518) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:43 root] (utils.py 283): INFO Epoch: [23] [1100/2502] eta: 0:18:19 lr: 0.000004 loss_cls: 3.9023 (3.8205) grad_norm: 4.3061 (4.4500) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:51 root] (utils.py 283): INFO Epoch: [23] [1110/2502] eta: 0:18:11 lr: 0.000004 loss_cls: 3.8642 (3.8180) grad_norm: 4.2532 (4.4485) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:51:59 root] (utils.py 283): INFO Epoch: [23] [1120/2502] eta: 0:18:03 lr: 0.000004 loss_cls: 3.4579 (3.8154) grad_norm: 4.2532 (4.4473) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-06 01:52:07 root] (utils.py 283): INFO Epoch: [23] [1130/2502] eta: 0:17:56 lr: 0.000004 loss_cls: 3.8136 (3.8173) grad_norm: 4.2344 (4.4476) time: 0.7940 data: 0.0003 max mem: 8421 +[2024-12-06 01:52:15 root] (utils.py 283): INFO Epoch: [23] [1140/2502] eta: 0:17:48 lr: 0.000004 loss_cls: 4.0395 (3.8173) grad_norm: 4.2041 (4.4465) time: 0.8028 data: 0.0003 max mem: 8421 +[2024-12-06 01:52:23 root] (utils.py 283): INFO Epoch: [23] [1150/2502] eta: 0:17:40 lr: 0.000004 loss_cls: 4.0081 (3.8175) grad_norm: 4.2041 (4.4463) time: 0.7899 data: 0.0002 max mem: 8421 +[2024-12-06 01:52:30 root] (utils.py 283): INFO Epoch: [23] [1160/2502] eta: 0:17:32 lr: 0.000004 loss_cls: 3.9421 (3.8169) grad_norm: 4.3612 (4.4476) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 01:52:38 root] (utils.py 283): INFO Epoch: [23] [1170/2502] eta: 0:17:25 lr: 0.000004 loss_cls: 3.9930 (3.8178) grad_norm: 4.5142 (4.4527) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 01:52:46 root] (utils.py 283): INFO Epoch: [23] [1180/2502] eta: 0:17:17 lr: 0.000004 loss_cls: 4.0990 (3.8200) grad_norm: 4.4108 (4.4521) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-06 01:52:54 root] (utils.py 283): INFO Epoch: [23] [1190/2502] eta: 0:17:09 lr: 0.000004 loss_cls: 4.0816 (3.8197) grad_norm: 4.3593 (4.4519) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 01:53:02 root] (utils.py 283): INFO Epoch: [23] [1200/2502] eta: 0:17:01 lr: 0.000004 loss_cls: 3.5730 (3.8180) grad_norm: 4.3593 (4.4518) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-06 01:53:10 root] (utils.py 283): INFO Epoch: [23] [1210/2502] eta: 0:16:53 lr: 0.000004 loss_cls: 3.5966 (3.8176) grad_norm: 4.5096 (4.4609) time: 0.7831 data: 0.0002 max mem: 8421 +[2024-12-06 01:53:17 root] (utils.py 283): INFO Epoch: [23] [1220/2502] eta: 0:16:45 lr: 0.000004 loss_cls: 3.6248 (3.8154) grad_norm: 4.7563 (4.4613) time: 0.7905 data: 0.0003 max mem: 8421 +[2024-12-06 01:53:25 root] (utils.py 283): INFO Epoch: [23] [1230/2502] eta: 0:16:38 lr: 0.000004 loss_cls: 3.6688 (3.8153) grad_norm: 4.3471 (4.4604) time: 0.7875 data: 0.0003 max mem: 8421 +[2024-12-06 01:53:33 root] (utils.py 283): INFO Epoch: [23] [1240/2502] eta: 0:16:30 lr: 0.000004 loss_cls: 3.7174 (3.8143) grad_norm: 4.2485 (4.4581) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-06 01:53:41 root] (utils.py 283): INFO Epoch: [23] [1250/2502] eta: 0:16:22 lr: 0.000004 loss_cls: 3.7238 (3.8143) grad_norm: 4.2747 (4.4653) time: 0.7883 data: 0.0002 max mem: 8421 +[2024-12-06 01:53:49 root] (utils.py 283): INFO Epoch: [23] [1260/2502] eta: 0:16:14 lr: 0.000004 loss_cls: 3.9769 (3.8163) grad_norm: 4.2305 (4.4649) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 01:53:57 root] (utils.py 283): INFO Epoch: [23] [1270/2502] eta: 0:16:06 lr: 0.000004 loss_cls: 3.9769 (3.8176) grad_norm: 4.2137 (4.4642) time: 0.7912 data: 0.0002 max mem: 8421 +[2024-12-06 01:54:05 root] (utils.py 283): INFO Epoch: [23] [1280/2502] eta: 0:15:58 lr: 0.000004 loss_cls: 3.9161 (3.8179) grad_norm: 4.2975 (4.4627) time: 0.7863 data: 0.0002 max mem: 8421 +[2024-12-06 01:54:12 root] (utils.py 283): INFO Epoch: [23] [1290/2502] eta: 0:15:51 lr: 0.000004 loss_cls: 3.9108 (3.8182) grad_norm: 4.2514 (4.4636) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-06 01:54:20 root] (utils.py 283): INFO Epoch: [23] [1300/2502] eta: 0:15:43 lr: 0.000004 loss_cls: 3.9336 (3.8192) grad_norm: 4.3025 (4.4691) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 01:54:28 root] (utils.py 283): INFO Epoch: [23] [1310/2502] eta: 0:15:35 lr: 0.000004 loss_cls: 3.8976 (3.8184) grad_norm: 4.3025 (4.4682) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 01:54:36 root] (utils.py 283): INFO Epoch: [23] [1320/2502] eta: 0:15:27 lr: 0.000004 loss_cls: 3.8123 (3.8177) grad_norm: 4.2354 (4.4681) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 01:54:44 root] (utils.py 283): INFO Epoch: [23] [1330/2502] eta: 0:15:19 lr: 0.000004 loss_cls: 3.8623 (3.8160) grad_norm: 4.2130 (4.4655) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 01:54:52 root] (utils.py 283): INFO Epoch: [23] [1340/2502] eta: 0:15:11 lr: 0.000004 loss_cls: 3.7643 (3.8142) grad_norm: 4.0590 (4.4642) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 01:54:59 root] (utils.py 283): INFO Epoch: [23] [1350/2502] eta: 0:15:03 lr: 0.000004 loss_cls: 3.9752 (3.8164) grad_norm: 4.2753 (4.4653) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 01:55:07 root] (utils.py 283): INFO Epoch: [23] [1360/2502] eta: 0:14:55 lr: 0.000004 loss_cls: 4.1330 (3.8162) grad_norm: 4.3142 (4.4643) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 01:55:15 root] (utils.py 283): INFO Epoch: [23] [1370/2502] eta: 0:14:48 lr: 0.000004 loss_cls: 3.9944 (3.8179) grad_norm: 4.2400 (4.4624) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 01:55:23 root] (utils.py 283): INFO Epoch: [23] [1380/2502] eta: 0:14:40 lr: 0.000004 loss_cls: 4.0239 (3.8198) grad_norm: 4.2083 (4.4640) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 01:55:31 root] (utils.py 283): INFO Epoch: [23] [1390/2502] eta: 0:14:32 lr: 0.000004 loss_cls: 3.8629 (3.8200) grad_norm: 4.3496 (4.4664) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 01:55:38 root] (utils.py 283): INFO Epoch: [23] [1400/2502] eta: 0:14:24 lr: 0.000004 loss_cls: 3.8629 (3.8186) grad_norm: 4.3518 (4.4653) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-06 01:55:46 root] (utils.py 283): INFO Epoch: [23] [1410/2502] eta: 0:14:16 lr: 0.000004 loss_cls: 3.8689 (3.8190) grad_norm: 4.3518 (4.4645) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 01:55:54 root] (utils.py 283): INFO Epoch: [23] [1420/2502] eta: 0:14:08 lr: 0.000004 loss_cls: 3.9283 (3.8184) grad_norm: 4.3647 (4.4657) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:02 root] (utils.py 283): INFO Epoch: [23] [1430/2502] eta: 0:14:00 lr: 0.000004 loss_cls: 3.6419 (3.8171) grad_norm: 4.3381 (4.4652) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:10 root] (utils.py 283): INFO Epoch: [23] [1440/2502] eta: 0:13:53 lr: 0.000004 loss_cls: 3.6404 (3.8157) grad_norm: 4.3012 (4.4634) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:18 root] (utils.py 283): INFO Epoch: [23] [1450/2502] eta: 0:13:45 lr: 0.000004 loss_cls: 3.6404 (3.8149) grad_norm: 4.2274 (4.4621) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:25 root] (utils.py 283): INFO Epoch: [23] [1460/2502] eta: 0:13:37 lr: 0.000004 loss_cls: 3.9765 (3.8169) grad_norm: 4.2017 (4.4688) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:33 root] (utils.py 283): INFO Epoch: [23] [1470/2502] eta: 0:13:29 lr: 0.000004 loss_cls: 4.0759 (3.8161) grad_norm: 4.2017 (4.4680) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:41 root] (utils.py 283): INFO Epoch: [23] [1480/2502] eta: 0:13:21 lr: 0.000004 loss_cls: 3.8216 (3.8150) grad_norm: 4.3678 (4.4676) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:49 root] (utils.py 283): INFO Epoch: [23] [1490/2502] eta: 0:13:13 lr: 0.000004 loss_cls: 3.9820 (3.8152) grad_norm: 4.3690 (4.4720) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:56:56 root] (utils.py 283): INFO Epoch: [23] [1500/2502] eta: 0:13:05 lr: 0.000004 loss_cls: 4.0126 (3.8154) grad_norm: 4.0879 (4.4744) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:04 root] (utils.py 283): INFO Epoch: [23] [1510/2502] eta: 0:12:57 lr: 0.000004 loss_cls: 4.0035 (3.8166) grad_norm: 4.2948 (4.4740) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:12 root] (utils.py 283): INFO Epoch: [23] [1520/2502] eta: 0:12:49 lr: 0.000004 loss_cls: 3.9671 (3.8154) grad_norm: 4.2984 (4.4729) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:20 root] (utils.py 283): INFO Epoch: [23] [1530/2502] eta: 0:12:42 lr: 0.000004 loss_cls: 3.6669 (3.8143) grad_norm: 4.2700 (4.4720) time: 0.7733 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:27 root] (utils.py 283): INFO Epoch: [23] [1540/2502] eta: 0:12:34 lr: 0.000004 loss_cls: 4.0111 (3.8146) grad_norm: 4.2700 (4.4711) time: 0.7745 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:35 root] (utils.py 283): INFO Epoch: [23] [1550/2502] eta: 0:12:26 lr: 0.000004 loss_cls: 3.9856 (3.8149) grad_norm: 4.2541 (4.4700) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:43 root] (utils.py 283): INFO Epoch: [23] [1560/2502] eta: 0:12:18 lr: 0.000004 loss_cls: 3.9057 (3.8148) grad_norm: 4.2012 (4.4703) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:51 root] (utils.py 283): INFO Epoch: [23] [1570/2502] eta: 0:12:10 lr: 0.000004 loss_cls: 3.9718 (3.8166) grad_norm: 4.2869 (4.4702) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 01:57:59 root] (utils.py 283): INFO Epoch: [23] [1580/2502] eta: 0:12:02 lr: 0.000004 loss_cls: 3.9903 (3.8159) grad_norm: 4.3627 (4.4700) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 01:58:06 root] (utils.py 283): INFO Epoch: [23] [1590/2502] eta: 0:11:54 lr: 0.000004 loss_cls: 3.9975 (3.8179) grad_norm: 4.2272 (4.4687) time: 0.7824 data: 0.0002 max mem: 8421 +[2024-12-06 01:58:14 root] (utils.py 283): INFO Epoch: [23] [1600/2502] eta: 0:11:46 lr: 0.000004 loss_cls: 4.1316 (3.8193) grad_norm: 4.3767 (4.4692) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 01:58:22 root] (utils.py 283): INFO Epoch: [23] [1610/2502] eta: 0:11:39 lr: 0.000004 loss_cls: 3.9943 (3.8184) grad_norm: 4.4242 (4.4681) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 01:58:30 root] (utils.py 283): INFO Epoch: [23] [1620/2502] eta: 0:11:31 lr: 0.000004 loss_cls: 3.5480 (3.8162) grad_norm: 4.2303 (4.4681) time: 0.7892 data: 0.0003 max mem: 8421 +[2024-12-06 01:58:38 root] (utils.py 283): INFO Epoch: [23] [1630/2502] eta: 0:11:23 lr: 0.000004 loss_cls: 3.7775 (3.8162) grad_norm: 4.2692 (4.4688) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 01:58:46 root] (utils.py 283): INFO Epoch: [23] [1640/2502] eta: 0:11:15 lr: 0.000004 loss_cls: 3.9168 (3.8166) grad_norm: 4.3862 (4.4687) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 01:58:54 root] (utils.py 283): INFO Epoch: [23] [1650/2502] eta: 0:11:07 lr: 0.000004 loss_cls: 4.1269 (3.8161) grad_norm: 4.3145 (4.4682) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 01:59:01 root] (utils.py 283): INFO Epoch: [23] [1660/2502] eta: 0:11:00 lr: 0.000004 loss_cls: 4.1269 (3.8172) grad_norm: 4.2678 (4.4668) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 01:59:09 root] (utils.py 283): INFO Epoch: [23] [1670/2502] eta: 0:10:52 lr: 0.000004 loss_cls: 4.0788 (3.8181) grad_norm: 4.2775 (4.4657) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 01:59:17 root] (utils.py 283): INFO Epoch: [23] [1680/2502] eta: 0:10:44 lr: 0.000004 loss_cls: 3.9566 (3.8183) grad_norm: 4.3474 (4.4657) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 01:59:25 root] (utils.py 283): INFO Epoch: [23] [1690/2502] eta: 0:10:36 lr: 0.000004 loss_cls: 3.9309 (3.8187) grad_norm: 4.3474 (4.4647) time: 0.7917 data: 0.0003 max mem: 8421 +[2024-12-06 01:59:33 root] (utils.py 283): INFO Epoch: [23] [1700/2502] eta: 0:10:28 lr: 0.000004 loss_cls: 3.9818 (3.8196) grad_norm: 4.2071 (4.4649) time: 0.7926 data: 0.0003 max mem: 8421 +[2024-12-06 01:59:41 root] (utils.py 283): INFO Epoch: [23] [1710/2502] eta: 0:10:20 lr: 0.000004 loss_cls: 4.0073 (3.8210) grad_norm: 4.3489 (4.4714) time: 0.7854 data: 0.0002 max mem: 8421 +[2024-12-06 01:59:49 root] (utils.py 283): INFO Epoch: [23] [1720/2502] eta: 0:10:13 lr: 0.000004 loss_cls: 4.0073 (3.8220) grad_norm: 4.2563 (4.4707) time: 0.7920 data: 0.0003 max mem: 8421 +[2024-12-06 01:59:57 root] (utils.py 283): INFO Epoch: [23] [1730/2502] eta: 0:10:05 lr: 0.000004 loss_cls: 3.9222 (3.8216) grad_norm: 4.4135 (4.4710) time: 0.8011 data: 0.0003 max mem: 8421 +[2024-12-06 02:00:05 root] (utils.py 283): INFO Epoch: [23] [1740/2502] eta: 0:09:57 lr: 0.000004 loss_cls: 3.8894 (3.8210) grad_norm: 4.4932 (4.4728) time: 0.8080 data: 0.0003 max mem: 8421 +[2024-12-06 02:00:13 root] (utils.py 283): INFO Epoch: [23] [1750/2502] eta: 0:09:49 lr: 0.000004 loss_cls: 3.9107 (3.8202) grad_norm: 4.5266 (4.4735) time: 0.8058 data: 0.0003 max mem: 8421 +[2024-12-06 02:00:21 root] (utils.py 283): INFO Epoch: [23] [1760/2502] eta: 0:09:42 lr: 0.000004 loss_cls: 3.5256 (3.8191) grad_norm: 4.4178 (4.4734) time: 0.8057 data: 0.0003 max mem: 8421 +[2024-12-06 02:00:29 root] (utils.py 283): INFO Epoch: [23] [1770/2502] eta: 0:09:34 lr: 0.000004 loss_cls: 3.3931 (3.8176) grad_norm: 4.3566 (4.4733) time: 0.8066 data: 0.0003 max mem: 8421 +[2024-12-06 02:00:37 root] (utils.py 283): INFO Epoch: [23] [1780/2502] eta: 0:09:26 lr: 0.000004 loss_cls: 3.9212 (3.8183) grad_norm: 4.5092 (4.4761) time: 0.8051 data: 0.0003 max mem: 8421 +[2024-12-06 02:00:45 root] (utils.py 283): INFO Epoch: [23] [1790/2502] eta: 0:09:18 lr: 0.000004 loss_cls: 3.9529 (3.8181) grad_norm: 4.4089 (4.4747) time: 0.8060 data: 0.0003 max mem: 8421 +[2024-12-06 02:00:53 root] (utils.py 283): INFO Epoch: [23] [1800/2502] eta: 0:09:11 lr: 0.000004 loss_cls: 3.9180 (3.8182) grad_norm: 4.2461 (4.4746) time: 0.8067 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:01 root] (utils.py 283): INFO Epoch: [23] [1810/2502] eta: 0:09:03 lr: 0.000004 loss_cls: 3.8982 (3.8177) grad_norm: 4.3623 (4.4747) time: 0.8051 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:09 root] (utils.py 283): INFO Epoch: [23] [1820/2502] eta: 0:08:55 lr: 0.000004 loss_cls: 3.8432 (3.8170) grad_norm: 4.3237 (4.4741) time: 0.8049 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:17 root] (utils.py 283): INFO Epoch: [23] [1830/2502] eta: 0:08:47 lr: 0.000004 loss_cls: 3.8874 (3.8178) grad_norm: 4.2377 (4.4734) time: 0.8077 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:26 root] (utils.py 283): INFO Epoch: [23] [1840/2502] eta: 0:08:40 lr: 0.000004 loss_cls: 3.9209 (3.8178) grad_norm: 4.1151 (4.4723) time: 0.8110 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:34 root] (utils.py 283): INFO Epoch: [23] [1850/2502] eta: 0:08:32 lr: 0.000004 loss_cls: 3.9077 (3.8181) grad_norm: 4.2493 (4.4730) time: 0.8096 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:42 root] (utils.py 283): INFO Epoch: [23] [1860/2502] eta: 0:08:24 lr: 0.000004 loss_cls: 3.7778 (3.8167) grad_norm: 4.3915 (4.4720) time: 0.8083 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:50 root] (utils.py 283): INFO Epoch: [23] [1870/2502] eta: 0:08:16 lr: 0.000004 loss_cls: 3.5837 (3.8162) grad_norm: 4.2878 (4.4719) time: 0.8088 data: 0.0003 max mem: 8421 +[2024-12-06 02:01:58 root] (utils.py 283): INFO Epoch: [23] [1880/2502] eta: 0:08:08 lr: 0.000004 loss_cls: 3.7348 (3.8141) grad_norm: 4.2461 (4.4718) time: 0.8071 data: 0.0003 max mem: 8421 +[2024-12-06 02:02:06 root] (utils.py 283): INFO Epoch: [23] [1890/2502] eta: 0:08:01 lr: 0.000004 loss_cls: 3.6812 (3.8147) grad_norm: 4.3754 (4.4715) time: 0.8077 data: 0.0003 max mem: 8421 +[2024-12-06 02:02:14 root] (utils.py 283): INFO Epoch: [23] [1900/2502] eta: 0:07:53 lr: 0.000004 loss_cls: 4.0388 (3.8151) grad_norm: 4.4204 (4.4710) time: 0.8082 data: 0.0003 max mem: 8421 +[2024-12-06 02:02:22 root] (utils.py 283): INFO Epoch: [23] [1910/2502] eta: 0:07:45 lr: 0.000004 loss_cls: 4.0912 (3.8170) grad_norm: 4.4147 (4.4717) time: 0.8072 data: 0.0003 max mem: 8421 +[2024-12-06 02:02:30 root] (utils.py 283): INFO Epoch: [23] [1920/2502] eta: 0:07:37 lr: 0.000004 loss_cls: 3.9900 (3.8166) grad_norm: 4.3046 (4.4707) time: 0.8060 data: 0.0003 max mem: 8421 +[2024-12-06 02:02:38 root] (utils.py 283): INFO Epoch: [23] [1930/2502] eta: 0:07:29 lr: 0.000004 loss_cls: 3.9861 (3.8174) grad_norm: 4.3009 (4.4719) time: 0.7983 data: 0.0003 max mem: 8421 +[2024-12-06 02:02:46 root] (utils.py 283): INFO Epoch: [23] [1940/2502] eta: 0:07:22 lr: 0.000004 loss_cls: 3.9477 (3.8179) grad_norm: 4.4001 (4.4738) time: 0.7967 data: 0.0003 max mem: 8421 +[2024-12-06 02:02:54 root] (utils.py 283): INFO Epoch: [23] [1950/2502] eta: 0:07:14 lr: 0.000004 loss_cls: 3.9477 (3.8184) grad_norm: 4.3548 (4.4730) time: 0.8013 data: 0.0002 max mem: 8421 +[2024-12-06 02:03:02 root] (utils.py 283): INFO Epoch: [23] [1960/2502] eta: 0:07:06 lr: 0.000004 loss_cls: 3.9695 (3.8183) grad_norm: 4.3864 (4.4738) time: 0.7929 data: 0.0002 max mem: 8421 +[2024-12-06 02:03:10 root] (utils.py 283): INFO Epoch: [23] [1970/2502] eta: 0:06:58 lr: 0.000004 loss_cls: 4.1574 (3.8196) grad_norm: 4.2292 (4.4726) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 02:03:18 root] (utils.py 283): INFO Epoch: [23] [1980/2502] eta: 0:06:50 lr: 0.000004 loss_cls: 4.1902 (3.8202) grad_norm: 4.1359 (4.4711) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 02:03:25 root] (utils.py 283): INFO Epoch: [23] [1990/2502] eta: 0:06:42 lr: 0.000004 loss_cls: 3.8317 (3.8198) grad_norm: 4.1293 (4.4697) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:03:33 root] (utils.py 283): INFO Epoch: [23] [2000/2502] eta: 0:06:34 lr: 0.000004 loss_cls: 3.8317 (3.8191) grad_norm: 4.1574 (4.4691) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-06 02:03:41 root] (utils.py 283): INFO Epoch: [23] [2010/2502] eta: 0:06:26 lr: 0.000004 loss_cls: 3.9922 (3.8192) grad_norm: 4.2281 (4.4700) time: 0.7811 data: 0.0002 max mem: 8421 +[2024-12-06 02:03:49 root] (utils.py 283): INFO Epoch: [23] [2020/2502] eta: 0:06:19 lr: 0.000004 loss_cls: 3.9922 (3.8197) grad_norm: 4.3125 (4.4713) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 02:03:57 root] (utils.py 283): INFO Epoch: [23] [2030/2502] eta: 0:06:11 lr: 0.000004 loss_cls: 4.0415 (3.8207) grad_norm: 4.4284 (4.4729) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:05 root] (utils.py 283): INFO Epoch: [23] [2040/2502] eta: 0:06:03 lr: 0.000004 loss_cls: 4.0415 (3.8211) grad_norm: 4.4008 (4.4719) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:12 root] (utils.py 283): INFO Epoch: [23] [2050/2502] eta: 0:05:55 lr: 0.000004 loss_cls: 4.0146 (3.8216) grad_norm: 4.3698 (4.4721) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:20 root] (utils.py 283): INFO Epoch: [23] [2060/2502] eta: 0:05:47 lr: 0.000004 loss_cls: 4.0291 (3.8227) grad_norm: 4.3263 (4.4715) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:28 root] (utils.py 283): INFO Epoch: [23] [2070/2502] eta: 0:05:39 lr: 0.000004 loss_cls: 4.0291 (3.8216) grad_norm: 4.2364 (4.4704) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:36 root] (utils.py 283): INFO Epoch: [23] [2080/2502] eta: 0:05:31 lr: 0.000004 loss_cls: 3.8090 (3.8220) grad_norm: 4.3732 (4.4712) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:44 root] (utils.py 283): INFO Epoch: [23] [2090/2502] eta: 0:05:23 lr: 0.000004 loss_cls: 3.8652 (3.8220) grad_norm: 4.3713 (4.4698) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:52 root] (utils.py 283): INFO Epoch: [23] [2100/2502] eta: 0:05:16 lr: 0.000004 loss_cls: 3.8877 (3.8223) grad_norm: 4.3007 (4.4715) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 02:04:59 root] (utils.py 283): INFO Epoch: [23] [2110/2502] eta: 0:05:08 lr: 0.000004 loss_cls: 3.7142 (3.8223) grad_norm: 4.3588 (4.4725) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 02:05:07 root] (utils.py 283): INFO Epoch: [23] [2120/2502] eta: 0:05:00 lr: 0.000004 loss_cls: 3.9660 (3.8221) grad_norm: 4.3092 (4.4725) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-06 02:05:15 root] (utils.py 283): INFO Epoch: [23] [2130/2502] eta: 0:04:52 lr: 0.000004 loss_cls: 3.9660 (3.8213) grad_norm: 4.2351 (4.4729) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-06 02:05:23 root] (utils.py 283): INFO Epoch: [23] [2140/2502] eta: 0:04:44 lr: 0.000004 loss_cls: 3.5894 (3.8202) grad_norm: 4.4092 (4.4730) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-06 02:05:31 root] (utils.py 283): INFO Epoch: [23] [2150/2502] eta: 0:04:36 lr: 0.000004 loss_cls: 3.7095 (3.8193) grad_norm: 4.4092 (4.4729) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-06 02:05:39 root] (utils.py 283): INFO Epoch: [23] [2160/2502] eta: 0:04:28 lr: 0.000004 loss_cls: 3.7095 (3.8189) grad_norm: 4.4579 (4.4749) time: 0.7830 data: 0.0002 max mem: 8421 +[2024-12-06 02:05:47 root] (utils.py 283): INFO Epoch: [23] [2170/2502] eta: 0:04:21 lr: 0.000004 loss_cls: 3.6899 (3.8187) grad_norm: 4.5468 (4.4762) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 02:05:54 root] (utils.py 283): INFO Epoch: [23] [2180/2502] eta: 0:04:13 lr: 0.000004 loss_cls: 4.0022 (3.8184) grad_norm: 4.5650 (4.4773) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:02 root] (utils.py 283): INFO Epoch: [23] [2190/2502] eta: 0:04:05 lr: 0.000004 loss_cls: 4.1197 (3.8195) grad_norm: 4.3618 (4.4762) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:10 root] (utils.py 283): INFO Epoch: [23] [2200/2502] eta: 0:03:57 lr: 0.000004 loss_cls: 3.9639 (3.8187) grad_norm: 4.3351 (4.4765) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:18 root] (utils.py 283): INFO Epoch: [23] [2210/2502] eta: 0:03:49 lr: 0.000004 loss_cls: 3.7527 (3.8196) grad_norm: 4.2164 (4.4756) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:26 root] (utils.py 283): INFO Epoch: [23] [2220/2502] eta: 0:03:41 lr: 0.000004 loss_cls: 4.0325 (3.8201) grad_norm: 4.2164 (4.4754) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:34 root] (utils.py 283): INFO Epoch: [23] [2230/2502] eta: 0:03:33 lr: 0.000004 loss_cls: 4.0265 (3.8204) grad_norm: 4.3322 (4.4754) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:41 root] (utils.py 283): INFO Epoch: [23] [2240/2502] eta: 0:03:25 lr: 0.000004 loss_cls: 3.8599 (3.8199) grad_norm: 4.3322 (4.4750) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:49 root] (utils.py 283): INFO Epoch: [23] [2250/2502] eta: 0:03:18 lr: 0.000004 loss_cls: 4.0035 (3.8209) grad_norm: 4.2782 (4.4739) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 02:06:57 root] (utils.py 283): INFO Epoch: [23] [2260/2502] eta: 0:03:10 lr: 0.000004 loss_cls: 3.9943 (3.8198) grad_norm: 4.1253 (4.4727) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 02:07:05 root] (utils.py 283): INFO Epoch: [23] [2270/2502] eta: 0:03:02 lr: 0.000004 loss_cls: 3.6670 (3.8197) grad_norm: 4.1775 (4.4723) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 02:07:13 root] (utils.py 283): INFO Epoch: [23] [2280/2502] eta: 0:02:54 lr: 0.000004 loss_cls: 3.7900 (3.8192) grad_norm: 4.3347 (4.4723) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 02:07:21 root] (utils.py 283): INFO Epoch: [23] [2290/2502] eta: 0:02:46 lr: 0.000004 loss_cls: 3.8841 (3.8190) grad_norm: 4.3111 (4.4715) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 02:07:28 root] (utils.py 283): INFO Epoch: [23] [2300/2502] eta: 0:02:38 lr: 0.000004 loss_cls: 3.7065 (3.8175) grad_norm: 4.1947 (4.4706) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 02:07:36 root] (utils.py 283): INFO Epoch: [23] [2310/2502] eta: 0:02:30 lr: 0.000004 loss_cls: 3.5853 (3.8167) grad_norm: 4.3807 (4.4712) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 02:07:45 root] (utils.py 283): INFO Epoch: [23] [2320/2502] eta: 0:02:23 lr: 0.000004 loss_cls: 4.0162 (3.8168) grad_norm: 4.2529 (4.4707) time: 0.8467 data: 0.0005 max mem: 8421 +[2024-12-06 02:07:54 root] (utils.py 283): INFO Epoch: [23] [2330/2502] eta: 0:02:15 lr: 0.000004 loss_cls: 4.0275 (3.8174) grad_norm: 4.2720 (4.4717) time: 0.8893 data: 0.0007 max mem: 8421 +[2024-12-06 02:08:02 root] (utils.py 283): INFO Epoch: [23] [2340/2502] eta: 0:02:07 lr: 0.000004 loss_cls: 3.7002 (3.8161) grad_norm: 4.3277 (4.4712) time: 0.8249 data: 0.0005 max mem: 8421 +[2024-12-06 02:08:10 root] (utils.py 283): INFO Epoch: [23] [2350/2502] eta: 0:01:59 lr: 0.000004 loss_cls: 3.8842 (3.8167) grad_norm: 4.2846 (4.4707) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-06 02:08:17 root] (utils.py 283): INFO Epoch: [23] [2360/2502] eta: 0:01:51 lr: 0.000004 loss_cls: 3.9948 (3.8170) grad_norm: 4.2621 (4.4700) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 02:08:25 root] (utils.py 283): INFO Epoch: [23] [2370/2502] eta: 0:01:43 lr: 0.000004 loss_cls: 4.0339 (3.8175) grad_norm: 4.2621 (4.4699) time: 0.7759 data: 0.0003 max mem: 8421 +[2024-12-06 02:08:33 root] (utils.py 283): INFO Epoch: [23] [2380/2502] eta: 0:01:35 lr: 0.000004 loss_cls: 4.0550 (3.8183) grad_norm: 4.3034 (4.4697) time: 0.7712 data: 0.0003 max mem: 8421 +[2024-12-06 02:08:41 root] (utils.py 283): INFO Epoch: [23] [2390/2502] eta: 0:01:28 lr: 0.000004 loss_cls: 3.9665 (3.8187) grad_norm: 4.2851 (4.4693) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 02:08:48 root] (utils.py 283): INFO Epoch: [23] [2400/2502] eta: 0:01:20 lr: 0.000004 loss_cls: 3.9432 (3.8185) grad_norm: 4.2851 (4.4691) time: 0.7770 data: 0.0003 max mem: 8421 +[2024-12-06 02:08:56 root] (utils.py 283): INFO Epoch: [23] [2410/2502] eta: 0:01:12 lr: 0.000004 loss_cls: 3.8900 (3.8192) grad_norm: 4.3302 (4.4699) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-06 02:09:04 root] (utils.py 283): INFO Epoch: [23] [2420/2502] eta: 0:01:04 lr: 0.000004 loss_cls: 3.9142 (3.8197) grad_norm: 4.4407 (4.4698) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-06 02:09:12 root] (utils.py 283): INFO Epoch: [23] [2430/2502] eta: 0:00:56 lr: 0.000004 loss_cls: 3.8328 (3.8193) grad_norm: 4.2304 (4.4693) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-06 02:09:19 root] (utils.py 283): INFO Epoch: [23] [2440/2502] eta: 0:00:48 lr: 0.000004 loss_cls: 3.8623 (3.8201) grad_norm: 4.2874 (4.4690) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 02:09:27 root] (utils.py 283): INFO Epoch: [23] [2450/2502] eta: 0:00:40 lr: 0.000004 loss_cls: 4.0019 (3.8198) grad_norm: 4.3679 (4.4689) time: 0.7730 data: 0.0002 max mem: 8421 +[2024-12-06 02:09:35 root] (utils.py 283): INFO Epoch: [23] [2460/2502] eta: 0:00:33 lr: 0.000004 loss_cls: 3.6980 (3.8190) grad_norm: 4.3243 (4.4685) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-06 02:09:42 root] (utils.py 283): INFO Epoch: [23] [2470/2502] eta: 0:00:25 lr: 0.000004 loss_cls: 3.6560 (3.8188) grad_norm: 4.3243 (4.4681) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-06 02:09:50 root] (utils.py 283): INFO Epoch: [23] [2480/2502] eta: 0:00:17 lr: 0.000004 loss_cls: 3.7913 (3.8187) grad_norm: 4.3087 (4.4674) time: 0.7644 data: 0.0003 max mem: 8421 +[2024-12-06 02:09:58 root] (utils.py 283): INFO Epoch: [23] [2490/2502] eta: 0:00:09 lr: 0.000004 loss_cls: 3.7427 (3.8191) grad_norm: 4.3087 (4.4682) time: 0.7885 data: 0.0223 max mem: 8421 +[2024-12-06 02:10:06 root] (utils.py 283): INFO Epoch: [23] [2500/2502] eta: 0:00:01 lr: 0.000004 loss_cls: 3.6993 (3.8181) grad_norm: 4.4038 (4.4707) time: 0.7965 data: 0.0223 max mem: 8421 +[2024-12-06 02:10:07 root] (utils.py 283): INFO Epoch: [23] [2501/2502] eta: 0:00:00 lr: 0.000004 loss_cls: 3.6993 (3.8183) grad_norm: 4.4038 (4.4706) time: 0.7957 data: 0.0223 max mem: 8421 +[2024-12-06 02:10:07 root] (utils.py 297): INFO Epoch: [23] Total time: 0:32:47 (0.7864 s / it) +[2024-12-06 02:10:07 root] (engine.py 179): INFO Averaged stats:lr: 0.000004 loss_cls: 3.6993 (3.8178) grad_norm: 4.4038 (4.4706) +[2024-12-06 02:10:07 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7249 (0.7249) acc1: 85.9375 (85.9375) acc3: 96.0938 (96.0938) acc5: 97.6562 (97.6562) time: 0.1312 data: 0.0005 max mem: 8421 +[2024-12-06 02:10:09 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7888 (0.8550) acc1: 83.5938 (82.1023) acc3: 92.9688 (92.8977) acc5: 96.0938 (96.0938) time: 0.1317 data: 0.0004 max mem: 8421 +[2024-12-06 02:10:10 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8833 (0.9023) acc1: 80.4688 (81.0640) acc3: 92.9688 (92.4851) acc5: 95.3125 (95.3125) time: 0.1322 data: 0.0004 max mem: 8421 +[2024-12-06 02:10:11 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9487 (0.9061) acc1: 79.6875 (80.4435) acc3: 92.1875 (92.8175) acc5: 95.3125 (95.3629) time: 0.1324 data: 0.0005 max mem: 8421 +[2024-12-06 02:10:13 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8284 (0.8994) acc1: 81.2500 (80.6974) acc3: 93.7500 (93.0069) acc5: 96.0938 (95.5030) time: 0.1334 data: 0.0005 max mem: 8421 +[2024-12-06 02:10:14 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0217 (0.9925) acc1: 74.2188 (78.5386) acc3: 88.2812 (91.3450) acc5: 92.9688 (94.4700) time: 0.1333 data: 0.0005 max mem: 8421 +[2024-12-06 02:10:15 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3048 (1.0354) acc1: 72.6562 (77.7408) acc3: 85.9375 (90.6506) acc5: 89.8438 (93.6988) time: 0.1321 data: 0.0005 max mem: 8421 +[2024-12-06 02:10:17 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2545 (1.0793) acc1: 72.6562 (76.5955) acc3: 86.7188 (90.0748) acc5: 89.8438 (93.2328) time: 0.1340 data: 0.0005 max mem: 8421 +[2024-12-06 02:10:18 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2895 (1.1158) acc1: 70.3125 (75.8005) acc3: 84.3750 (89.4676) acc5: 89.8438 (92.6794) time: 0.1356 data: 0.0007 max mem: 8421 +[2024-12-06 02:10:19 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3294 (1.1450) acc1: 70.3125 (74.9485) acc3: 84.3750 (89.0539) acc5: 89.8438 (92.2648) time: 0.1336 data: 0.0007 max mem: 8421 +[2024-12-06 02:10:20 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2156 (1.1350) acc1: 72.6562 (75.0800) acc3: 89.0625 (89.2240) acc5: 90.6250 (92.4080) time: 0.1309 data: 0.0006 max mem: 8421 +[2024-12-06 02:10:20 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1331 s / it) +[2024-12-06 02:10:23 root] (engine.py 264): INFO * Acc@1 74.984 Acc@3 89.106 Acc@5 92.348 loss 1.137 flops 1.285 layer_flops 1.251 +[2024-12-06 02:10:23 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.0% +[2024-12-06 02:10:23 root] (main.py 551): INFO Max accuracy: 75.04% +[2024-12-06 02:10:23 root] (utils.py 283): INFO Epoch: [24] [ 0/2502] eta: 0:32:16 lr: 0.000003 loss_cls: 4.0306 (4.0306) grad_norm: 4.3912 (4.3912) time: 0.7739 data: 0.0004 max mem: 8421 +[2024-12-06 02:10:31 root] (utils.py 283): INFO Epoch: [24] [ 10/2502] eta: 0:31:53 lr: 0.000003 loss_cls: 4.0629 (3.8123) grad_norm: 4.5611 (4.7640) time: 0.7679 data: 0.0003 max mem: 8421 +[2024-12-06 02:10:39 root] (utils.py 283): INFO Epoch: [24] [ 20/2502] eta: 0:31:53 lr: 0.000003 loss_cls: 4.1338 (3.9786) grad_norm: 4.5250 (4.6094) time: 0.7709 data: 0.0003 max mem: 8421 +[2024-12-06 02:10:46 root] (utils.py 283): INFO Epoch: [24] [ 30/2502] eta: 0:31:41 lr: 0.000003 loss_cls: 4.1878 (3.9750) grad_norm: 4.2955 (4.6626) time: 0.7698 data: 0.0003 max mem: 8421 +[2024-12-06 02:10:54 root] (utils.py 283): INFO Epoch: [24] [ 40/2502] eta: 0:31:31 lr: 0.000003 loss_cls: 4.0578 (3.9625) grad_norm: 4.2849 (4.5549) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-06 02:11:02 root] (utils.py 283): INFO Epoch: [24] [ 50/2502] eta: 0:31:23 lr: 0.000003 loss_cls: 4.0578 (3.9329) grad_norm: 4.2528 (4.4927) time: 0.7662 data: 0.0003 max mem: 8421 +[2024-12-06 02:11:10 root] (utils.py 283): INFO Epoch: [24] [ 60/2502] eta: 0:31:21 lr: 0.000003 loss_cls: 4.0512 (3.9606) grad_norm: 4.2532 (4.4535) time: 0.7746 data: 0.0003 max mem: 8421 +[2024-12-06 02:11:17 root] (utils.py 283): INFO Epoch: [24] [ 70/2502] eta: 0:31:13 lr: 0.000003 loss_cls: 4.0312 (3.9581) grad_norm: 4.0729 (4.4212) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-06 02:11:25 root] (utils.py 283): INFO Epoch: [24] [ 80/2502] eta: 0:31:07 lr: 0.000003 loss_cls: 4.0520 (3.9703) grad_norm: 4.3140 (4.4606) time: 0.7729 data: 0.0003 max mem: 8421 +[2024-12-06 02:11:33 root] (utils.py 283): INFO Epoch: [24] [ 90/2502] eta: 0:31:00 lr: 0.000003 loss_cls: 4.0214 (3.9542) grad_norm: 4.4826 (4.4823) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-06 02:11:40 root] (utils.py 283): INFO Epoch: [24] [ 100/2502] eta: 0:30:53 lr: 0.000003 loss_cls: 3.5959 (3.9195) grad_norm: 4.4486 (4.4697) time: 0.7737 data: 0.0003 max mem: 8421 +[2024-12-06 02:11:48 root] (utils.py 283): INFO Epoch: [24] [ 110/2502] eta: 0:30:48 lr: 0.000003 loss_cls: 3.5959 (3.8999) grad_norm: 4.4486 (4.4770) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 02:11:56 root] (utils.py 283): INFO Epoch: [24] [ 120/2502] eta: 0:30:39 lr: 0.000003 loss_cls: 3.8864 (3.8866) grad_norm: 4.3512 (4.4745) time: 0.7755 data: 0.0003 max mem: 8421 +[2024-12-06 02:12:04 root] (utils.py 283): INFO Epoch: [24] [ 130/2502] eta: 0:30:33 lr: 0.000003 loss_cls: 3.9168 (3.8799) grad_norm: 4.2249 (4.4558) time: 0.7743 data: 0.0003 max mem: 8421 +[2024-12-06 02:12:11 root] (utils.py 283): INFO Epoch: [24] [ 140/2502] eta: 0:30:24 lr: 0.000003 loss_cls: 3.9413 (3.8953) grad_norm: 4.2468 (4.4496) time: 0.7749 data: 0.0002 max mem: 8421 +[2024-12-06 02:12:19 root] (utils.py 283): INFO Epoch: [24] [ 150/2502] eta: 0:30:15 lr: 0.000003 loss_cls: 3.9413 (3.8866) grad_norm: 4.1731 (4.4307) time: 0.7663 data: 0.0003 max mem: 8421 +[2024-12-06 02:12:27 root] (utils.py 283): INFO Epoch: [24] [ 160/2502] eta: 0:30:07 lr: 0.000003 loss_cls: 3.7035 (3.8803) grad_norm: 4.1858 (4.4498) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-06 02:12:34 root] (utils.py 283): INFO Epoch: [24] [ 170/2502] eta: 0:29:59 lr: 0.000003 loss_cls: 3.6290 (3.8558) grad_norm: 4.1858 (4.4371) time: 0.7680 data: 0.0003 max mem: 8421 +[2024-12-06 02:12:42 root] (utils.py 283): INFO Epoch: [24] [ 180/2502] eta: 0:29:50 lr: 0.000003 loss_cls: 3.5146 (3.8540) grad_norm: 4.1391 (4.4612) time: 0.7630 data: 0.0003 max mem: 8421 +[2024-12-06 02:12:50 root] (utils.py 283): INFO Epoch: [24] [ 190/2502] eta: 0:29:41 lr: 0.000003 loss_cls: 3.9670 (3.8528) grad_norm: 4.2750 (4.4616) time: 0.7619 data: 0.0003 max mem: 8421 +[2024-12-06 02:12:57 root] (utils.py 283): INFO Epoch: [24] [ 200/2502] eta: 0:29:32 lr: 0.000003 loss_cls: 3.8911 (3.8467) grad_norm: 4.2750 (4.4614) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-06 02:13:05 root] (utils.py 283): INFO Epoch: [24] [ 210/2502] eta: 0:29:24 lr: 0.000003 loss_cls: 3.8452 (3.8516) grad_norm: 4.2602 (4.4545) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-06 02:13:13 root] (utils.py 283): INFO Epoch: [24] [ 220/2502] eta: 0:29:17 lr: 0.000003 loss_cls: 3.9283 (3.8492) grad_norm: 4.2602 (4.4524) time: 0.7716 data: 0.0003 max mem: 8421 +[2024-12-06 02:13:20 root] (utils.py 283): INFO Epoch: [24] [ 230/2502] eta: 0:29:09 lr: 0.000003 loss_cls: 4.0688 (3.8530) grad_norm: 4.3710 (4.4569) time: 0.7712 data: 0.0003 max mem: 8421 +[2024-12-06 02:13:28 root] (utils.py 283): INFO Epoch: [24] [ 240/2502] eta: 0:29:01 lr: 0.000003 loss_cls: 3.7450 (3.8346) grad_norm: 4.3710 (4.4552) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-06 02:13:36 root] (utils.py 283): INFO Epoch: [24] [ 250/2502] eta: 0:28:53 lr: 0.000003 loss_cls: 3.5710 (3.8355) grad_norm: 4.2704 (4.4536) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-06 02:13:44 root] (utils.py 283): INFO Epoch: [24] [ 260/2502] eta: 0:28:47 lr: 0.000003 loss_cls: 3.8792 (3.8336) grad_norm: 4.2649 (4.4583) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 02:13:52 root] (utils.py 283): INFO Epoch: [24] [ 270/2502] eta: 0:28:44 lr: 0.000003 loss_cls: 4.0149 (3.8396) grad_norm: 4.2955 (4.4574) time: 0.8087 data: 0.0004 max mem: 8421 +[2024-12-06 02:14:00 root] (utils.py 283): INFO Epoch: [24] [ 280/2502] eta: 0:28:40 lr: 0.000003 loss_cls: 4.1079 (3.8469) grad_norm: 4.2625 (4.4852) time: 0.8208 data: 0.0004 max mem: 8421 +[2024-12-06 02:14:09 root] (utils.py 283): INFO Epoch: [24] [ 290/2502] eta: 0:28:38 lr: 0.000003 loss_cls: 3.9094 (3.8352) grad_norm: 4.2438 (4.4849) time: 0.8367 data: 0.0004 max mem: 8421 +[2024-12-06 02:14:17 root] (utils.py 283): INFO Epoch: [24] [ 300/2502] eta: 0:28:32 lr: 0.000003 loss_cls: 3.9340 (3.8401) grad_norm: 4.4282 (4.4845) time: 0.8297 data: 0.0004 max mem: 8421 +[2024-12-06 02:14:24 root] (utils.py 283): INFO Epoch: [24] [ 310/2502] eta: 0:28:24 lr: 0.000003 loss_cls: 3.9736 (3.8367) grad_norm: 4.2674 (4.4760) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-06 02:14:33 root] (utils.py 283): INFO Epoch: [24] [ 320/2502] eta: 0:28:19 lr: 0.000003 loss_cls: 3.9156 (3.8435) grad_norm: 4.1718 (4.4765) time: 0.7960 data: 0.0003 max mem: 8421 +[2024-12-06 02:14:47 root] (utils.py 283): INFO Epoch: [24] [ 330/2502] eta: 0:28:54 lr: 0.000003 loss_cls: 3.7530 (3.8344) grad_norm: 4.3297 (4.4751) time: 1.1271 data: 0.0003 max mem: 8421 +[2024-12-06 02:15:07 root] (utils.py 283): INFO Epoch: [24] [ 340/2502] eta: 0:30:01 lr: 0.000003 loss_cls: 3.4428 (3.8315) grad_norm: 4.2454 (4.4680) time: 1.7048 data: 0.0004 max mem: 8421 +[2024-12-06 02:15:14 root] (utils.py 283): INFO Epoch: [24] [ 350/2502] eta: 0:29:49 lr: 0.000003 loss_cls: 3.5186 (3.8295) grad_norm: 4.0878 (4.4849) time: 1.3752 data: 0.0003 max mem: 8421 +[2024-12-06 02:15:22 root] (utils.py 283): INFO Epoch: [24] [ 360/2502] eta: 0:29:37 lr: 0.000003 loss_cls: 4.0077 (3.8327) grad_norm: 4.1421 (4.4780) time: 0.7735 data: 0.0003 max mem: 8421 +[2024-12-06 02:15:30 root] (utils.py 283): INFO Epoch: [24] [ 370/2502] eta: 0:29:26 lr: 0.000003 loss_cls: 3.8128 (3.8283) grad_norm: 4.2538 (4.4822) time: 0.7753 data: 0.0003 max mem: 8421 +[2024-12-06 02:15:38 root] (utils.py 283): INFO Epoch: [24] [ 380/2502] eta: 0:29:15 lr: 0.000003 loss_cls: 3.8931 (3.8318) grad_norm: 4.3458 (4.4821) time: 0.7750 data: 0.0003 max mem: 8421 +[2024-12-06 02:15:46 root] (utils.py 283): INFO Epoch: [24] [ 390/2502] eta: 0:29:05 lr: 0.000003 loss_cls: 3.9220 (3.8288) grad_norm: 4.3171 (4.4852) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-06 02:15:55 root] (utils.py 283): INFO Epoch: [24] [ 400/2502] eta: 0:29:03 lr: 0.000003 loss_cls: 3.6520 (3.8277) grad_norm: 4.3025 (4.4851) time: 0.8729 data: 0.0012 max mem: 8421 +[2024-12-06 02:16:03 root] (utils.py 283): INFO Epoch: [24] [ 410/2502] eta: 0:28:52 lr: 0.000003 loss_cls: 4.0754 (3.8352) grad_norm: 4.2769 (4.4793) time: 0.8645 data: 0.0012 max mem: 8421 +[2024-12-06 02:16:11 root] (utils.py 283): INFO Epoch: [24] [ 420/2502] eta: 0:28:41 lr: 0.000003 loss_cls: 4.1498 (3.8342) grad_norm: 4.2538 (4.4765) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 02:16:19 root] (utils.py 283): INFO Epoch: [24] [ 430/2502] eta: 0:28:31 lr: 0.000003 loss_cls: 3.6521 (3.8284) grad_norm: 4.2860 (4.4742) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 02:16:26 root] (utils.py 283): INFO Epoch: [24] [ 440/2502] eta: 0:28:21 lr: 0.000003 loss_cls: 3.7178 (3.8307) grad_norm: 4.4675 (4.4883) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 02:16:34 root] (utils.py 283): INFO Epoch: [24] [ 450/2502] eta: 0:28:11 lr: 0.000003 loss_cls: 3.8269 (3.8306) grad_norm: 4.4108 (4.4849) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 02:16:42 root] (utils.py 283): INFO Epoch: [24] [ 460/2502] eta: 0:28:01 lr: 0.000003 loss_cls: 3.8301 (3.8323) grad_norm: 4.3000 (4.4840) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 02:16:50 root] (utils.py 283): INFO Epoch: [24] [ 470/2502] eta: 0:27:51 lr: 0.000003 loss_cls: 3.9543 (3.8341) grad_norm: 4.4099 (4.4869) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 02:16:58 root] (utils.py 283): INFO Epoch: [24] [ 480/2502] eta: 0:27:41 lr: 0.000003 loss_cls: 3.8450 (3.8308) grad_norm: 4.4099 (4.4927) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 02:17:06 root] (utils.py 283): INFO Epoch: [24] [ 490/2502] eta: 0:27:31 lr: 0.000003 loss_cls: 3.8450 (3.8299) grad_norm: 4.4196 (4.4935) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-06 02:17:14 root] (utils.py 283): INFO Epoch: [24] [ 500/2502] eta: 0:27:22 lr: 0.000003 loss_cls: 4.0538 (3.8317) grad_norm: 4.4196 (4.4898) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 02:17:21 root] (utils.py 283): INFO Epoch: [24] [ 510/2502] eta: 0:27:12 lr: 0.000003 loss_cls: 4.0392 (3.8317) grad_norm: 4.2633 (4.4901) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 02:17:29 root] (utils.py 283): INFO Epoch: [24] [ 520/2502] eta: 0:27:02 lr: 0.000003 loss_cls: 4.0117 (3.8316) grad_norm: 4.3135 (4.4854) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 02:17:37 root] (utils.py 283): INFO Epoch: [24] [ 530/2502] eta: 0:26:53 lr: 0.000003 loss_cls: 4.0028 (3.8354) grad_norm: 4.3241 (4.5009) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 02:17:45 root] (utils.py 283): INFO Epoch: [24] [ 540/2502] eta: 0:26:43 lr: 0.000003 loss_cls: 3.9759 (3.8301) grad_norm: 4.3354 (4.5054) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 02:17:53 root] (utils.py 283): INFO Epoch: [24] [ 550/2502] eta: 0:26:34 lr: 0.000003 loss_cls: 3.8666 (3.8319) grad_norm: 4.3038 (4.5034) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:01 root] (utils.py 283): INFO Epoch: [24] [ 560/2502] eta: 0:26:26 lr: 0.000003 loss_cls: 3.9920 (3.8334) grad_norm: 4.3072 (4.5019) time: 0.7950 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:09 root] (utils.py 283): INFO Epoch: [24] [ 570/2502] eta: 0:26:16 lr: 0.000003 loss_cls: 3.9920 (3.8366) grad_norm: 4.3291 (4.5003) time: 0.7983 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:17 root] (utils.py 283): INFO Epoch: [24] [ 580/2502] eta: 0:26:07 lr: 0.000003 loss_cls: 3.9891 (3.8365) grad_norm: 4.2649 (4.4976) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:24 root] (utils.py 283): INFO Epoch: [24] [ 590/2502] eta: 0:25:58 lr: 0.000003 loss_cls: 3.8479 (3.8370) grad_norm: 4.2214 (4.4952) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:32 root] (utils.py 283): INFO Epoch: [24] [ 600/2502] eta: 0:25:49 lr: 0.000003 loss_cls: 4.1541 (3.8438) grad_norm: 4.3440 (4.4953) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:40 root] (utils.py 283): INFO Epoch: [24] [ 610/2502] eta: 0:25:40 lr: 0.000003 loss_cls: 4.1541 (3.8426) grad_norm: 4.3440 (4.4921) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:48 root] (utils.py 283): INFO Epoch: [24] [ 620/2502] eta: 0:25:31 lr: 0.000003 loss_cls: 3.8302 (3.8412) grad_norm: 4.3778 (4.4913) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 02:18:56 root] (utils.py 283): INFO Epoch: [24] [ 630/2502] eta: 0:25:22 lr: 0.000003 loss_cls: 3.9429 (3.8405) grad_norm: 4.4257 (4.4921) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:04 root] (utils.py 283): INFO Epoch: [24] [ 640/2502] eta: 0:25:13 lr: 0.000003 loss_cls: 4.0997 (3.8443) grad_norm: 4.2832 (4.4912) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:11 root] (utils.py 283): INFO Epoch: [24] [ 650/2502] eta: 0:25:04 lr: 0.000003 loss_cls: 4.0171 (3.8418) grad_norm: 4.2287 (4.4913) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:19 root] (utils.py 283): INFO Epoch: [24] [ 660/2502] eta: 0:24:55 lr: 0.000003 loss_cls: 3.7930 (3.8425) grad_norm: 4.6027 (4.4946) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:27 root] (utils.py 283): INFO Epoch: [24] [ 670/2502] eta: 0:24:46 lr: 0.000003 loss_cls: 3.9165 (3.8425) grad_norm: 4.4222 (4.4924) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:35 root] (utils.py 283): INFO Epoch: [24] [ 680/2502] eta: 0:24:38 lr: 0.000003 loss_cls: 3.9215 (3.8444) grad_norm: 4.1500 (4.4884) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:43 root] (utils.py 283): INFO Epoch: [24] [ 690/2502] eta: 0:24:29 lr: 0.000003 loss_cls: 3.9500 (3.8439) grad_norm: 4.2163 (4.4864) time: 0.7951 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:51 root] (utils.py 283): INFO Epoch: [24] [ 700/2502] eta: 0:24:20 lr: 0.000003 loss_cls: 3.9471 (3.8441) grad_norm: 4.3752 (4.4843) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 02:19:59 root] (utils.py 283): INFO Epoch: [24] [ 710/2502] eta: 0:24:11 lr: 0.000003 loss_cls: 3.9594 (3.8452) grad_norm: 4.3198 (4.4933) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 02:20:06 root] (utils.py 283): INFO Epoch: [24] [ 720/2502] eta: 0:24:03 lr: 0.000003 loss_cls: 3.7842 (3.8424) grad_norm: 4.3577 (4.4934) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 02:20:14 root] (utils.py 283): INFO Epoch: [24] [ 730/2502] eta: 0:23:54 lr: 0.000003 loss_cls: 3.7265 (3.8420) grad_norm: 4.4168 (4.4954) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 02:20:22 root] (utils.py 283): INFO Epoch: [24] [ 740/2502] eta: 0:23:45 lr: 0.000003 loss_cls: 3.9750 (3.8417) grad_norm: 4.3225 (4.4951) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 02:20:30 root] (utils.py 283): INFO Epoch: [24] [ 750/2502] eta: 0:23:37 lr: 0.000003 loss_cls: 3.9750 (3.8406) grad_norm: 4.2189 (4.4911) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 02:20:38 root] (utils.py 283): INFO Epoch: [24] [ 760/2502] eta: 0:23:28 lr: 0.000003 loss_cls: 4.0234 (3.8402) grad_norm: 4.1798 (4.4877) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 02:20:46 root] (utils.py 283): INFO Epoch: [24] [ 770/2502] eta: 0:23:19 lr: 0.000003 loss_cls: 4.1297 (3.8431) grad_norm: 4.1996 (4.4872) time: 0.7892 data: 0.0003 max mem: 8421 +[2024-12-06 02:20:54 root] (utils.py 283): INFO Epoch: [24] [ 780/2502] eta: 0:23:11 lr: 0.000003 loss_cls: 4.1168 (3.8387) grad_norm: 4.4740 (4.4899) time: 0.7905 data: 0.0003 max mem: 8421 +[2024-12-06 02:21:01 root] (utils.py 283): INFO Epoch: [24] [ 790/2502] eta: 0:23:02 lr: 0.000003 loss_cls: 3.5372 (3.8369) grad_norm: 4.5926 (4.4914) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 02:21:09 root] (utils.py 283): INFO Epoch: [24] [ 800/2502] eta: 0:22:53 lr: 0.000003 loss_cls: 4.0297 (3.8388) grad_norm: 4.4377 (4.4872) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-06 02:21:17 root] (utils.py 283): INFO Epoch: [24] [ 810/2502] eta: 0:22:45 lr: 0.000003 loss_cls: 3.9680 (3.8381) grad_norm: 4.2654 (4.4877) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:21:25 root] (utils.py 283): INFO Epoch: [24] [ 820/2502] eta: 0:22:37 lr: 0.000003 loss_cls: 4.0380 (3.8412) grad_norm: 4.4298 (4.4878) time: 0.7882 data: 0.0003 max mem: 8421 +[2024-12-06 02:21:33 root] (utils.py 283): INFO Epoch: [24] [ 830/2502] eta: 0:22:28 lr: 0.000003 loss_cls: 4.1503 (3.8436) grad_norm: 4.3213 (4.4861) time: 0.7909 data: 0.0002 max mem: 8421 +[2024-12-06 02:21:41 root] (utils.py 283): INFO Epoch: [24] [ 840/2502] eta: 0:22:20 lr: 0.000003 loss_cls: 4.0508 (3.8434) grad_norm: 4.3213 (4.4904) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-06 02:21:49 root] (utils.py 283): INFO Epoch: [24] [ 850/2502] eta: 0:22:11 lr: 0.000003 loss_cls: 3.7681 (3.8388) grad_norm: 4.3387 (4.4883) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 02:21:56 root] (utils.py 283): INFO Epoch: [24] [ 860/2502] eta: 0:22:03 lr: 0.000003 loss_cls: 3.8954 (3.8412) grad_norm: 4.3648 (4.4883) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 02:22:04 root] (utils.py 283): INFO Epoch: [24] [ 870/2502] eta: 0:21:54 lr: 0.000003 loss_cls: 3.8661 (3.8368) grad_norm: 4.3648 (4.4856) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 02:22:12 root] (utils.py 283): INFO Epoch: [24] [ 880/2502] eta: 0:21:46 lr: 0.000003 loss_cls: 3.7730 (3.8379) grad_norm: 4.2620 (4.4833) time: 0.7968 data: 0.0003 max mem: 8421 +[2024-12-06 02:22:20 root] (utils.py 283): INFO Epoch: [24] [ 890/2502] eta: 0:21:38 lr: 0.000003 loss_cls: 3.7730 (3.8372) grad_norm: 4.3232 (4.4842) time: 0.7988 data: 0.0003 max mem: 8421 +[2024-12-06 02:22:28 root] (utils.py 283): INFO Epoch: [24] [ 900/2502] eta: 0:21:29 lr: 0.000003 loss_cls: 3.7452 (3.8370) grad_norm: 4.3232 (4.4820) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-06 02:22:36 root] (utils.py 283): INFO Epoch: [24] [ 910/2502] eta: 0:21:21 lr: 0.000003 loss_cls: 3.9691 (3.8368) grad_norm: 4.3121 (4.4836) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-06 02:22:44 root] (utils.py 283): INFO Epoch: [24] [ 920/2502] eta: 0:21:13 lr: 0.000003 loss_cls: 3.9156 (3.8362) grad_norm: 4.3247 (4.4829) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 02:22:52 root] (utils.py 283): INFO Epoch: [24] [ 930/2502] eta: 0:21:04 lr: 0.000003 loss_cls: 3.4639 (3.8312) grad_norm: 4.3179 (4.4809) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:00 root] (utils.py 283): INFO Epoch: [24] [ 940/2502] eta: 0:20:56 lr: 0.000003 loss_cls: 3.6338 (3.8334) grad_norm: 4.2438 (4.4794) time: 0.7905 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:07 root] (utils.py 283): INFO Epoch: [24] [ 950/2502] eta: 0:20:48 lr: 0.000003 loss_cls: 4.1186 (3.8328) grad_norm: 4.3902 (4.4799) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:15 root] (utils.py 283): INFO Epoch: [24] [ 960/2502] eta: 0:20:39 lr: 0.000003 loss_cls: 3.9430 (3.8302) grad_norm: 4.4846 (4.4796) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:23 root] (utils.py 283): INFO Epoch: [24] [ 970/2502] eta: 0:20:31 lr: 0.000003 loss_cls: 3.6606 (3.8290) grad_norm: 4.5494 (4.4816) time: 0.7908 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:31 root] (utils.py 283): INFO Epoch: [24] [ 980/2502] eta: 0:20:23 lr: 0.000003 loss_cls: 3.7970 (3.8314) grad_norm: 4.5389 (4.4814) time: 0.7891 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:39 root] (utils.py 283): INFO Epoch: [24] [ 990/2502] eta: 0:20:15 lr: 0.000003 loss_cls: 3.9437 (3.8322) grad_norm: 4.3028 (4.4782) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:47 root] (utils.py 283): INFO Epoch: [24] [1000/2502] eta: 0:20:07 lr: 0.000003 loss_cls: 3.9113 (3.8310) grad_norm: 4.2192 (4.4763) time: 0.7975 data: 0.0003 max mem: 8421 +[2024-12-06 02:23:55 root] (utils.py 283): INFO Epoch: [24] [1010/2502] eta: 0:19:58 lr: 0.000003 loss_cls: 3.5177 (3.8290) grad_norm: 4.4293 (4.4798) time: 0.7958 data: 0.0003 max mem: 8421 +[2024-12-06 02:24:03 root] (utils.py 283): INFO Epoch: [24] [1020/2502] eta: 0:19:50 lr: 0.000003 loss_cls: 3.9137 (3.8288) grad_norm: 4.5094 (4.4797) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 02:24:11 root] (utils.py 283): INFO Epoch: [24] [1030/2502] eta: 0:19:42 lr: 0.000003 loss_cls: 3.9137 (3.8300) grad_norm: 4.1353 (4.4786) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 02:24:19 root] (utils.py 283): INFO Epoch: [24] [1040/2502] eta: 0:19:33 lr: 0.000003 loss_cls: 3.8577 (3.8285) grad_norm: 4.2485 (4.4772) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 02:24:26 root] (utils.py 283): INFO Epoch: [24] [1050/2502] eta: 0:19:25 lr: 0.000003 loss_cls: 3.8691 (3.8288) grad_norm: 4.3964 (4.4782) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 02:24:34 root] (utils.py 283): INFO Epoch: [24] [1060/2502] eta: 0:19:17 lr: 0.000003 loss_cls: 3.8691 (3.8268) grad_norm: 4.3696 (4.4763) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 02:24:42 root] (utils.py 283): INFO Epoch: [24] [1070/2502] eta: 0:19:09 lr: 0.000003 loss_cls: 3.9355 (3.8273) grad_norm: 4.3696 (4.4810) time: 0.7835 data: 0.0002 max mem: 8421 +[2024-12-06 02:24:50 root] (utils.py 283): INFO Epoch: [24] [1080/2502] eta: 0:19:00 lr: 0.000003 loss_cls: 3.9387 (3.8259) grad_norm: 4.4814 (4.4939) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 02:24:58 root] (utils.py 283): INFO Epoch: [24] [1090/2502] eta: 0:18:52 lr: 0.000003 loss_cls: 3.9101 (3.8241) grad_norm: 4.2878 (4.4922) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 02:25:06 root] (utils.py 283): INFO Epoch: [24] [1100/2502] eta: 0:18:44 lr: 0.000003 loss_cls: 3.7856 (3.8239) grad_norm: 4.2714 (4.4925) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 02:25:13 root] (utils.py 283): INFO Epoch: [24] [1110/2502] eta: 0:18:36 lr: 0.000003 loss_cls: 3.6345 (3.8214) grad_norm: 4.4065 (4.4962) time: 0.7889 data: 0.0003 max mem: 8421 +[2024-12-06 02:25:22 root] (utils.py 283): INFO Epoch: [24] [1120/2502] eta: 0:18:28 lr: 0.000003 loss_cls: 3.6345 (3.8184) grad_norm: 4.4065 (4.4949) time: 0.8010 data: 0.0003 max mem: 8421 +[2024-12-06 02:25:29 root] (utils.py 283): INFO Epoch: [24] [1130/2502] eta: 0:18:20 lr: 0.000003 loss_cls: 3.9302 (3.8199) grad_norm: 4.1662 (4.4918) time: 0.7995 data: 0.0002 max mem: 8421 +[2024-12-06 02:25:38 root] (utils.py 283): INFO Epoch: [24] [1140/2502] eta: 0:18:12 lr: 0.000003 loss_cls: 4.0634 (3.8203) grad_norm: 4.2494 (4.5086) time: 0.7998 data: 0.0003 max mem: 8421 +[2024-12-06 02:25:45 root] (utils.py 283): INFO Epoch: [24] [1150/2502] eta: 0:18:03 lr: 0.000003 loss_cls: 3.9582 (3.8211) grad_norm: 4.4656 (4.5095) time: 0.7965 data: 0.0003 max mem: 8421 +[2024-12-06 02:25:53 root] (utils.py 283): INFO Epoch: [24] [1160/2502] eta: 0:17:55 lr: 0.000003 loss_cls: 3.9145 (3.8202) grad_norm: 4.3578 (4.5088) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 02:26:01 root] (utils.py 283): INFO Epoch: [24] [1170/2502] eta: 0:17:47 lr: 0.000003 loss_cls: 4.0337 (3.8214) grad_norm: 4.3374 (4.5087) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 02:26:09 root] (utils.py 283): INFO Epoch: [24] [1180/2502] eta: 0:17:39 lr: 0.000003 loss_cls: 4.0653 (3.8242) grad_norm: 4.3581 (4.5085) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 02:26:17 root] (utils.py 283): INFO Epoch: [24] [1190/2502] eta: 0:17:30 lr: 0.000003 loss_cls: 4.0341 (3.8239) grad_norm: 4.3140 (4.5068) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 02:26:25 root] (utils.py 283): INFO Epoch: [24] [1200/2502] eta: 0:17:22 lr: 0.000003 loss_cls: 4.0341 (3.8259) grad_norm: 4.4043 (4.5079) time: 0.7847 data: 0.0002 max mem: 8421 +[2024-12-06 02:26:32 root] (utils.py 283): INFO Epoch: [24] [1210/2502] eta: 0:17:14 lr: 0.000003 loss_cls: 4.2565 (3.8254) grad_norm: 4.4043 (4.5090) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 02:26:40 root] (utils.py 283): INFO Epoch: [24] [1220/2502] eta: 0:17:06 lr: 0.000003 loss_cls: 3.9276 (3.8252) grad_norm: 4.3301 (4.5083) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 02:26:48 root] (utils.py 283): INFO Epoch: [24] [1230/2502] eta: 0:16:58 lr: 0.000003 loss_cls: 3.9537 (3.8266) grad_norm: 4.3765 (4.5086) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-06 02:26:56 root] (utils.py 283): INFO Epoch: [24] [1240/2502] eta: 0:16:50 lr: 0.000003 loss_cls: 3.9537 (3.8263) grad_norm: 4.3616 (4.5091) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:04 root] (utils.py 283): INFO Epoch: [24] [1250/2502] eta: 0:16:42 lr: 0.000003 loss_cls: 3.9133 (3.8259) grad_norm: 4.4283 (4.5086) time: 0.7880 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:12 root] (utils.py 283): INFO Epoch: [24] [1260/2502] eta: 0:16:33 lr: 0.000003 loss_cls: 3.9133 (3.8269) grad_norm: 4.3041 (4.5061) time: 0.7903 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:20 root] (utils.py 283): INFO Epoch: [24] [1270/2502] eta: 0:16:25 lr: 0.000003 loss_cls: 3.8668 (3.8280) grad_norm: 4.3041 (4.5052) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:27 root] (utils.py 283): INFO Epoch: [24] [1280/2502] eta: 0:16:17 lr: 0.000003 loss_cls: 3.8668 (3.8257) grad_norm: 4.3978 (4.5042) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:35 root] (utils.py 283): INFO Epoch: [24] [1290/2502] eta: 0:16:09 lr: 0.000003 loss_cls: 3.5201 (3.8238) grad_norm: 4.2234 (4.5032) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:43 root] (utils.py 283): INFO Epoch: [24] [1300/2502] eta: 0:16:01 lr: 0.000003 loss_cls: 3.1557 (3.8190) grad_norm: 4.1675 (4.5015) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:51 root] (utils.py 283): INFO Epoch: [24] [1310/2502] eta: 0:15:53 lr: 0.000003 loss_cls: 3.6765 (3.8204) grad_norm: 4.1772 (4.4996) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 02:27:59 root] (utils.py 283): INFO Epoch: [24] [1320/2502] eta: 0:15:44 lr: 0.000003 loss_cls: 3.9601 (3.8202) grad_norm: 4.2609 (4.4978) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-06 02:28:07 root] (utils.py 283): INFO Epoch: [24] [1330/2502] eta: 0:15:36 lr: 0.000003 loss_cls: 3.9347 (3.8209) grad_norm: 4.1583 (4.4961) time: 0.7860 data: 0.0003 max mem: 8421 +[2024-12-06 02:28:14 root] (utils.py 283): INFO Epoch: [24] [1340/2502] eta: 0:15:28 lr: 0.000003 loss_cls: 4.0025 (3.8222) grad_norm: 4.1544 (4.4935) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 02:28:22 root] (utils.py 283): INFO Epoch: [24] [1350/2502] eta: 0:15:20 lr: 0.000003 loss_cls: 4.1416 (3.8241) grad_norm: 4.2530 (4.4934) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 02:28:30 root] (utils.py 283): INFO Epoch: [24] [1360/2502] eta: 0:15:12 lr: 0.000003 loss_cls: 4.0976 (3.8241) grad_norm: 4.4079 (4.4927) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 02:28:38 root] (utils.py 283): INFO Epoch: [24] [1370/2502] eta: 0:15:04 lr: 0.000003 loss_cls: 3.8298 (3.8225) grad_norm: 4.3698 (4.4913) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 02:28:46 root] (utils.py 283): INFO Epoch: [24] [1380/2502] eta: 0:14:56 lr: 0.000003 loss_cls: 3.6966 (3.8232) grad_norm: 4.3698 (4.4909) time: 0.7905 data: 0.0003 max mem: 8421 +[2024-12-06 02:28:54 root] (utils.py 283): INFO Epoch: [24] [1390/2502] eta: 0:14:48 lr: 0.000003 loss_cls: 3.6966 (3.8225) grad_norm: 4.4466 (4.4945) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:02 root] (utils.py 283): INFO Epoch: [24] [1400/2502] eta: 0:14:40 lr: 0.000003 loss_cls: 3.5980 (3.8213) grad_norm: 4.4137 (4.4977) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:09 root] (utils.py 283): INFO Epoch: [24] [1410/2502] eta: 0:14:32 lr: 0.000003 loss_cls: 3.7224 (3.8212) grad_norm: 4.3464 (4.4961) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:17 root] (utils.py 283): INFO Epoch: [24] [1420/2502] eta: 0:14:23 lr: 0.000003 loss_cls: 3.8467 (3.8206) grad_norm: 4.1408 (4.4952) time: 0.7891 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:25 root] (utils.py 283): INFO Epoch: [24] [1430/2502] eta: 0:14:15 lr: 0.000003 loss_cls: 3.7598 (3.8201) grad_norm: 4.3743 (4.4950) time: 0.7887 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:33 root] (utils.py 283): INFO Epoch: [24] [1440/2502] eta: 0:14:07 lr: 0.000003 loss_cls: 3.7598 (3.8202) grad_norm: 4.3743 (4.4939) time: 0.7904 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:41 root] (utils.py 283): INFO Epoch: [24] [1450/2502] eta: 0:13:59 lr: 0.000003 loss_cls: 3.8431 (3.8179) grad_norm: 4.3140 (4.4933) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:49 root] (utils.py 283): INFO Epoch: [24] [1460/2502] eta: 0:13:51 lr: 0.000003 loss_cls: 3.8636 (3.8183) grad_norm: 4.3744 (4.4935) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-06 02:29:57 root] (utils.py 283): INFO Epoch: [24] [1470/2502] eta: 0:13:43 lr: 0.000003 loss_cls: 3.9935 (3.8182) grad_norm: 4.4295 (4.4937) time: 0.7922 data: 0.0003 max mem: 8421 +[2024-12-06 02:30:05 root] (utils.py 283): INFO Epoch: [24] [1480/2502] eta: 0:13:35 lr: 0.000003 loss_cls: 3.8110 (3.8164) grad_norm: 4.4534 (4.4947) time: 0.7882 data: 0.0003 max mem: 8421 +[2024-12-06 02:30:12 root] (utils.py 283): INFO Epoch: [24] [1490/2502] eta: 0:13:27 lr: 0.000003 loss_cls: 3.3844 (3.8148) grad_norm: 4.3468 (4.4931) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 02:30:20 root] (utils.py 283): INFO Epoch: [24] [1500/2502] eta: 0:13:19 lr: 0.000003 loss_cls: 3.7490 (3.8136) grad_norm: 4.3251 (4.4923) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 02:30:28 root] (utils.py 283): INFO Epoch: [24] [1510/2502] eta: 0:13:11 lr: 0.000003 loss_cls: 3.8363 (3.8143) grad_norm: 4.3251 (4.4916) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-06 02:30:36 root] (utils.py 283): INFO Epoch: [24] [1520/2502] eta: 0:13:03 lr: 0.000003 loss_cls: 3.7702 (3.8120) grad_norm: 4.2303 (4.4900) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 02:30:44 root] (utils.py 283): INFO Epoch: [24] [1530/2502] eta: 0:12:55 lr: 0.000003 loss_cls: 3.4417 (3.8107) grad_norm: 4.2303 (4.4896) time: 0.7777 data: 0.0002 max mem: 8421 +[2024-12-06 02:30:51 root] (utils.py 283): INFO Epoch: [24] [1540/2502] eta: 0:12:47 lr: 0.000003 loss_cls: 3.4417 (3.8096) grad_norm: 4.2105 (4.4925) time: 0.7781 data: 0.0002 max mem: 8421 +[2024-12-06 02:30:59 root] (utils.py 283): INFO Epoch: [24] [1550/2502] eta: 0:12:39 lr: 0.000003 loss_cls: 3.8525 (3.8104) grad_norm: 4.2572 (4.4907) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-06 02:31:07 root] (utils.py 283): INFO Epoch: [24] [1560/2502] eta: 0:12:30 lr: 0.000003 loss_cls: 4.0299 (3.8106) grad_norm: 4.3309 (4.4899) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 02:31:15 root] (utils.py 283): INFO Epoch: [24] [1570/2502] eta: 0:12:22 lr: 0.000003 loss_cls: 4.0964 (3.8113) grad_norm: 4.2996 (4.4890) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 02:31:23 root] (utils.py 283): INFO Epoch: [24] [1580/2502] eta: 0:12:14 lr: 0.000003 loss_cls: 3.9115 (3.8096) grad_norm: 4.2356 (4.4879) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 02:31:31 root] (utils.py 283): INFO Epoch: [24] [1590/2502] eta: 0:12:06 lr: 0.000003 loss_cls: 3.7271 (3.8084) grad_norm: 4.3130 (4.4868) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 02:31:39 root] (utils.py 283): INFO Epoch: [24] [1600/2502] eta: 0:11:58 lr: 0.000003 loss_cls: 3.4066 (3.8063) grad_norm: 4.2800 (4.4863) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 02:31:46 root] (utils.py 283): INFO Epoch: [24] [1610/2502] eta: 0:11:50 lr: 0.000003 loss_cls: 3.8203 (3.8078) grad_norm: 4.3285 (4.4904) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 02:31:54 root] (utils.py 283): INFO Epoch: [24] [1620/2502] eta: 0:11:42 lr: 0.000003 loss_cls: 4.0581 (3.8079) grad_norm: 4.4770 (4.4908) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:02 root] (utils.py 283): INFO Epoch: [24] [1630/2502] eta: 0:11:34 lr: 0.000003 loss_cls: 4.0140 (3.8081) grad_norm: 4.3074 (4.4896) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:10 root] (utils.py 283): INFO Epoch: [24] [1640/2502] eta: 0:11:26 lr: 0.000003 loss_cls: 3.9131 (3.8080) grad_norm: 4.2996 (4.4913) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:18 root] (utils.py 283): INFO Epoch: [24] [1650/2502] eta: 0:11:18 lr: 0.000003 loss_cls: 3.5638 (3.8070) grad_norm: 4.3586 (4.4898) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:25 root] (utils.py 283): INFO Epoch: [24] [1660/2502] eta: 0:11:10 lr: 0.000003 loss_cls: 3.7988 (3.8080) grad_norm: 4.2254 (4.4882) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:33 root] (utils.py 283): INFO Epoch: [24] [1670/2502] eta: 0:11:02 lr: 0.000003 loss_cls: 3.8225 (3.8077) grad_norm: 4.3234 (4.4889) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:41 root] (utils.py 283): INFO Epoch: [24] [1680/2502] eta: 0:10:54 lr: 0.000003 loss_cls: 4.0253 (3.8088) grad_norm: 4.3510 (4.4899) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:49 root] (utils.py 283): INFO Epoch: [24] [1690/2502] eta: 0:10:46 lr: 0.000003 loss_cls: 3.9824 (3.8086) grad_norm: 4.3065 (4.4888) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 02:32:57 root] (utils.py 283): INFO Epoch: [24] [1700/2502] eta: 0:10:38 lr: 0.000003 loss_cls: 3.8636 (3.8089) grad_norm: 4.1960 (4.4877) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:05 root] (utils.py 283): INFO Epoch: [24] [1710/2502] eta: 0:10:30 lr: 0.000003 loss_cls: 4.0196 (3.8101) grad_norm: 4.2728 (4.4867) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:12 root] (utils.py 283): INFO Epoch: [24] [1720/2502] eta: 0:10:22 lr: 0.000003 loss_cls: 4.0196 (3.8104) grad_norm: 4.2826 (4.4856) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:20 root] (utils.py 283): INFO Epoch: [24] [1730/2502] eta: 0:10:14 lr: 0.000003 loss_cls: 3.7406 (3.8098) grad_norm: 4.1372 (4.4848) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:28 root] (utils.py 283): INFO Epoch: [24] [1740/2502] eta: 0:10:06 lr: 0.000003 loss_cls: 3.8901 (3.8104) grad_norm: 4.1762 (4.4839) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:36 root] (utils.py 283): INFO Epoch: [24] [1750/2502] eta: 0:09:58 lr: 0.000003 loss_cls: 4.1289 (3.8122) grad_norm: 4.2162 (4.4834) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:44 root] (utils.py 283): INFO Epoch: [24] [1760/2502] eta: 0:09:50 lr: 0.000003 loss_cls: 4.1384 (3.8114) grad_norm: 4.4314 (4.4841) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:52 root] (utils.py 283): INFO Epoch: [24] [1770/2502] eta: 0:09:42 lr: 0.000003 loss_cls: 3.7452 (3.8123) grad_norm: 4.5016 (4.4841) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-06 02:33:59 root] (utils.py 283): INFO Epoch: [24] [1780/2502] eta: 0:09:34 lr: 0.000003 loss_cls: 3.7452 (3.8106) grad_norm: 4.3856 (4.4835) time: 0.7745 data: 0.0003 max mem: 8421 +[2024-12-06 02:34:07 root] (utils.py 283): INFO Epoch: [24] [1790/2502] eta: 0:09:26 lr: 0.000003 loss_cls: 3.8903 (3.8112) grad_norm: 4.3105 (4.4831) time: 0.7791 data: 0.0002 max mem: 8421 +[2024-12-06 02:34:15 root] (utils.py 283): INFO Epoch: [24] [1800/2502] eta: 0:09:18 lr: 0.000003 loss_cls: 4.0670 (3.8129) grad_norm: 4.2666 (4.4820) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 02:34:23 root] (utils.py 283): INFO Epoch: [24] [1810/2502] eta: 0:09:10 lr: 0.000003 loss_cls: 4.1415 (3.8139) grad_norm: 4.2781 (4.4842) time: 0.7775 data: 0.0003 max mem: 8421 +[2024-12-06 02:34:30 root] (utils.py 283): INFO Epoch: [24] [1820/2502] eta: 0:09:02 lr: 0.000003 loss_cls: 4.1415 (3.8137) grad_norm: 4.4791 (4.4847) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 02:34:38 root] (utils.py 283): INFO Epoch: [24] [1830/2502] eta: 0:08:54 lr: 0.000003 loss_cls: 3.9896 (3.8140) grad_norm: 4.3156 (4.4839) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 02:34:46 root] (utils.py 283): INFO Epoch: [24] [1840/2502] eta: 0:08:46 lr: 0.000003 loss_cls: 3.6242 (3.8118) grad_norm: 4.3156 (4.4843) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 02:34:54 root] (utils.py 283): INFO Epoch: [24] [1850/2502] eta: 0:08:38 lr: 0.000003 loss_cls: 3.8828 (3.8124) grad_norm: 4.6361 (4.4854) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 02:35:02 root] (utils.py 283): INFO Epoch: [24] [1860/2502] eta: 0:08:30 lr: 0.000003 loss_cls: 4.1036 (3.8137) grad_norm: 4.3390 (4.4848) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 02:35:09 root] (utils.py 283): INFO Epoch: [24] [1870/2502] eta: 0:08:22 lr: 0.000003 loss_cls: 3.8681 (3.8122) grad_norm: 4.2508 (4.4840) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 02:35:17 root] (utils.py 283): INFO Epoch: [24] [1880/2502] eta: 0:08:14 lr: 0.000003 loss_cls: 3.9223 (3.8141) grad_norm: 4.2823 (4.4843) time: 0.7797 data: 0.0002 max mem: 8421 +[2024-12-06 02:35:25 root] (utils.py 283): INFO Epoch: [24] [1890/2502] eta: 0:08:06 lr: 0.000003 loss_cls: 4.0678 (3.8144) grad_norm: 4.2348 (4.4837) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 02:35:33 root] (utils.py 283): INFO Epoch: [24] [1900/2502] eta: 0:07:58 lr: 0.000003 loss_cls: 3.8495 (3.8135) grad_norm: 4.1749 (4.4831) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 02:35:41 root] (utils.py 283): INFO Epoch: [24] [1910/2502] eta: 0:07:50 lr: 0.000003 loss_cls: 3.9161 (3.8144) grad_norm: 4.3252 (4.4853) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-06 02:35:49 root] (utils.py 283): INFO Epoch: [24] [1920/2502] eta: 0:07:42 lr: 0.000003 loss_cls: 4.0019 (3.8142) grad_norm: 4.4408 (4.4876) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 02:35:57 root] (utils.py 283): INFO Epoch: [24] [1930/2502] eta: 0:07:34 lr: 0.000003 loss_cls: 3.8051 (3.8135) grad_norm: 4.3235 (4.4862) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-06 02:36:05 root] (utils.py 283): INFO Epoch: [24] [1940/2502] eta: 0:07:26 lr: 0.000003 loss_cls: 3.8544 (3.8130) grad_norm: 4.1951 (4.4859) time: 0.8056 data: 0.0003 max mem: 8421 +[2024-12-06 02:36:12 root] (utils.py 283): INFO Epoch: [24] [1950/2502] eta: 0:07:18 lr: 0.000003 loss_cls: 3.6041 (3.8119) grad_norm: 4.2663 (4.4847) time: 0.7950 data: 0.0003 max mem: 8421 +[2024-12-06 02:36:20 root] (utils.py 283): INFO Epoch: [24] [1960/2502] eta: 0:07:10 lr: 0.000003 loss_cls: 3.8589 (3.8134) grad_norm: 4.2663 (4.4845) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-06 02:36:28 root] (utils.py 283): INFO Epoch: [24] [1970/2502] eta: 0:07:02 lr: 0.000003 loss_cls: 4.1161 (3.8141) grad_norm: 4.3743 (4.4834) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 02:36:36 root] (utils.py 283): INFO Epoch: [24] [1980/2502] eta: 0:06:54 lr: 0.000003 loss_cls: 4.0233 (3.8141) grad_norm: 4.2809 (4.4835) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 02:36:44 root] (utils.py 283): INFO Epoch: [24] [1990/2502] eta: 0:06:46 lr: 0.000003 loss_cls: 3.9668 (3.8150) grad_norm: 4.2640 (4.4832) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 02:36:51 root] (utils.py 283): INFO Epoch: [24] [2000/2502] eta: 0:06:38 lr: 0.000003 loss_cls: 3.9668 (3.8154) grad_norm: 4.3144 (4.4834) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 02:36:59 root] (utils.py 283): INFO Epoch: [24] [2010/2502] eta: 0:06:30 lr: 0.000003 loss_cls: 3.6481 (3.8141) grad_norm: 4.3356 (4.4834) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 02:37:07 root] (utils.py 283): INFO Epoch: [24] [2020/2502] eta: 0:06:22 lr: 0.000003 loss_cls: 3.5341 (3.8125) grad_norm: 4.5448 (4.4848) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 02:37:15 root] (utils.py 283): INFO Epoch: [24] [2030/2502] eta: 0:06:14 lr: 0.000003 loss_cls: 3.9460 (3.8127) grad_norm: 4.4444 (4.4842) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 02:37:23 root] (utils.py 283): INFO Epoch: [24] [2040/2502] eta: 0:06:06 lr: 0.000003 loss_cls: 3.9820 (3.8132) grad_norm: 4.3615 (4.4836) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 02:37:30 root] (utils.py 283): INFO Epoch: [24] [2050/2502] eta: 0:05:58 lr: 0.000003 loss_cls: 3.9662 (3.8138) grad_norm: 4.2720 (4.4824) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 02:37:38 root] (utils.py 283): INFO Epoch: [24] [2060/2502] eta: 0:05:50 lr: 0.000003 loss_cls: 4.0034 (3.8134) grad_norm: 4.2673 (4.4821) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 02:37:46 root] (utils.py 283): INFO Epoch: [24] [2070/2502] eta: 0:05:42 lr: 0.000003 loss_cls: 3.9518 (3.8131) grad_norm: 4.3335 (4.4827) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 02:37:54 root] (utils.py 283): INFO Epoch: [24] [2080/2502] eta: 0:05:34 lr: 0.000003 loss_cls: 3.5842 (3.8130) grad_norm: 4.3728 (4.4828) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:02 root] (utils.py 283): INFO Epoch: [24] [2090/2502] eta: 0:05:26 lr: 0.000003 loss_cls: 3.7586 (3.8133) grad_norm: 4.3673 (4.4820) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:09 root] (utils.py 283): INFO Epoch: [24] [2100/2502] eta: 0:05:18 lr: 0.000003 loss_cls: 3.6469 (3.8114) grad_norm: 4.3341 (4.4821) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:17 root] (utils.py 283): INFO Epoch: [24] [2110/2502] eta: 0:05:10 lr: 0.000003 loss_cls: 3.5587 (3.8114) grad_norm: 4.5514 (4.4839) time: 0.7860 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:25 root] (utils.py 283): INFO Epoch: [24] [2120/2502] eta: 0:05:03 lr: 0.000003 loss_cls: 3.8031 (3.8117) grad_norm: 4.5514 (4.4847) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:33 root] (utils.py 283): INFO Epoch: [24] [2130/2502] eta: 0:04:55 lr: 0.000003 loss_cls: 3.6291 (3.8112) grad_norm: 4.3742 (4.4872) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:41 root] (utils.py 283): INFO Epoch: [24] [2140/2502] eta: 0:04:47 lr: 0.000003 loss_cls: 3.8657 (3.8113) grad_norm: 4.5142 (4.4874) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:49 root] (utils.py 283): INFO Epoch: [24] [2150/2502] eta: 0:04:39 lr: 0.000003 loss_cls: 3.8657 (3.8108) grad_norm: 4.5439 (4.4880) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 02:38:56 root] (utils.py 283): INFO Epoch: [24] [2160/2502] eta: 0:04:31 lr: 0.000003 loss_cls: 3.9787 (3.8119) grad_norm: 4.5912 (4.4899) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:04 root] (utils.py 283): INFO Epoch: [24] [2170/2502] eta: 0:04:23 lr: 0.000003 loss_cls: 4.2052 (3.8133) grad_norm: 4.5768 (4.4957) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:12 root] (utils.py 283): INFO Epoch: [24] [2180/2502] eta: 0:04:15 lr: 0.000003 loss_cls: 4.0326 (3.8120) grad_norm: 4.4542 (4.4962) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:20 root] (utils.py 283): INFO Epoch: [24] [2190/2502] eta: 0:04:07 lr: 0.000003 loss_cls: 3.9457 (3.8121) grad_norm: 4.3420 (4.4954) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:28 root] (utils.py 283): INFO Epoch: [24] [2200/2502] eta: 0:03:59 lr: 0.000003 loss_cls: 3.9287 (3.8113) grad_norm: 4.2431 (4.4948) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:36 root] (utils.py 283): INFO Epoch: [24] [2210/2502] eta: 0:03:51 lr: 0.000003 loss_cls: 3.9287 (3.8121) grad_norm: 4.5409 (4.4969) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:43 root] (utils.py 283): INFO Epoch: [24] [2220/2502] eta: 0:03:43 lr: 0.000003 loss_cls: 3.8299 (3.8114) grad_norm: 4.2760 (4.4987) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:51 root] (utils.py 283): INFO Epoch: [24] [2230/2502] eta: 0:03:35 lr: 0.000003 loss_cls: 3.8022 (3.8118) grad_norm: 4.2432 (4.4983) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 02:39:59 root] (utils.py 283): INFO Epoch: [24] [2240/2502] eta: 0:03:27 lr: 0.000003 loss_cls: 3.9795 (3.8128) grad_norm: 4.4015 (4.4982) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 02:40:07 root] (utils.py 283): INFO Epoch: [24] [2250/2502] eta: 0:03:19 lr: 0.000003 loss_cls: 4.0371 (3.8145) grad_norm: 4.3912 (4.4977) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 02:40:15 root] (utils.py 283): INFO Epoch: [24] [2260/2502] eta: 0:03:11 lr: 0.000003 loss_cls: 4.3186 (3.8164) grad_norm: 4.3964 (4.5023) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 02:40:23 root] (utils.py 283): INFO Epoch: [24] [2270/2502] eta: 0:03:03 lr: 0.000003 loss_cls: 4.1222 (3.8163) grad_norm: 4.3905 (4.5022) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 02:40:30 root] (utils.py 283): INFO Epoch: [24] [2280/2502] eta: 0:02:55 lr: 0.000003 loss_cls: 3.7013 (3.8159) grad_norm: 4.3563 (4.5016) time: 0.7892 data: 0.0003 max mem: 8421 +[2024-12-06 02:40:38 root] (utils.py 283): INFO Epoch: [24] [2290/2502] eta: 0:02:47 lr: 0.000003 loss_cls: 3.7419 (3.8155) grad_norm: 4.3895 (4.5015) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 02:40:46 root] (utils.py 283): INFO Epoch: [24] [2300/2502] eta: 0:02:40 lr: 0.000003 loss_cls: 3.8328 (3.8161) grad_norm: 4.3895 (4.5008) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-06 02:40:54 root] (utils.py 283): INFO Epoch: [24] [2310/2502] eta: 0:02:32 lr: 0.000003 loss_cls: 4.0526 (3.8166) grad_norm: 4.2261 (4.4996) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 02:41:02 root] (utils.py 283): INFO Epoch: [24] [2320/2502] eta: 0:02:24 lr: 0.000003 loss_cls: 4.1419 (3.8182) grad_norm: 4.2261 (4.4988) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-06 02:41:10 root] (utils.py 283): INFO Epoch: [24] [2330/2502] eta: 0:02:16 lr: 0.000003 loss_cls: 4.1672 (3.8187) grad_norm: 4.2157 (4.4970) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 02:41:17 root] (utils.py 283): INFO Epoch: [24] [2340/2502] eta: 0:02:08 lr: 0.000003 loss_cls: 3.7003 (3.8178) grad_norm: 4.1612 (4.4987) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 02:41:25 root] (utils.py 283): INFO Epoch: [24] [2350/2502] eta: 0:02:00 lr: 0.000003 loss_cls: 3.7003 (3.8181) grad_norm: 4.2427 (4.4999) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 02:41:33 root] (utils.py 283): INFO Epoch: [24] [2360/2502] eta: 0:01:52 lr: 0.000003 loss_cls: 3.8917 (3.8173) grad_norm: 4.2427 (4.4997) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-06 02:41:41 root] (utils.py 283): INFO Epoch: [24] [2370/2502] eta: 0:01:44 lr: 0.000003 loss_cls: 3.8192 (3.8180) grad_norm: 4.1871 (4.4989) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-06 02:41:49 root] (utils.py 283): INFO Epoch: [24] [2380/2502] eta: 0:01:36 lr: 0.000003 loss_cls: 4.0339 (3.8190) grad_norm: 4.1988 (4.4984) time: 0.7881 data: 0.0003 max mem: 8421 +[2024-12-06 02:41:57 root] (utils.py 283): INFO Epoch: [24] [2390/2502] eta: 0:01:28 lr: 0.000003 loss_cls: 4.0339 (3.8189) grad_norm: 4.4006 (4.4985) time: 0.7884 data: 0.0003 max mem: 8421 +[2024-12-06 02:42:04 root] (utils.py 283): INFO Epoch: [24] [2400/2502] eta: 0:01:20 lr: 0.000003 loss_cls: 3.9705 (3.8191) grad_norm: 4.4129 (4.4980) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 02:42:12 root] (utils.py 283): INFO Epoch: [24] [2410/2502] eta: 0:01:12 lr: 0.000003 loss_cls: 3.8730 (3.8184) grad_norm: 4.3654 (4.4972) time: 0.7809 data: 0.0002 max mem: 8421 +[2024-12-06 02:42:20 root] (utils.py 283): INFO Epoch: [24] [2420/2502] eta: 0:01:04 lr: 0.000003 loss_cls: 3.8778 (3.8190) grad_norm: 4.4219 (4.4981) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 02:42:28 root] (utils.py 283): INFO Epoch: [24] [2430/2502] eta: 0:00:57 lr: 0.000003 loss_cls: 4.0294 (3.8201) grad_norm: 4.4219 (4.4977) time: 0.7806 data: 0.0002 max mem: 8421 +[2024-12-06 02:42:36 root] (utils.py 283): INFO Epoch: [24] [2440/2502] eta: 0:00:49 lr: 0.000003 loss_cls: 4.0871 (3.8202) grad_norm: 4.3700 (4.4978) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 02:42:44 root] (utils.py 283): INFO Epoch: [24] [2450/2502] eta: 0:00:41 lr: 0.000003 loss_cls: 3.9915 (3.8213) grad_norm: 4.4272 (4.4979) time: 0.7896 data: 0.0003 max mem: 8421 +[2024-12-06 02:42:51 root] (utils.py 283): INFO Epoch: [24] [2460/2502] eta: 0:00:33 lr: 0.000003 loss_cls: 4.0345 (3.8208) grad_norm: 4.4272 (4.4975) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 02:42:59 root] (utils.py 283): INFO Epoch: [24] [2470/2502] eta: 0:00:25 lr: 0.000003 loss_cls: 4.0345 (3.8208) grad_norm: 4.4154 (4.4978) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 02:43:07 root] (utils.py 283): INFO Epoch: [24] [2480/2502] eta: 0:00:17 lr: 0.000003 loss_cls: 3.9359 (3.8204) grad_norm: 4.4931 (4.4976) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 02:43:15 root] (utils.py 283): INFO Epoch: [24] [2490/2502] eta: 0:00:09 lr: 0.000003 loss_cls: 3.9491 (3.8211) grad_norm: 4.4922 (4.4972) time: 0.8029 data: 0.0251 max mem: 8421 +[2024-12-06 02:43:23 root] (utils.py 283): INFO Epoch: [24] [2500/2502] eta: 0:00:01 lr: 0.000003 loss_cls: 4.1867 (3.8232) grad_norm: 4.4922 (4.4975) time: 0.8028 data: 0.0251 max mem: 8421 +[2024-12-06 02:43:24 root] (utils.py 283): INFO Epoch: [24] [2501/2502] eta: 0:00:00 lr: 0.000003 loss_cls: 4.1867 (3.8233) grad_norm: 4.4887 (4.4973) time: 0.8021 data: 0.0251 max mem: 8421 +[2024-12-06 02:43:24 root] (utils.py 297): INFO Epoch: [24] Total time: 0:33:01 (0.7918 s / it) +[2024-12-06 02:43:24 root] (engine.py 179): INFO Averaged stats:lr: 0.000003 loss_cls: 4.1867 (3.8227) grad_norm: 4.4887 (4.4973) +[2024-12-06 02:43:24 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7127 (0.7127) acc1: 85.9375 (85.9375) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1309 data: 0.0003 max mem: 8421 +[2024-12-06 02:43:26 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7810 (0.8449) acc1: 84.3750 (82.0312) acc3: 93.7500 (92.9688) acc5: 96.0938 (95.9517) time: 0.1311 data: 0.0004 max mem: 8421 +[2024-12-06 02:43:27 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8735 (0.8989) acc1: 78.9062 (80.7664) acc3: 92.9688 (92.5223) acc5: 95.3125 (95.3125) time: 0.1318 data: 0.0004 max mem: 8421 +[2024-12-06 02:43:28 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9513 (0.9050) acc1: 79.6875 (80.3427) acc3: 92.1875 (92.8427) acc5: 95.3125 (95.4637) time: 0.1336 data: 0.0005 max mem: 8421 +[2024-12-06 02:43:30 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8339 (0.8985) acc1: 80.4688 (80.6974) acc3: 94.5312 (93.0259) acc5: 96.0938 (95.5412) time: 0.1333 data: 0.0005 max mem: 8421 +[2024-12-06 02:43:31 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0087 (0.9909) acc1: 74.2188 (78.4314) acc3: 88.2812 (91.5135) acc5: 92.1875 (94.4700) time: 0.1390 data: 0.0060 max mem: 8421 +[2024-12-06 02:43:32 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2807 (1.0357) acc1: 72.6562 (77.5999) acc3: 86.7188 (90.7018) acc5: 90.6250 (93.7756) time: 0.1415 data: 0.0060 max mem: 8421 +[2024-12-06 02:43:34 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2386 (1.0784) acc1: 73.4375 (76.5075) acc3: 86.7188 (90.0748) acc5: 89.8438 (93.3209) time: 0.1361 data: 0.0011 max mem: 8421 +[2024-12-06 02:43:35 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2912 (1.1154) acc1: 71.0938 (75.6462) acc3: 85.1562 (89.4772) acc5: 89.8438 (92.8241) time: 0.1508 data: 0.0178 max mem: 8421 +[2024-12-06 02:43:37 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3653 (1.1452) acc1: 71.0938 (74.8369) acc3: 84.3750 (88.9166) acc5: 89.8438 (92.4193) time: 0.1570 data: 0.0249 max mem: 8421 +[2024-12-06 02:43:38 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2011 (1.1340) acc1: 71.8750 (74.9600) acc3: 87.5000 (89.0800) acc5: 91.4062 (92.6000) time: 0.1622 data: 0.0312 max mem: 8421 +[2024-12-06 02:43:38 root] (utils.py 297): INFO Test: Total time: 0:00:14 (0.1441 s / it) +[2024-12-06 02:43:40 root] (engine.py 264): INFO * Acc@1 75.016 Acc@3 89.014 Acc@5 92.458 loss 1.135 flops 1.285 layer_flops 1.251 +[2024-12-06 02:43:40 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.0% +[2024-12-06 02:43:40 root] (main.py 551): INFO Max accuracy: 75.04% +[2024-12-06 02:43:41 root] (utils.py 283): INFO Epoch: [25] [ 0/2502] eta: 0:34:00 lr: 0.000003 loss_cls: 3.6601 (3.6601) grad_norm: 7.2728 (7.2728) time: 0.8157 data: 0.0006 max mem: 8421 +[2024-12-06 02:43:49 root] (utils.py 283): INFO Epoch: [25] [ 10/2502] eta: 0:33:01 lr: 0.000003 loss_cls: 3.9915 (3.8714) grad_norm: 4.6784 (5.2903) time: 0.7951 data: 0.0003 max mem: 8421 +[2024-12-06 02:43:57 root] (utils.py 283): INFO Epoch: [25] [ 20/2502] eta: 0:33:30 lr: 0.000003 loss_cls: 3.9093 (3.7894) grad_norm: 4.4106 (4.9295) time: 0.8099 data: 0.0003 max mem: 8421 +[2024-12-06 02:44:06 root] (utils.py 283): INFO Epoch: [25] [ 30/2502] eta: 0:34:16 lr: 0.000003 loss_cls: 3.5867 (3.6995) grad_norm: 4.4185 (5.0152) time: 0.8521 data: 0.0003 max mem: 8421 +[2024-12-06 02:44:15 root] (utils.py 283): INFO Epoch: [25] [ 40/2502] eta: 0:34:25 lr: 0.000003 loss_cls: 3.6586 (3.7380) grad_norm: 4.4198 (4.8467) time: 0.8695 data: 0.0003 max mem: 8421 +[2024-12-06 02:44:23 root] (utils.py 283): INFO Epoch: [25] [ 50/2502] eta: 0:33:52 lr: 0.000003 loss_cls: 3.5455 (3.6746) grad_norm: 4.3676 (4.7705) time: 0.8245 data: 0.0003 max mem: 8421 +[2024-12-06 02:44:30 root] (utils.py 283): INFO Epoch: [25] [ 60/2502] eta: 0:33:25 lr: 0.000003 loss_cls: 4.0856 (3.7286) grad_norm: 4.3799 (4.7496) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 02:44:38 root] (utils.py 283): INFO Epoch: [25] [ 70/2502] eta: 0:33:02 lr: 0.000003 loss_cls: 4.0005 (3.7130) grad_norm: 4.5750 (4.7508) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 02:44:46 root] (utils.py 283): INFO Epoch: [25] [ 80/2502] eta: 0:32:52 lr: 0.000003 loss_cls: 3.6284 (3.7222) grad_norm: 4.3890 (4.7339) time: 0.7940 data: 0.0003 max mem: 8421 +[2024-12-06 02:44:54 root] (utils.py 283): INFO Epoch: [25] [ 90/2502] eta: 0:32:37 lr: 0.000003 loss_cls: 4.0853 (3.7500) grad_norm: 4.2673 (4.6774) time: 0.7999 data: 0.0003 max mem: 8421 +[2024-12-06 02:45:02 root] (utils.py 283): INFO Epoch: [25] [ 100/2502] eta: 0:32:22 lr: 0.000003 loss_cls: 4.0709 (3.7492) grad_norm: 4.3545 (4.6990) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 02:45:10 root] (utils.py 283): INFO Epoch: [25] [ 110/2502] eta: 0:32:07 lr: 0.000003 loss_cls: 4.0267 (3.7801) grad_norm: 4.4925 (4.6917) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 02:45:18 root] (utils.py 283): INFO Epoch: [25] [ 120/2502] eta: 0:31:54 lr: 0.000003 loss_cls: 4.0339 (3.7980) grad_norm: 4.3485 (4.6718) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 02:45:25 root] (utils.py 283): INFO Epoch: [25] [ 130/2502] eta: 0:31:41 lr: 0.000003 loss_cls: 3.9729 (3.8004) grad_norm: 4.3485 (4.6522) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-06 02:45:33 root] (utils.py 283): INFO Epoch: [25] [ 140/2502] eta: 0:31:30 lr: 0.000003 loss_cls: 4.0226 (3.8200) grad_norm: 4.3662 (4.6756) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-06 02:45:41 root] (utils.py 283): INFO Epoch: [25] [ 150/2502] eta: 0:31:19 lr: 0.000003 loss_cls: 4.0050 (3.8142) grad_norm: 4.4914 (4.6557) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-06 02:45:49 root] (utils.py 283): INFO Epoch: [25] [ 160/2502] eta: 0:31:09 lr: 0.000003 loss_cls: 3.7324 (3.7981) grad_norm: 4.4754 (4.6595) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 02:45:57 root] (utils.py 283): INFO Epoch: [25] [ 170/2502] eta: 0:30:59 lr: 0.000003 loss_cls: 3.6550 (3.7745) grad_norm: 4.4211 (4.6394) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:05 root] (utils.py 283): INFO Epoch: [25] [ 180/2502] eta: 0:30:49 lr: 0.000003 loss_cls: 3.3548 (3.7601) grad_norm: 4.2814 (4.6180) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:12 root] (utils.py 283): INFO Epoch: [25] [ 190/2502] eta: 0:30:39 lr: 0.000003 loss_cls: 3.6973 (3.7590) grad_norm: 4.3921 (4.6426) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:20 root] (utils.py 283): INFO Epoch: [25] [ 200/2502] eta: 0:30:30 lr: 0.000003 loss_cls: 4.0410 (3.7659) grad_norm: 4.4099 (4.6371) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:28 root] (utils.py 283): INFO Epoch: [25] [ 210/2502] eta: 0:30:20 lr: 0.000003 loss_cls: 4.0231 (3.7661) grad_norm: 4.3459 (4.6194) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:36 root] (utils.py 283): INFO Epoch: [25] [ 220/2502] eta: 0:30:11 lr: 0.000003 loss_cls: 3.9384 (3.7734) grad_norm: 4.2537 (4.6123) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:44 root] (utils.py 283): INFO Epoch: [25] [ 230/2502] eta: 0:30:02 lr: 0.000003 loss_cls: 3.9296 (3.7721) grad_norm: 4.3158 (4.6021) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:51 root] (utils.py 283): INFO Epoch: [25] [ 240/2502] eta: 0:29:52 lr: 0.000003 loss_cls: 3.9296 (3.7787) grad_norm: 4.3422 (4.5985) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 02:46:59 root] (utils.py 283): INFO Epoch: [25] [ 250/2502] eta: 0:29:43 lr: 0.000003 loss_cls: 4.0490 (3.7873) grad_norm: 4.3146 (4.5903) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 02:47:07 root] (utils.py 283): INFO Epoch: [25] [ 260/2502] eta: 0:29:35 lr: 0.000003 loss_cls: 3.9159 (3.7849) grad_norm: 4.2108 (4.5783) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 02:47:15 root] (utils.py 283): INFO Epoch: [25] [ 270/2502] eta: 0:29:28 lr: 0.000003 loss_cls: 3.9010 (3.7913) grad_norm: 4.1996 (4.5705) time: 0.7947 data: 0.0003 max mem: 8421 +[2024-12-06 02:47:23 root] (utils.py 283): INFO Epoch: [25] [ 280/2502] eta: 0:29:20 lr: 0.000003 loss_cls: 4.0278 (3.7922) grad_norm: 4.1996 (4.5636) time: 0.7953 data: 0.0003 max mem: 8421 +[2024-12-06 02:47:31 root] (utils.py 283): INFO Epoch: [25] [ 290/2502] eta: 0:29:11 lr: 0.000003 loss_cls: 4.1869 (3.8048) grad_norm: 4.2018 (4.5681) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-06 02:47:39 root] (utils.py 283): INFO Epoch: [25] [ 300/2502] eta: 0:29:02 lr: 0.000003 loss_cls: 4.1330 (3.8012) grad_norm: 4.3424 (4.5628) time: 0.7781 data: 0.0002 max mem: 8421 +[2024-12-06 02:47:46 root] (utils.py 283): INFO Epoch: [25] [ 310/2502] eta: 0:28:53 lr: 0.000003 loss_cls: 3.5819 (3.7905) grad_norm: 4.3510 (4.5610) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 02:47:54 root] (utils.py 283): INFO Epoch: [25] [ 320/2502] eta: 0:28:45 lr: 0.000003 loss_cls: 3.5321 (3.7914) grad_norm: 4.3725 (4.5528) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 02:48:02 root] (utils.py 283): INFO Epoch: [25] [ 330/2502] eta: 0:28:36 lr: 0.000003 loss_cls: 3.9345 (3.7973) grad_norm: 4.2140 (4.5417) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 02:48:10 root] (utils.py 283): INFO Epoch: [25] [ 340/2502] eta: 0:28:28 lr: 0.000003 loss_cls: 3.9844 (3.7945) grad_norm: 4.1818 (4.5346) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-06 02:48:18 root] (utils.py 283): INFO Epoch: [25] [ 350/2502] eta: 0:28:20 lr: 0.000003 loss_cls: 4.0108 (3.7954) grad_norm: 4.3609 (4.5600) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 02:48:25 root] (utils.py 283): INFO Epoch: [25] [ 360/2502] eta: 0:28:11 lr: 0.000003 loss_cls: 3.4206 (3.7805) grad_norm: 4.5265 (4.5581) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 02:48:33 root] (utils.py 283): INFO Epoch: [25] [ 370/2502] eta: 0:28:02 lr: 0.000003 loss_cls: 3.5336 (3.7801) grad_norm: 4.2955 (4.5550) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 02:48:41 root] (utils.py 283): INFO Epoch: [25] [ 380/2502] eta: 0:27:54 lr: 0.000003 loss_cls: 3.8230 (3.7803) grad_norm: 4.3043 (4.5538) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 02:48:49 root] (utils.py 283): INFO Epoch: [25] [ 390/2502] eta: 0:27:46 lr: 0.000003 loss_cls: 3.9830 (3.7827) grad_norm: 4.3944 (4.5496) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 02:48:57 root] (utils.py 283): INFO Epoch: [25] [ 400/2502] eta: 0:27:37 lr: 0.000003 loss_cls: 4.0220 (3.7878) grad_norm: 4.2628 (4.5569) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:05 root] (utils.py 283): INFO Epoch: [25] [ 410/2502] eta: 0:27:29 lr: 0.000003 loss_cls: 3.7871 (3.7848) grad_norm: 4.1354 (4.5496) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:12 root] (utils.py 283): INFO Epoch: [25] [ 420/2502] eta: 0:27:21 lr: 0.000003 loss_cls: 3.9568 (3.7890) grad_norm: 4.3316 (4.5470) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:20 root] (utils.py 283): INFO Epoch: [25] [ 430/2502] eta: 0:27:13 lr: 0.000003 loss_cls: 4.0016 (3.7895) grad_norm: 4.2078 (4.5412) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:28 root] (utils.py 283): INFO Epoch: [25] [ 440/2502] eta: 0:27:04 lr: 0.000003 loss_cls: 3.9822 (3.7911) grad_norm: 4.2356 (4.5398) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:36 root] (utils.py 283): INFO Epoch: [25] [ 450/2502] eta: 0:26:56 lr: 0.000003 loss_cls: 3.9822 (3.7898) grad_norm: 4.5317 (4.5391) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:44 root] (utils.py 283): INFO Epoch: [25] [ 460/2502] eta: 0:26:48 lr: 0.000003 loss_cls: 3.6867 (3.7878) grad_norm: 4.4494 (4.5380) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:51 root] (utils.py 283): INFO Epoch: [25] [ 470/2502] eta: 0:26:40 lr: 0.000003 loss_cls: 3.7592 (3.7908) grad_norm: 4.3573 (4.5382) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 02:49:59 root] (utils.py 283): INFO Epoch: [25] [ 480/2502] eta: 0:26:32 lr: 0.000003 loss_cls: 3.9924 (3.7885) grad_norm: 4.3447 (4.5440) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 02:50:07 root] (utils.py 283): INFO Epoch: [25] [ 490/2502] eta: 0:26:24 lr: 0.000003 loss_cls: 4.0418 (3.7927) grad_norm: 4.4182 (4.5418) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:50:15 root] (utils.py 283): INFO Epoch: [25] [ 500/2502] eta: 0:26:16 lr: 0.000003 loss_cls: 4.0418 (3.7967) grad_norm: 4.4403 (4.5452) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 02:50:23 root] (utils.py 283): INFO Epoch: [25] [ 510/2502] eta: 0:26:08 lr: 0.000003 loss_cls: 3.8962 (3.7944) grad_norm: 4.4403 (4.5419) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 02:50:31 root] (utils.py 283): INFO Epoch: [25] [ 520/2502] eta: 0:26:00 lr: 0.000003 loss_cls: 3.7347 (3.7936) grad_norm: 4.3455 (4.5374) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 02:50:38 root] (utils.py 283): INFO Epoch: [25] [ 530/2502] eta: 0:25:51 lr: 0.000003 loss_cls: 3.7390 (3.7885) grad_norm: 4.3366 (4.5374) time: 0.7753 data: 0.0003 max mem: 8421 +[2024-12-06 02:50:46 root] (utils.py 283): INFO Epoch: [25] [ 540/2502] eta: 0:25:43 lr: 0.000003 loss_cls: 3.4431 (3.7852) grad_norm: 4.2800 (4.5371) time: 0.7751 data: 0.0003 max mem: 8421 +[2024-12-06 02:50:54 root] (utils.py 283): INFO Epoch: [25] [ 550/2502] eta: 0:25:35 lr: 0.000003 loss_cls: 4.0015 (3.7854) grad_norm: 4.3731 (4.5359) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:02 root] (utils.py 283): INFO Epoch: [25] [ 560/2502] eta: 0:25:27 lr: 0.000003 loss_cls: 3.9837 (3.7848) grad_norm: 4.4369 (4.5351) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:09 root] (utils.py 283): INFO Epoch: [25] [ 570/2502] eta: 0:25:19 lr: 0.000003 loss_cls: 3.9837 (3.7864) grad_norm: 4.3234 (4.5322) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:17 root] (utils.py 283): INFO Epoch: [25] [ 580/2502] eta: 0:25:10 lr: 0.000003 loss_cls: 4.0198 (3.7863) grad_norm: 4.3123 (4.5316) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:25 root] (utils.py 283): INFO Epoch: [25] [ 590/2502] eta: 0:25:03 lr: 0.000003 loss_cls: 4.0744 (3.7896) grad_norm: 4.2929 (4.5303) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:33 root] (utils.py 283): INFO Epoch: [25] [ 600/2502] eta: 0:24:55 lr: 0.000003 loss_cls: 3.8003 (3.7850) grad_norm: 4.3228 (4.5319) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:41 root] (utils.py 283): INFO Epoch: [25] [ 610/2502] eta: 0:24:47 lr: 0.000003 loss_cls: 3.7568 (3.7866) grad_norm: 4.3228 (4.5303) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:49 root] (utils.py 283): INFO Epoch: [25] [ 620/2502] eta: 0:24:39 lr: 0.000003 loss_cls: 3.7599 (3.7832) grad_norm: 4.2515 (4.5280) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 02:51:56 root] (utils.py 283): INFO Epoch: [25] [ 630/2502] eta: 0:24:31 lr: 0.000003 loss_cls: 3.9110 (3.7845) grad_norm: 4.2098 (4.5268) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:04 root] (utils.py 283): INFO Epoch: [25] [ 640/2502] eta: 0:24:23 lr: 0.000003 loss_cls: 3.8803 (3.7851) grad_norm: 4.3060 (4.5318) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:12 root] (utils.py 283): INFO Epoch: [25] [ 650/2502] eta: 0:24:15 lr: 0.000003 loss_cls: 3.8692 (3.7819) grad_norm: 4.3435 (4.5298) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:20 root] (utils.py 283): INFO Epoch: [25] [ 660/2502] eta: 0:24:07 lr: 0.000003 loss_cls: 3.8766 (3.7825) grad_norm: 4.3950 (4.5297) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:28 root] (utils.py 283): INFO Epoch: [25] [ 670/2502] eta: 0:23:59 lr: 0.000003 loss_cls: 3.7440 (3.7800) grad_norm: 4.4467 (4.5293) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:36 root] (utils.py 283): INFO Epoch: [25] [ 680/2502] eta: 0:23:51 lr: 0.000003 loss_cls: 3.9976 (3.7822) grad_norm: 4.4616 (4.5278) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:43 root] (utils.py 283): INFO Epoch: [25] [ 690/2502] eta: 0:23:43 lr: 0.000003 loss_cls: 4.0007 (3.7808) grad_norm: 4.4146 (4.5264) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:51 root] (utils.py 283): INFO Epoch: [25] [ 700/2502] eta: 0:23:35 lr: 0.000003 loss_cls: 3.9091 (3.7828) grad_norm: 4.4145 (4.5258) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-06 02:52:59 root] (utils.py 283): INFO Epoch: [25] [ 710/2502] eta: 0:23:27 lr: 0.000003 loss_cls: 3.9091 (3.7851) grad_norm: 4.4234 (4.5243) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 02:53:07 root] (utils.py 283): INFO Epoch: [25] [ 720/2502] eta: 0:23:19 lr: 0.000003 loss_cls: 3.8805 (3.7877) grad_norm: 4.3202 (4.5248) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 02:53:15 root] (utils.py 283): INFO Epoch: [25] [ 730/2502] eta: 0:23:12 lr: 0.000003 loss_cls: 3.7930 (3.7857) grad_norm: 4.3356 (4.5241) time: 0.7894 data: 0.0002 max mem: 8421 +[2024-12-06 02:53:23 root] (utils.py 283): INFO Epoch: [25] [ 740/2502] eta: 0:23:04 lr: 0.000003 loss_cls: 3.9334 (3.7869) grad_norm: 4.4488 (4.5267) time: 0.7929 data: 0.0003 max mem: 8421 +[2024-12-06 02:53:31 root] (utils.py 283): INFO Epoch: [25] [ 750/2502] eta: 0:22:56 lr: 0.000003 loss_cls: 4.0326 (3.7905) grad_norm: 4.3425 (4.5252) time: 0.7904 data: 0.0003 max mem: 8421 +[2024-12-06 02:53:38 root] (utils.py 283): INFO Epoch: [25] [ 760/2502] eta: 0:22:48 lr: 0.000003 loss_cls: 3.9340 (3.7919) grad_norm: 4.3425 (4.5265) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 02:53:46 root] (utils.py 283): INFO Epoch: [25] [ 770/2502] eta: 0:22:40 lr: 0.000003 loss_cls: 3.7187 (3.7866) grad_norm: 4.4139 (4.5252) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 02:53:54 root] (utils.py 283): INFO Epoch: [25] [ 780/2502] eta: 0:22:33 lr: 0.000003 loss_cls: 3.8165 (3.7858) grad_norm: 4.3639 (4.5238) time: 0.7850 data: 0.0002 max mem: 8421 +[2024-12-06 02:54:02 root] (utils.py 283): INFO Epoch: [25] [ 790/2502] eta: 0:22:25 lr: 0.000003 loss_cls: 4.0345 (3.7883) grad_norm: 4.3943 (4.5234) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 02:54:10 root] (utils.py 283): INFO Epoch: [25] [ 800/2502] eta: 0:22:17 lr: 0.000003 loss_cls: 4.0548 (3.7919) grad_norm: 4.3943 (4.5222) time: 0.7841 data: 0.0003 max mem: 8421 +[2024-12-06 02:54:18 root] (utils.py 283): INFO Epoch: [25] [ 810/2502] eta: 0:22:09 lr: 0.000003 loss_cls: 3.9864 (3.7915) grad_norm: 4.2154 (4.5188) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 02:54:26 root] (utils.py 283): INFO Epoch: [25] [ 820/2502] eta: 0:22:01 lr: 0.000003 loss_cls: 3.9465 (3.7899) grad_norm: 4.2085 (4.5173) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 02:54:33 root] (utils.py 283): INFO Epoch: [25] [ 830/2502] eta: 0:21:53 lr: 0.000003 loss_cls: 3.9551 (3.7930) grad_norm: 4.4814 (4.5232) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-06 02:54:41 root] (utils.py 283): INFO Epoch: [25] [ 840/2502] eta: 0:21:46 lr: 0.000003 loss_cls: 3.8319 (3.7917) grad_norm: 4.2855 (4.5240) time: 0.7932 data: 0.0003 max mem: 8421 +[2024-12-06 02:54:49 root] (utils.py 283): INFO Epoch: [25] [ 850/2502] eta: 0:21:38 lr: 0.000003 loss_cls: 3.8738 (3.7931) grad_norm: 4.2168 (4.5222) time: 0.7930 data: 0.0003 max mem: 8421 +[2024-12-06 02:54:57 root] (utils.py 283): INFO Epoch: [25] [ 860/2502] eta: 0:21:30 lr: 0.000003 loss_cls: 4.0600 (3.7914) grad_norm: 4.1992 (4.5215) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 02:55:05 root] (utils.py 283): INFO Epoch: [25] [ 870/2502] eta: 0:21:22 lr: 0.000003 loss_cls: 3.8637 (3.7909) grad_norm: 4.2372 (4.5275) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 02:55:13 root] (utils.py 283): INFO Epoch: [25] [ 880/2502] eta: 0:21:14 lr: 0.000003 loss_cls: 3.7988 (3.7890) grad_norm: 4.4864 (4.5348) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 02:55:21 root] (utils.py 283): INFO Epoch: [25] [ 890/2502] eta: 0:21:06 lr: 0.000003 loss_cls: 3.7174 (3.7867) grad_norm: 4.4619 (4.5337) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 02:55:28 root] (utils.py 283): INFO Epoch: [25] [ 900/2502] eta: 0:20:58 lr: 0.000003 loss_cls: 3.5072 (3.7843) grad_norm: 4.4014 (4.5319) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 02:55:36 root] (utils.py 283): INFO Epoch: [25] [ 910/2502] eta: 0:20:50 lr: 0.000003 loss_cls: 3.9013 (3.7821) grad_norm: 4.3427 (4.5304) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 02:55:44 root] (utils.py 283): INFO Epoch: [25] [ 920/2502] eta: 0:20:43 lr: 0.000003 loss_cls: 3.9743 (3.7833) grad_norm: 4.5342 (4.5338) time: 0.7891 data: 0.0003 max mem: 8421 +[2024-12-06 02:55:52 root] (utils.py 283): INFO Epoch: [25] [ 930/2502] eta: 0:20:35 lr: 0.000003 loss_cls: 3.8683 (3.7854) grad_norm: 4.7647 (4.5327) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-06 02:56:00 root] (utils.py 283): INFO Epoch: [25] [ 940/2502] eta: 0:20:27 lr: 0.000003 loss_cls: 4.0315 (3.7861) grad_norm: 4.3398 (4.5340) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 02:56:08 root] (utils.py 283): INFO Epoch: [25] [ 950/2502] eta: 0:20:19 lr: 0.000003 loss_cls: 4.0315 (3.7877) grad_norm: 4.2874 (4.5314) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 02:56:15 root] (utils.py 283): INFO Epoch: [25] [ 960/2502] eta: 0:20:11 lr: 0.000003 loss_cls: 3.9821 (3.7886) grad_norm: 4.2876 (4.5307) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 02:56:23 root] (utils.py 283): INFO Epoch: [25] [ 970/2502] eta: 0:20:03 lr: 0.000003 loss_cls: 3.8210 (3.7874) grad_norm: 4.2150 (4.5279) time: 0.7767 data: 0.0002 max mem: 8421 +[2024-12-06 02:56:31 root] (utils.py 283): INFO Epoch: [25] [ 980/2502] eta: 0:19:55 lr: 0.000003 loss_cls: 3.6953 (3.7842) grad_norm: 4.2150 (4.5263) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-06 02:56:39 root] (utils.py 283): INFO Epoch: [25] [ 990/2502] eta: 0:19:47 lr: 0.000003 loss_cls: 3.4195 (3.7846) grad_norm: 4.4060 (4.5313) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 02:56:47 root] (utils.py 283): INFO Epoch: [25] [1000/2502] eta: 0:19:39 lr: 0.000003 loss_cls: 3.9520 (3.7857) grad_norm: 4.4624 (4.5321) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 02:56:55 root] (utils.py 283): INFO Epoch: [25] [1010/2502] eta: 0:19:31 lr: 0.000003 loss_cls: 3.8596 (3.7843) grad_norm: 4.3342 (4.5302) time: 0.7896 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:02 root] (utils.py 283): INFO Epoch: [25] [1020/2502] eta: 0:19:23 lr: 0.000003 loss_cls: 3.8139 (3.7849) grad_norm: 4.2608 (4.5294) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:10 root] (utils.py 283): INFO Epoch: [25] [1030/2502] eta: 0:19:16 lr: 0.000003 loss_cls: 3.8139 (3.7853) grad_norm: 4.4247 (4.5291) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:18 root] (utils.py 283): INFO Epoch: [25] [1040/2502] eta: 0:19:08 lr: 0.000003 loss_cls: 3.8129 (3.7869) grad_norm: 4.4483 (4.5288) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:26 root] (utils.py 283): INFO Epoch: [25] [1050/2502] eta: 0:19:00 lr: 0.000003 loss_cls: 3.9911 (3.7890) grad_norm: 4.4445 (4.5293) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:34 root] (utils.py 283): INFO Epoch: [25] [1060/2502] eta: 0:18:52 lr: 0.000003 loss_cls: 4.0311 (3.7904) grad_norm: 4.4671 (4.5321) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:42 root] (utils.py 283): INFO Epoch: [25] [1070/2502] eta: 0:18:44 lr: 0.000003 loss_cls: 3.9388 (3.7909) grad_norm: 4.6056 (4.5360) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:50 root] (utils.py 283): INFO Epoch: [25] [1080/2502] eta: 0:18:36 lr: 0.000003 loss_cls: 3.9388 (3.7925) grad_norm: 4.5745 (4.5361) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 02:57:57 root] (utils.py 283): INFO Epoch: [25] [1090/2502] eta: 0:18:29 lr: 0.000003 loss_cls: 4.0542 (3.7956) grad_norm: 4.5141 (4.5366) time: 0.7880 data: 0.0003 max mem: 8421 +[2024-12-06 02:58:05 root] (utils.py 283): INFO Epoch: [25] [1100/2502] eta: 0:18:21 lr: 0.000003 loss_cls: 4.2155 (3.7979) grad_norm: 4.4273 (4.5366) time: 0.7910 data: 0.0003 max mem: 8421 +[2024-12-06 02:58:13 root] (utils.py 283): INFO Epoch: [25] [1110/2502] eta: 0:18:13 lr: 0.000003 loss_cls: 3.9532 (3.7986) grad_norm: 4.3579 (4.5368) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-06 02:58:21 root] (utils.py 283): INFO Epoch: [25] [1120/2502] eta: 0:18:05 lr: 0.000003 loss_cls: 3.8990 (3.7981) grad_norm: 4.3101 (4.5364) time: 0.7946 data: 0.0003 max mem: 8421 +[2024-12-06 02:58:29 root] (utils.py 283): INFO Epoch: [25] [1130/2502] eta: 0:17:57 lr: 0.000003 loss_cls: 3.7625 (3.7959) grad_norm: 4.4584 (4.5397) time: 0.7945 data: 0.0003 max mem: 8421 +[2024-12-06 02:58:37 root] (utils.py 283): INFO Epoch: [25] [1140/2502] eta: 0:17:50 lr: 0.000003 loss_cls: 3.7430 (3.7956) grad_norm: 4.4975 (4.5400) time: 0.7919 data: 0.0003 max mem: 8421 +[2024-12-06 02:58:45 root] (utils.py 283): INFO Epoch: [25] [1150/2502] eta: 0:17:42 lr: 0.000003 loss_cls: 4.0133 (3.7961) grad_norm: 4.3846 (4.5395) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 02:58:53 root] (utils.py 283): INFO Epoch: [25] [1160/2502] eta: 0:17:34 lr: 0.000003 loss_cls: 4.1150 (3.7983) grad_norm: 4.3486 (4.5390) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:01 root] (utils.py 283): INFO Epoch: [25] [1170/2502] eta: 0:17:26 lr: 0.000003 loss_cls: 4.0190 (3.7972) grad_norm: 4.3889 (4.5394) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:08 root] (utils.py 283): INFO Epoch: [25] [1180/2502] eta: 0:17:18 lr: 0.000003 loss_cls: 3.7703 (3.7976) grad_norm: 4.3344 (4.5387) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:16 root] (utils.py 283): INFO Epoch: [25] [1190/2502] eta: 0:17:10 lr: 0.000003 loss_cls: 3.7703 (3.7971) grad_norm: 4.3057 (4.5406) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:24 root] (utils.py 283): INFO Epoch: [25] [1200/2502] eta: 0:17:02 lr: 0.000003 loss_cls: 4.0174 (3.7982) grad_norm: 4.4384 (4.5431) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:32 root] (utils.py 283): INFO Epoch: [25] [1210/2502] eta: 0:16:54 lr: 0.000003 loss_cls: 3.9134 (3.7958) grad_norm: 4.3041 (4.5399) time: 0.7785 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:40 root] (utils.py 283): INFO Epoch: [25] [1220/2502] eta: 0:16:47 lr: 0.000003 loss_cls: 3.7834 (3.7963) grad_norm: 4.3000 (4.5386) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:47 root] (utils.py 283): INFO Epoch: [25] [1230/2502] eta: 0:16:39 lr: 0.000003 loss_cls: 3.8278 (3.7959) grad_norm: 4.3564 (4.5362) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 02:59:55 root] (utils.py 283): INFO Epoch: [25] [1240/2502] eta: 0:16:31 lr: 0.000003 loss_cls: 3.6824 (3.7949) grad_norm: 4.3564 (4.5367) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:03 root] (utils.py 283): INFO Epoch: [25] [1250/2502] eta: 0:16:23 lr: 0.000003 loss_cls: 3.8444 (3.7964) grad_norm: 4.4652 (4.5383) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:11 root] (utils.py 283): INFO Epoch: [25] [1260/2502] eta: 0:16:15 lr: 0.000003 loss_cls: 3.9737 (3.7967) grad_norm: 4.4650 (4.5387) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:19 root] (utils.py 283): INFO Epoch: [25] [1270/2502] eta: 0:16:07 lr: 0.000003 loss_cls: 3.7154 (3.7938) grad_norm: 4.4347 (4.5385) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:26 root] (utils.py 283): INFO Epoch: [25] [1280/2502] eta: 0:15:59 lr: 0.000003 loss_cls: 3.5583 (3.7932) grad_norm: 4.4158 (4.5380) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:34 root] (utils.py 283): INFO Epoch: [25] [1290/2502] eta: 0:15:51 lr: 0.000003 loss_cls: 3.8172 (3.7935) grad_norm: 4.3587 (4.5362) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:42 root] (utils.py 283): INFO Epoch: [25] [1300/2502] eta: 0:15:43 lr: 0.000003 loss_cls: 3.8775 (3.7932) grad_norm: 4.3587 (4.5382) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:50 root] (utils.py 283): INFO Epoch: [25] [1310/2502] eta: 0:15:35 lr: 0.000003 loss_cls: 3.9740 (3.7938) grad_norm: 4.4705 (4.5374) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 03:00:58 root] (utils.py 283): INFO Epoch: [25] [1320/2502] eta: 0:15:28 lr: 0.000003 loss_cls: 3.9851 (3.7954) grad_norm: 4.3565 (4.5367) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 03:01:05 root] (utils.py 283): INFO Epoch: [25] [1330/2502] eta: 0:15:20 lr: 0.000003 loss_cls: 3.9052 (3.7937) grad_norm: 4.4026 (4.5372) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-06 03:01:13 root] (utils.py 283): INFO Epoch: [25] [1340/2502] eta: 0:15:12 lr: 0.000003 loss_cls: 3.5278 (3.7945) grad_norm: 4.1702 (4.5365) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 03:01:21 root] (utils.py 283): INFO Epoch: [25] [1350/2502] eta: 0:15:04 lr: 0.000003 loss_cls: 4.1558 (3.7972) grad_norm: 4.1702 (4.5359) time: 0.7864 data: 0.0002 max mem: 8421 +[2024-12-06 03:01:29 root] (utils.py 283): INFO Epoch: [25] [1360/2502] eta: 0:14:56 lr: 0.000003 loss_cls: 4.1009 (3.7960) grad_norm: 4.3541 (4.5351) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 03:01:37 root] (utils.py 283): INFO Epoch: [25] [1370/2502] eta: 0:14:48 lr: 0.000003 loss_cls: 3.9445 (3.7970) grad_norm: 4.2609 (4.5342) time: 0.7816 data: 0.0003 max mem: 8421 +[2024-12-06 03:01:45 root] (utils.py 283): INFO Epoch: [25] [1380/2502] eta: 0:14:40 lr: 0.000003 loss_cls: 4.0858 (3.7982) grad_norm: 4.3937 (4.5344) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 03:01:53 root] (utils.py 283): INFO Epoch: [25] [1390/2502] eta: 0:14:33 lr: 0.000003 loss_cls: 3.9862 (3.7984) grad_norm: 4.4846 (4.5342) time: 0.7967 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:01 root] (utils.py 283): INFO Epoch: [25] [1400/2502] eta: 0:14:25 lr: 0.000003 loss_cls: 3.9444 (3.7983) grad_norm: 4.3516 (4.5323) time: 0.8019 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:09 root] (utils.py 283): INFO Epoch: [25] [1410/2502] eta: 0:14:17 lr: 0.000003 loss_cls: 3.7170 (3.7979) grad_norm: 4.2179 (4.5300) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:16 root] (utils.py 283): INFO Epoch: [25] [1420/2502] eta: 0:14:09 lr: 0.000003 loss_cls: 3.9014 (3.7996) grad_norm: 4.2320 (4.5284) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:24 root] (utils.py 283): INFO Epoch: [25] [1430/2502] eta: 0:14:01 lr: 0.000003 loss_cls: 3.8994 (3.7993) grad_norm: 4.4113 (4.5286) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:32 root] (utils.py 283): INFO Epoch: [25] [1440/2502] eta: 0:13:53 lr: 0.000003 loss_cls: 3.7306 (3.7997) grad_norm: 4.5165 (4.5275) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:40 root] (utils.py 283): INFO Epoch: [25] [1450/2502] eta: 0:13:46 lr: 0.000003 loss_cls: 3.9908 (3.8014) grad_norm: 4.5131 (4.5270) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:48 root] (utils.py 283): INFO Epoch: [25] [1460/2502] eta: 0:13:38 lr: 0.000003 loss_cls: 4.1025 (3.8009) grad_norm: 4.4444 (4.5264) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-06 03:02:56 root] (utils.py 283): INFO Epoch: [25] [1470/2502] eta: 0:13:30 lr: 0.000003 loss_cls: 4.0171 (3.8010) grad_norm: 4.4002 (4.5267) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:03 root] (utils.py 283): INFO Epoch: [25] [1480/2502] eta: 0:13:22 lr: 0.000003 loss_cls: 3.9457 (3.8005) grad_norm: 4.3525 (4.5248) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:11 root] (utils.py 283): INFO Epoch: [25] [1490/2502] eta: 0:13:14 lr: 0.000003 loss_cls: 4.0315 (3.8030) grad_norm: 4.3359 (4.5245) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:19 root] (utils.py 283): INFO Epoch: [25] [1500/2502] eta: 0:13:06 lr: 0.000003 loss_cls: 4.1190 (3.8036) grad_norm: 4.2749 (4.5244) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:27 root] (utils.py 283): INFO Epoch: [25] [1510/2502] eta: 0:12:58 lr: 0.000003 loss_cls: 3.9290 (3.8026) grad_norm: 4.2979 (4.5234) time: 0.7742 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:35 root] (utils.py 283): INFO Epoch: [25] [1520/2502] eta: 0:12:50 lr: 0.000003 loss_cls: 3.5667 (3.8010) grad_norm: 4.3518 (4.5241) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:42 root] (utils.py 283): INFO Epoch: [25] [1530/2502] eta: 0:12:43 lr: 0.000003 loss_cls: 3.6187 (3.8015) grad_norm: 4.3341 (4.5218) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:50 root] (utils.py 283): INFO Epoch: [25] [1540/2502] eta: 0:12:35 lr: 0.000003 loss_cls: 4.0352 (3.8018) grad_norm: 4.3091 (4.5252) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 03:03:58 root] (utils.py 283): INFO Epoch: [25] [1550/2502] eta: 0:12:27 lr: 0.000003 loss_cls: 3.9475 (3.8017) grad_norm: 4.3409 (4.5245) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-06 03:04:06 root] (utils.py 283): INFO Epoch: [25] [1560/2502] eta: 0:12:19 lr: 0.000003 loss_cls: 3.8311 (3.8024) grad_norm: 4.3409 (4.5235) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 03:04:14 root] (utils.py 283): INFO Epoch: [25] [1570/2502] eta: 0:12:11 lr: 0.000003 loss_cls: 4.0494 (3.8023) grad_norm: 4.4773 (4.5259) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-06 03:04:22 root] (utils.py 283): INFO Epoch: [25] [1580/2502] eta: 0:12:03 lr: 0.000003 loss_cls: 4.0494 (3.8047) grad_norm: 4.5584 (4.5287) time: 0.7946 data: 0.0003 max mem: 8421 +[2024-12-06 03:04:30 root] (utils.py 283): INFO Epoch: [25] [1590/2502] eta: 0:11:56 lr: 0.000003 loss_cls: 4.0884 (3.8044) grad_norm: 4.4897 (4.5304) time: 0.7913 data: 0.0003 max mem: 8421 +[2024-12-06 03:04:38 root] (utils.py 283): INFO Epoch: [25] [1600/2502] eta: 0:11:48 lr: 0.000003 loss_cls: 4.0597 (3.8038) grad_norm: 4.3231 (4.5299) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-06 03:04:45 root] (utils.py 283): INFO Epoch: [25] [1610/2502] eta: 0:11:40 lr: 0.000003 loss_cls: 3.8740 (3.8028) grad_norm: 4.1022 (4.5281) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 03:04:53 root] (utils.py 283): INFO Epoch: [25] [1620/2502] eta: 0:11:32 lr: 0.000003 loss_cls: 3.8740 (3.8043) grad_norm: 4.2257 (4.5302) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:01 root] (utils.py 283): INFO Epoch: [25] [1630/2502] eta: 0:11:24 lr: 0.000003 loss_cls: 4.1465 (3.8054) grad_norm: 4.4060 (4.5298) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:09 root] (utils.py 283): INFO Epoch: [25] [1640/2502] eta: 0:11:16 lr: 0.000003 loss_cls: 4.1926 (3.8069) grad_norm: 4.3019 (4.5295) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:17 root] (utils.py 283): INFO Epoch: [25] [1650/2502] eta: 0:11:08 lr: 0.000003 loss_cls: 4.1689 (3.8081) grad_norm: 4.2579 (4.5285) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:25 root] (utils.py 283): INFO Epoch: [25] [1660/2502] eta: 0:11:01 lr: 0.000003 loss_cls: 3.6991 (3.8060) grad_norm: 4.2382 (4.5269) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:32 root] (utils.py 283): INFO Epoch: [25] [1670/2502] eta: 0:10:53 lr: 0.000003 loss_cls: 3.6991 (3.8061) grad_norm: 4.2244 (4.5278) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:40 root] (utils.py 283): INFO Epoch: [25] [1680/2502] eta: 0:10:45 lr: 0.000003 loss_cls: 4.0307 (3.8072) grad_norm: 4.3965 (4.5288) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:48 root] (utils.py 283): INFO Epoch: [25] [1690/2502] eta: 0:10:37 lr: 0.000003 loss_cls: 4.0288 (3.8071) grad_norm: 4.1046 (4.5265) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-06 03:05:56 root] (utils.py 283): INFO Epoch: [25] [1700/2502] eta: 0:10:29 lr: 0.000003 loss_cls: 3.9895 (3.8069) grad_norm: 4.1382 (4.5260) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:04 root] (utils.py 283): INFO Epoch: [25] [1710/2502] eta: 0:10:21 lr: 0.000003 loss_cls: 3.9309 (3.8069) grad_norm: 4.4076 (4.5266) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:12 root] (utils.py 283): INFO Epoch: [25] [1720/2502] eta: 0:10:13 lr: 0.000003 loss_cls: 3.8478 (3.8057) grad_norm: 4.5548 (4.5277) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:19 root] (utils.py 283): INFO Epoch: [25] [1730/2502] eta: 0:10:06 lr: 0.000003 loss_cls: 3.8478 (3.8062) grad_norm: 4.3581 (4.5268) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:27 root] (utils.py 283): INFO Epoch: [25] [1740/2502] eta: 0:09:58 lr: 0.000003 loss_cls: 3.7463 (3.8050) grad_norm: 4.3522 (4.5313) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:35 root] (utils.py 283): INFO Epoch: [25] [1750/2502] eta: 0:09:50 lr: 0.000003 loss_cls: 3.7463 (3.8059) grad_norm: 4.4859 (4.5317) time: 0.7912 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:43 root] (utils.py 283): INFO Epoch: [25] [1760/2502] eta: 0:09:42 lr: 0.000003 loss_cls: 4.0253 (3.8067) grad_norm: 4.3931 (4.5305) time: 0.7932 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:51 root] (utils.py 283): INFO Epoch: [25] [1770/2502] eta: 0:09:34 lr: 0.000003 loss_cls: 3.8186 (3.8052) grad_norm: 4.2414 (4.5298) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 03:06:59 root] (utils.py 283): INFO Epoch: [25] [1780/2502] eta: 0:09:26 lr: 0.000003 loss_cls: 3.6755 (3.8050) grad_norm: 4.3299 (4.5295) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 03:07:07 root] (utils.py 283): INFO Epoch: [25] [1790/2502] eta: 0:09:18 lr: 0.000003 loss_cls: 4.0972 (3.8070) grad_norm: 4.3299 (4.5288) time: 0.7812 data: 0.0002 max mem: 8421 +[2024-12-06 03:07:14 root] (utils.py 283): INFO Epoch: [25] [1800/2502] eta: 0:09:11 lr: 0.000003 loss_cls: 4.0972 (3.8077) grad_norm: 4.4345 (4.5287) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 03:07:22 root] (utils.py 283): INFO Epoch: [25] [1810/2502] eta: 0:09:03 lr: 0.000003 loss_cls: 3.7929 (3.8068) grad_norm: 4.3331 (4.5267) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 03:07:30 root] (utils.py 283): INFO Epoch: [25] [1820/2502] eta: 0:08:55 lr: 0.000003 loss_cls: 3.8373 (3.8076) grad_norm: 4.3097 (4.5262) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 03:07:38 root] (utils.py 283): INFO Epoch: [25] [1830/2502] eta: 0:08:47 lr: 0.000003 loss_cls: 4.0951 (3.8080) grad_norm: 4.3097 (4.5287) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 03:07:46 root] (utils.py 283): INFO Epoch: [25] [1840/2502] eta: 0:08:39 lr: 0.000003 loss_cls: 3.7986 (3.8073) grad_norm: 4.2329 (4.5263) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 03:07:54 root] (utils.py 283): INFO Epoch: [25] [1850/2502] eta: 0:08:31 lr: 0.000003 loss_cls: 3.6054 (3.8069) grad_norm: 4.2553 (4.5301) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:01 root] (utils.py 283): INFO Epoch: [25] [1860/2502] eta: 0:08:23 lr: 0.000003 loss_cls: 3.6042 (3.8066) grad_norm: 4.3453 (4.5319) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:09 root] (utils.py 283): INFO Epoch: [25] [1870/2502] eta: 0:08:16 lr: 0.000003 loss_cls: 3.5958 (3.8054) grad_norm: 4.3929 (4.5319) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:17 root] (utils.py 283): INFO Epoch: [25] [1880/2502] eta: 0:08:08 lr: 0.000003 loss_cls: 3.7143 (3.8061) grad_norm: 4.4531 (4.5322) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:25 root] (utils.py 283): INFO Epoch: [25] [1890/2502] eta: 0:08:00 lr: 0.000003 loss_cls: 4.2018 (3.8074) grad_norm: 4.3941 (4.5313) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:33 root] (utils.py 283): INFO Epoch: [25] [1900/2502] eta: 0:07:52 lr: 0.000003 loss_cls: 4.0929 (3.8074) grad_norm: 4.2681 (4.5311) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:41 root] (utils.py 283): INFO Epoch: [25] [1910/2502] eta: 0:07:44 lr: 0.000003 loss_cls: 3.7731 (3.8062) grad_norm: 4.3697 (4.5303) time: 0.7826 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:49 root] (utils.py 283): INFO Epoch: [25] [1920/2502] eta: 0:07:36 lr: 0.000003 loss_cls: 3.6408 (3.8058) grad_norm: 4.2991 (4.5287) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:08:56 root] (utils.py 283): INFO Epoch: [25] [1930/2502] eta: 0:07:29 lr: 0.000003 loss_cls: 3.5957 (3.8037) grad_norm: 4.3070 (4.5282) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:09:04 root] (utils.py 283): INFO Epoch: [25] [1940/2502] eta: 0:07:21 lr: 0.000003 loss_cls: 3.4663 (3.8032) grad_norm: 4.3070 (4.5270) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 03:09:12 root] (utils.py 283): INFO Epoch: [25] [1950/2502] eta: 0:07:13 lr: 0.000003 loss_cls: 3.9645 (3.8035) grad_norm: 4.2177 (4.5264) time: 0.7805 data: 0.0002 max mem: 8421 +[2024-12-06 03:09:20 root] (utils.py 283): INFO Epoch: [25] [1960/2502] eta: 0:07:05 lr: 0.000003 loss_cls: 3.8800 (3.8043) grad_norm: 4.3784 (4.5274) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:09:28 root] (utils.py 283): INFO Epoch: [25] [1970/2502] eta: 0:06:57 lr: 0.000003 loss_cls: 3.8936 (3.8046) grad_norm: 4.3916 (4.5271) time: 0.7960 data: 0.0003 max mem: 8421 +[2024-12-06 03:09:36 root] (utils.py 283): INFO Epoch: [25] [1980/2502] eta: 0:06:49 lr: 0.000003 loss_cls: 3.8636 (3.8040) grad_norm: 4.4029 (4.5271) time: 0.8035 data: 0.0003 max mem: 8421 +[2024-12-06 03:09:44 root] (utils.py 283): INFO Epoch: [25] [1990/2502] eta: 0:06:41 lr: 0.000003 loss_cls: 3.8740 (3.8046) grad_norm: 4.4057 (4.5273) time: 0.7919 data: 0.0003 max mem: 8421 +[2024-12-06 03:09:52 root] (utils.py 283): INFO Epoch: [25] [2000/2502] eta: 0:06:34 lr: 0.000003 loss_cls: 3.9875 (3.8045) grad_norm: 4.4299 (4.5266) time: 0.7924 data: 0.0003 max mem: 8421 +[2024-12-06 03:10:00 root] (utils.py 283): INFO Epoch: [25] [2010/2502] eta: 0:06:26 lr: 0.000003 loss_cls: 3.8674 (3.8040) grad_norm: 4.4013 (4.5267) time: 0.7945 data: 0.0002 max mem: 8421 +[2024-12-06 03:10:07 root] (utils.py 283): INFO Epoch: [25] [2020/2502] eta: 0:06:18 lr: 0.000003 loss_cls: 4.0384 (3.8059) grad_norm: 4.3733 (4.5263) time: 0.7869 data: 0.0003 max mem: 8421 +[2024-12-06 03:10:15 root] (utils.py 283): INFO Epoch: [25] [2030/2502] eta: 0:06:10 lr: 0.000003 loss_cls: 4.1554 (3.8072) grad_norm: 4.3377 (4.5271) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-06 03:10:23 root] (utils.py 283): INFO Epoch: [25] [2040/2502] eta: 0:06:02 lr: 0.000003 loss_cls: 3.7706 (3.8063) grad_norm: 4.2698 (4.5261) time: 0.7865 data: 0.0003 max mem: 8421 +[2024-12-06 03:10:31 root] (utils.py 283): INFO Epoch: [25] [2050/2502] eta: 0:05:54 lr: 0.000003 loss_cls: 3.6960 (3.8056) grad_norm: 4.4667 (4.5267) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 03:10:39 root] (utils.py 283): INFO Epoch: [25] [2060/2502] eta: 0:05:47 lr: 0.000003 loss_cls: 3.9923 (3.8066) grad_norm: 4.3817 (4.5250) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 03:10:47 root] (utils.py 283): INFO Epoch: [25] [2070/2502] eta: 0:05:39 lr: 0.000003 loss_cls: 4.0459 (3.8060) grad_norm: 4.2963 (4.5253) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-06 03:10:54 root] (utils.py 283): INFO Epoch: [25] [2080/2502] eta: 0:05:31 lr: 0.000003 loss_cls: 3.9147 (3.8060) grad_norm: 4.1201 (4.5242) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:02 root] (utils.py 283): INFO Epoch: [25] [2090/2502] eta: 0:05:23 lr: 0.000003 loss_cls: 3.8352 (3.8059) grad_norm: 4.1560 (4.5250) time: 0.7890 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:10 root] (utils.py 283): INFO Epoch: [25] [2100/2502] eta: 0:05:15 lr: 0.000003 loss_cls: 3.8329 (3.8055) grad_norm: 4.3064 (4.5250) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:18 root] (utils.py 283): INFO Epoch: [25] [2110/2502] eta: 0:05:07 lr: 0.000003 loss_cls: 4.0044 (3.8074) grad_norm: 4.4643 (4.5271) time: 0.7962 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:26 root] (utils.py 283): INFO Epoch: [25] [2120/2502] eta: 0:04:59 lr: 0.000003 loss_cls: 3.9616 (3.8069) grad_norm: 4.5190 (4.5265) time: 0.7925 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:34 root] (utils.py 283): INFO Epoch: [25] [2130/2502] eta: 0:04:52 lr: 0.000003 loss_cls: 3.8091 (3.8074) grad_norm: 4.4294 (4.5262) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:42 root] (utils.py 283): INFO Epoch: [25] [2140/2502] eta: 0:04:44 lr: 0.000003 loss_cls: 3.8986 (3.8083) grad_norm: 4.4294 (4.5265) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:50 root] (utils.py 283): INFO Epoch: [25] [2150/2502] eta: 0:04:36 lr: 0.000003 loss_cls: 4.2650 (3.8087) grad_norm: 4.4879 (4.5257) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 03:11:57 root] (utils.py 283): INFO Epoch: [25] [2160/2502] eta: 0:04:28 lr: 0.000003 loss_cls: 3.9485 (3.8084) grad_norm: 4.3656 (4.5257) time: 0.7768 data: 0.0002 max mem: 8421 +[2024-12-06 03:12:05 root] (utils.py 283): INFO Epoch: [25] [2170/2502] eta: 0:04:20 lr: 0.000003 loss_cls: 3.8833 (3.8084) grad_norm: 4.4250 (4.5256) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 03:12:13 root] (utils.py 283): INFO Epoch: [25] [2180/2502] eta: 0:04:12 lr: 0.000003 loss_cls: 3.8590 (3.8088) grad_norm: 4.4889 (4.5259) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 03:12:21 root] (utils.py 283): INFO Epoch: [25] [2190/2502] eta: 0:04:04 lr: 0.000003 loss_cls: 3.8590 (3.8088) grad_norm: 4.4889 (4.5257) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 03:12:29 root] (utils.py 283): INFO Epoch: [25] [2200/2502] eta: 0:03:57 lr: 0.000003 loss_cls: 3.8665 (3.8089) grad_norm: 4.4758 (4.5256) time: 0.7918 data: 0.0003 max mem: 8421 +[2024-12-06 03:12:37 root] (utils.py 283): INFO Epoch: [25] [2210/2502] eta: 0:03:49 lr: 0.000003 loss_cls: 3.9366 (3.8091) grad_norm: 4.3641 (4.5257) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-06 03:12:44 root] (utils.py 283): INFO Epoch: [25] [2220/2502] eta: 0:03:41 lr: 0.000003 loss_cls: 4.2098 (3.8111) grad_norm: 4.4816 (4.5269) time: 0.7789 data: 0.0002 max mem: 8421 +[2024-12-06 03:12:52 root] (utils.py 283): INFO Epoch: [25] [2230/2502] eta: 0:03:33 lr: 0.000003 loss_cls: 4.2170 (3.8124) grad_norm: 4.5346 (4.5255) time: 0.7799 data: 0.0002 max mem: 8421 +[2024-12-06 03:13:00 root] (utils.py 283): INFO Epoch: [25] [2240/2502] eta: 0:03:25 lr: 0.000003 loss_cls: 4.0709 (3.8129) grad_norm: 4.1767 (4.5242) time: 0.7803 data: 0.0002 max mem: 8421 +[2024-12-06 03:13:09 root] (utils.py 283): INFO Epoch: [25] [2250/2502] eta: 0:03:17 lr: 0.000003 loss_cls: 3.9975 (3.8134) grad_norm: 4.2054 (4.5232) time: 0.8178 data: 0.0003 max mem: 8421 +[2024-12-06 03:13:18 root] (utils.py 283): INFO Epoch: [25] [2260/2502] eta: 0:03:10 lr: 0.000003 loss_cls: 4.0620 (3.8145) grad_norm: 4.2224 (4.5223) time: 0.8773 data: 0.0004 max mem: 8421 +[2024-12-06 03:13:26 root] (utils.py 283): INFO Epoch: [25] [2270/2502] eta: 0:03:02 lr: 0.000003 loss_cls: 3.6966 (3.8119) grad_norm: 4.2633 (4.5214) time: 0.8446 data: 0.0004 max mem: 8421 +[2024-12-06 03:13:33 root] (utils.py 283): INFO Epoch: [25] [2280/2502] eta: 0:02:54 lr: 0.000003 loss_cls: 3.4371 (3.8118) grad_norm: 4.2633 (4.5217) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-06 03:13:41 root] (utils.py 283): INFO Epoch: [25] [2290/2502] eta: 0:02:46 lr: 0.000003 loss_cls: 3.9246 (3.8113) grad_norm: 4.2907 (4.5208) time: 0.7931 data: 0.0003 max mem: 8421 +[2024-12-06 03:13:49 root] (utils.py 283): INFO Epoch: [25] [2300/2502] eta: 0:02:38 lr: 0.000003 loss_cls: 3.9595 (3.8118) grad_norm: 4.2917 (4.5211) time: 0.7955 data: 0.0003 max mem: 8421 +[2024-12-06 03:13:57 root] (utils.py 283): INFO Epoch: [25] [2310/2502] eta: 0:02:30 lr: 0.000003 loss_cls: 3.9916 (3.8114) grad_norm: 4.4520 (4.5215) time: 0.8009 data: 0.0003 max mem: 8421 +[2024-12-06 03:14:05 root] (utils.py 283): INFO Epoch: [25] [2320/2502] eta: 0:02:23 lr: 0.000003 loss_cls: 3.8278 (3.8113) grad_norm: 4.3381 (4.5207) time: 0.7987 data: 0.0003 max mem: 8421 +[2024-12-06 03:14:13 root] (utils.py 283): INFO Epoch: [25] [2330/2502] eta: 0:02:15 lr: 0.000003 loss_cls: 3.8943 (3.8113) grad_norm: 4.1721 (4.5197) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 03:14:21 root] (utils.py 283): INFO Epoch: [25] [2340/2502] eta: 0:02:07 lr: 0.000003 loss_cls: 3.9510 (3.8122) grad_norm: 4.1603 (4.5185) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 03:14:29 root] (utils.py 283): INFO Epoch: [25] [2350/2502] eta: 0:01:59 lr: 0.000003 loss_cls: 3.9783 (3.8123) grad_norm: 4.4012 (4.5191) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 03:14:37 root] (utils.py 283): INFO Epoch: [25] [2360/2502] eta: 0:01:51 lr: 0.000003 loss_cls: 3.8341 (3.8120) grad_norm: 4.4581 (4.5185) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 03:14:44 root] (utils.py 283): INFO Epoch: [25] [2370/2502] eta: 0:01:43 lr: 0.000003 loss_cls: 3.8331 (3.8128) grad_norm: 4.4728 (4.5186) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-06 03:14:52 root] (utils.py 283): INFO Epoch: [25] [2380/2502] eta: 0:01:35 lr: 0.000003 loss_cls: 3.8331 (3.8120) grad_norm: 4.3911 (4.5177) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:00 root] (utils.py 283): INFO Epoch: [25] [2390/2502] eta: 0:01:28 lr: 0.000003 loss_cls: 3.7360 (3.8115) grad_norm: 4.3911 (4.5171) time: 0.7900 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:08 root] (utils.py 283): INFO Epoch: [25] [2400/2502] eta: 0:01:20 lr: 0.000003 loss_cls: 3.5945 (3.8106) grad_norm: 4.2128 (4.5162) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:16 root] (utils.py 283): INFO Epoch: [25] [2410/2502] eta: 0:01:12 lr: 0.000003 loss_cls: 3.7834 (3.8111) grad_norm: 4.1807 (4.5160) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:24 root] (utils.py 283): INFO Epoch: [25] [2420/2502] eta: 0:01:04 lr: 0.000003 loss_cls: 3.9218 (3.8105) grad_norm: 4.2330 (4.5170) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:31 root] (utils.py 283): INFO Epoch: [25] [2430/2502] eta: 0:00:56 lr: 0.000003 loss_cls: 4.0131 (3.8111) grad_norm: 4.3565 (4.5168) time: 0.7822 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:39 root] (utils.py 283): INFO Epoch: [25] [2440/2502] eta: 0:00:48 lr: 0.000003 loss_cls: 4.0460 (3.8127) grad_norm: 4.4011 (4.5167) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:47 root] (utils.py 283): INFO Epoch: [25] [2450/2502] eta: 0:00:40 lr: 0.000003 loss_cls: 4.0039 (3.8126) grad_norm: 4.4039 (4.5162) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-06 03:15:55 root] (utils.py 283): INFO Epoch: [25] [2460/2502] eta: 0:00:33 lr: 0.000003 loss_cls: 4.0585 (3.8146) grad_norm: 4.4151 (4.5169) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 03:16:03 root] (utils.py 283): INFO Epoch: [25] [2470/2502] eta: 0:00:25 lr: 0.000003 loss_cls: 4.0585 (3.8137) grad_norm: 4.3389 (4.5158) time: 0.7822 data: 0.0002 max mem: 8421 +[2024-12-06 03:16:10 root] (utils.py 283): INFO Epoch: [25] [2480/2502] eta: 0:00:17 lr: 0.000003 loss_cls: 3.7242 (3.8138) grad_norm: 4.2031 (4.5150) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 03:16:19 root] (utils.py 283): INFO Epoch: [25] [2490/2502] eta: 0:00:09 lr: 0.000003 loss_cls: 3.9974 (3.8143) grad_norm: 4.2116 (4.5140) time: 0.8026 data: 0.0253 max mem: 8421 +[2024-12-06 03:16:26 root] (utils.py 283): INFO Epoch: [25] [2500/2502] eta: 0:00:01 lr: 0.000003 loss_cls: 4.0057 (3.8140) grad_norm: 4.2543 (4.5149) time: 0.8043 data: 0.0253 max mem: 8421 +[2024-12-06 03:16:27 root] (utils.py 283): INFO Epoch: [25] [2501/2502] eta: 0:00:00 lr: 0.000003 loss_cls: 4.0057 (3.8142) grad_norm: 4.2543 (4.5152) time: 0.8042 data: 0.0253 max mem: 8421 +[2024-12-06 03:16:27 root] (utils.py 297): INFO Epoch: [25] Total time: 0:32:46 (0.7861 s / it) +[2024-12-06 03:16:27 root] (engine.py 179): INFO Averaged stats:lr: 0.000003 loss_cls: 4.0057 (3.8136) grad_norm: 4.2543 (4.5152) +[2024-12-06 03:16:28 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7069 (0.7069) acc1: 85.9375 (85.9375) acc3: 95.3125 (95.3125) acc5: 98.4375 (98.4375) time: 0.1308 data: 0.0004 max mem: 8421 +[2024-12-06 03:16:29 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7817 (0.8338) acc1: 84.3750 (82.6705) acc3: 93.7500 (93.3239) acc5: 96.0938 (96.0938) time: 0.1320 data: 0.0004 max mem: 8421 +[2024-12-06 03:16:30 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8715 (0.8913) acc1: 79.6875 (81.4732) acc3: 92.9688 (92.5223) acc5: 96.0938 (95.3869) time: 0.1320 data: 0.0004 max mem: 8421 +[2024-12-06 03:16:32 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9375 (0.8994) acc1: 79.6875 (80.6956) acc3: 92.1875 (92.8427) acc5: 96.0938 (95.4637) time: 0.1327 data: 0.0005 max mem: 8421 +[2024-12-06 03:16:33 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8337 (0.8934) acc1: 80.4688 (80.8308) acc3: 94.5312 (93.0450) acc5: 96.0938 (95.5412) time: 0.1422 data: 0.0089 max mem: 8421 +[2024-12-06 03:16:35 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0138 (0.9879) acc1: 75.0000 (78.6458) acc3: 89.8438 (91.5135) acc5: 92.1875 (94.4087) time: 0.1491 data: 0.0137 max mem: 8421 +[2024-12-06 03:16:37 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3145 (1.0325) acc1: 71.8750 (77.6511) acc3: 86.7188 (90.7787) acc5: 89.0625 (93.6603) time: 0.1684 data: 0.0325 max mem: 8421 +[2024-12-06 03:16:39 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2238 (1.0751) acc1: 71.8750 (76.5185) acc3: 87.5000 (90.1959) acc5: 89.8438 (93.2108) time: 0.1893 data: 0.0535 max mem: 8421 +[2024-12-06 03:16:41 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2992 (1.1129) acc1: 71.0938 (75.7137) acc3: 85.1562 (89.5158) acc5: 90.6250 (92.7180) time: 0.1944 data: 0.0582 max mem: 8421 +[2024-12-06 03:16:42 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3458 (1.1434) acc1: 68.7500 (74.8970) acc3: 84.3750 (88.9681) acc5: 89.8438 (92.2819) time: 0.1670 data: 0.0324 max mem: 8421 +[2024-12-06 03:16:43 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2141 (1.1320) acc1: 71.0938 (75.0640) acc3: 87.5000 (89.1280) acc5: 90.6250 (92.4880) time: 0.1678 data: 0.0323 max mem: 8421 +[2024-12-06 03:16:43 root] (utils.py 297): INFO Test: Total time: 0:00:15 (0.1553 s / it) +[2024-12-06 03:16:44 root] (engine.py 264): INFO * Acc@1 75.002 Acc@3 89.104 Acc@5 92.424 loss 1.131 flops 1.285 layer_flops 1.251 +[2024-12-06 03:16:44 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.0% +[2024-12-06 03:16:44 root] (main.py 551): INFO Max accuracy: 75.04% +[2024-12-06 03:16:44 root] (utils.py 283): INFO Epoch: [26] [ 0/2502] eta: 0:34:28 lr: 0.000002 loss_cls: 3.5746 (3.5746) grad_norm: 4.4766 (4.4766) time: 0.8269 data: 0.0004 max mem: 8421 +[2024-12-06 03:16:52 root] (utils.py 283): INFO Epoch: [26] [ 10/2502] eta: 0:32:51 lr: 0.000002 loss_cls: 3.9167 (3.8064) grad_norm: 4.3209 (4.4006) time: 0.7912 data: 0.0003 max mem: 8421 +[2024-12-06 03:17:00 root] (utils.py 283): INFO Epoch: [26] [ 20/2502] eta: 0:32:54 lr: 0.000002 loss_cls: 3.9197 (3.8019) grad_norm: 4.3412 (4.4021) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-06 03:17:08 root] (utils.py 283): INFO Epoch: [26] [ 30/2502] eta: 0:32:36 lr: 0.000002 loss_cls: 3.8209 (3.7618) grad_norm: 4.3412 (4.4213) time: 0.7915 data: 0.0003 max mem: 8421 +[2024-12-06 03:17:16 root] (utils.py 283): INFO Epoch: [26] [ 40/2502] eta: 0:32:25 lr: 0.000002 loss_cls: 3.6211 (3.6992) grad_norm: 4.3778 (4.4307) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 03:17:24 root] (utils.py 283): INFO Epoch: [26] [ 50/2502] eta: 0:32:29 lr: 0.000002 loss_cls: 3.6383 (3.7038) grad_norm: 4.2890 (4.3945) time: 0.8012 data: 0.0002 max mem: 8421 +[2024-12-06 03:17:32 root] (utils.py 283): INFO Epoch: [26] [ 60/2502] eta: 0:32:18 lr: 0.000002 loss_cls: 3.8611 (3.7302) grad_norm: 4.2459 (4.4940) time: 0.8011 data: 0.0003 max mem: 8421 +[2024-12-06 03:17:40 root] (utils.py 283): INFO Epoch: [26] [ 70/2502] eta: 0:32:07 lr: 0.000002 loss_cls: 4.0427 (3.7502) grad_norm: 4.4571 (4.6010) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 03:17:48 root] (utils.py 283): INFO Epoch: [26] [ 80/2502] eta: 0:31:55 lr: 0.000002 loss_cls: 3.8888 (3.7591) grad_norm: 4.4084 (4.6587) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 03:17:56 root] (utils.py 283): INFO Epoch: [26] [ 90/2502] eta: 0:31:47 lr: 0.000002 loss_cls: 3.6622 (3.7417) grad_norm: 4.3428 (4.6933) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:03 root] (utils.py 283): INFO Epoch: [26] [ 100/2502] eta: 0:31:37 lr: 0.000002 loss_cls: 3.3970 (3.7021) grad_norm: 4.2283 (4.6629) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:11 root] (utils.py 283): INFO Epoch: [26] [ 110/2502] eta: 0:31:27 lr: 0.000002 loss_cls: 3.3970 (3.7058) grad_norm: 4.3915 (4.6532) time: 0.7804 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:19 root] (utils.py 283): INFO Epoch: [26] [ 120/2502] eta: 0:31:18 lr: 0.000002 loss_cls: 3.8957 (3.7128) grad_norm: 4.3915 (4.6220) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:27 root] (utils.py 283): INFO Epoch: [26] [ 130/2502] eta: 0:31:10 lr: 0.000002 loss_cls: 3.6645 (3.7050) grad_norm: 4.1441 (4.6069) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:35 root] (utils.py 283): INFO Epoch: [26] [ 140/2502] eta: 0:31:00 lr: 0.000002 loss_cls: 3.5911 (3.7059) grad_norm: 4.1648 (4.5844) time: 0.7810 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:43 root] (utils.py 283): INFO Epoch: [26] [ 150/2502] eta: 0:30:51 lr: 0.000002 loss_cls: 3.9976 (3.7215) grad_norm: 4.2667 (4.5715) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:50 root] (utils.py 283): INFO Epoch: [26] [ 160/2502] eta: 0:30:41 lr: 0.000002 loss_cls: 4.0080 (3.7275) grad_norm: 4.2148 (4.5788) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-06 03:18:58 root] (utils.py 283): INFO Epoch: [26] [ 170/2502] eta: 0:30:33 lr: 0.000002 loss_cls: 3.8053 (3.7139) grad_norm: 4.3732 (4.6043) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 03:19:06 root] (utils.py 283): INFO Epoch: [26] [ 180/2502] eta: 0:30:26 lr: 0.000002 loss_cls: 3.8405 (3.7351) grad_norm: 4.4818 (4.5933) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-06 03:19:14 root] (utils.py 283): INFO Epoch: [26] [ 190/2502] eta: 0:30:18 lr: 0.000002 loss_cls: 4.0973 (3.7571) grad_norm: 4.4336 (4.6048) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-06 03:19:22 root] (utils.py 283): INFO Epoch: [26] [ 200/2502] eta: 0:30:09 lr: 0.000002 loss_cls: 4.1726 (3.7768) grad_norm: 4.3699 (4.6015) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 03:19:29 root] (utils.py 283): INFO Epoch: [26] [ 210/2502] eta: 0:30:00 lr: 0.000002 loss_cls: 4.1422 (3.7868) grad_norm: 4.5478 (4.6112) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 03:19:37 root] (utils.py 283): INFO Epoch: [26] [ 220/2502] eta: 0:29:53 lr: 0.000002 loss_cls: 3.9779 (3.7910) grad_norm: 4.5360 (4.6028) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 03:19:45 root] (utils.py 283): INFO Epoch: [26] [ 230/2502] eta: 0:29:46 lr: 0.000002 loss_cls: 3.9611 (3.7940) grad_norm: 4.3356 (4.5963) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-06 03:19:53 root] (utils.py 283): INFO Epoch: [26] [ 240/2502] eta: 0:29:38 lr: 0.000002 loss_cls: 3.8400 (3.7913) grad_norm: 4.3356 (4.5959) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 03:20:01 root] (utils.py 283): INFO Epoch: [26] [ 250/2502] eta: 0:29:31 lr: 0.000002 loss_cls: 3.7834 (3.7881) grad_norm: 4.4382 (4.6310) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-06 03:20:09 root] (utils.py 283): INFO Epoch: [26] [ 260/2502] eta: 0:29:24 lr: 0.000002 loss_cls: 3.7955 (3.7874) grad_norm: 4.4724 (4.6365) time: 0.7964 data: 0.0003 max mem: 8421 +[2024-12-06 03:20:17 root] (utils.py 283): INFO Epoch: [26] [ 270/2502] eta: 0:29:17 lr: 0.000002 loss_cls: 3.8485 (3.7879) grad_norm: 4.4012 (4.6254) time: 0.7976 data: 0.0003 max mem: 8421 +[2024-12-06 03:20:25 root] (utils.py 283): INFO Epoch: [26] [ 280/2502] eta: 0:29:09 lr: 0.000002 loss_cls: 3.9539 (3.7928) grad_norm: 4.3752 (4.6211) time: 0.7932 data: 0.0003 max mem: 8421 +[2024-12-06 03:20:33 root] (utils.py 283): INFO Epoch: [26] [ 290/2502] eta: 0:29:02 lr: 0.000002 loss_cls: 3.8896 (3.7889) grad_norm: 4.3178 (4.6159) time: 0.7913 data: 0.0003 max mem: 8421 +[2024-12-06 03:20:41 root] (utils.py 283): INFO Epoch: [26] [ 300/2502] eta: 0:28:53 lr: 0.000002 loss_cls: 3.7183 (3.7873) grad_norm: 4.4047 (4.6198) time: 0.7846 data: 0.0002 max mem: 8421 +[2024-12-06 03:20:48 root] (utils.py 283): INFO Epoch: [26] [ 310/2502] eta: 0:28:45 lr: 0.000002 loss_cls: 3.7343 (3.7872) grad_norm: 4.4047 (4.6185) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 03:20:56 root] (utils.py 283): INFO Epoch: [26] [ 320/2502] eta: 0:28:36 lr: 0.000002 loss_cls: 3.9347 (3.7943) grad_norm: 4.0824 (4.6227) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:04 root] (utils.py 283): INFO Epoch: [26] [ 330/2502] eta: 0:28:28 lr: 0.000002 loss_cls: 3.9926 (3.7910) grad_norm: 4.1636 (4.6170) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:12 root] (utils.py 283): INFO Epoch: [26] [ 340/2502] eta: 0:28:20 lr: 0.000002 loss_cls: 3.8111 (3.7953) grad_norm: 4.4181 (4.6075) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:20 root] (utils.py 283): INFO Epoch: [26] [ 350/2502] eta: 0:28:12 lr: 0.000002 loss_cls: 3.8387 (3.7960) grad_norm: 4.2710 (4.5966) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:28 root] (utils.py 283): INFO Epoch: [26] [ 360/2502] eta: 0:28:04 lr: 0.000002 loss_cls: 3.8666 (3.7898) grad_norm: 4.1800 (4.5859) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:35 root] (utils.py 283): INFO Epoch: [26] [ 370/2502] eta: 0:27:56 lr: 0.000002 loss_cls: 3.8666 (3.7913) grad_norm: 4.1800 (4.5779) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:43 root] (utils.py 283): INFO Epoch: [26] [ 380/2502] eta: 0:27:48 lr: 0.000002 loss_cls: 3.8960 (3.7884) grad_norm: 4.2951 (4.5822) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:51 root] (utils.py 283): INFO Epoch: [26] [ 390/2502] eta: 0:27:40 lr: 0.000002 loss_cls: 3.5693 (3.7861) grad_norm: 4.4138 (4.5795) time: 0.7894 data: 0.0003 max mem: 8421 +[2024-12-06 03:21:59 root] (utils.py 283): INFO Epoch: [26] [ 400/2502] eta: 0:27:33 lr: 0.000002 loss_cls: 3.7038 (3.7837) grad_norm: 4.3858 (4.5725) time: 0.7947 data: 0.0003 max mem: 8421 +[2024-12-06 03:22:07 root] (utils.py 283): INFO Epoch: [26] [ 410/2502] eta: 0:27:26 lr: 0.000002 loss_cls: 3.7946 (3.7837) grad_norm: 4.1516 (4.5698) time: 0.8013 data: 0.0002 max mem: 8421 +[2024-12-06 03:22:15 root] (utils.py 283): INFO Epoch: [26] [ 420/2502] eta: 0:27:19 lr: 0.000002 loss_cls: 3.9361 (3.7919) grad_norm: 4.3150 (4.5686) time: 0.8082 data: 0.0003 max mem: 8421 +[2024-12-06 03:22:23 root] (utils.py 283): INFO Epoch: [26] [ 430/2502] eta: 0:27:12 lr: 0.000002 loss_cls: 4.1041 (3.7981) grad_norm: 4.2994 (4.5658) time: 0.8006 data: 0.0003 max mem: 8421 +[2024-12-06 03:22:31 root] (utils.py 283): INFO Epoch: [26] [ 440/2502] eta: 0:27:05 lr: 0.000002 loss_cls: 3.8684 (3.7904) grad_norm: 4.2249 (4.5610) time: 0.7977 data: 0.0002 max mem: 8421 +[2024-12-06 03:22:39 root] (utils.py 283): INFO Epoch: [26] [ 450/2502] eta: 0:26:56 lr: 0.000002 loss_cls: 3.3222 (3.7857) grad_norm: 4.4801 (4.5640) time: 0.7946 data: 0.0003 max mem: 8421 +[2024-12-06 03:22:47 root] (utils.py 283): INFO Epoch: [26] [ 460/2502] eta: 0:26:49 lr: 0.000002 loss_cls: 3.9462 (3.7885) grad_norm: 4.4248 (4.5593) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-06 03:22:55 root] (utils.py 283): INFO Epoch: [26] [ 470/2502] eta: 0:26:41 lr: 0.000002 loss_cls: 4.0664 (3.7891) grad_norm: 4.3254 (4.5576) time: 0.7916 data: 0.0003 max mem: 8421 +[2024-12-06 03:23:03 root] (utils.py 283): INFO Epoch: [26] [ 480/2502] eta: 0:26:33 lr: 0.000002 loss_cls: 3.5146 (3.7839) grad_norm: 4.3131 (4.5537) time: 0.7903 data: 0.0003 max mem: 8421 +[2024-12-06 03:23:11 root] (utils.py 283): INFO Epoch: [26] [ 490/2502] eta: 0:26:25 lr: 0.000002 loss_cls: 3.4749 (3.7827) grad_norm: 4.3047 (4.5538) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 03:23:18 root] (utils.py 283): INFO Epoch: [26] [ 500/2502] eta: 0:26:17 lr: 0.000002 loss_cls: 3.8302 (3.7787) grad_norm: 4.2723 (4.5502) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 03:23:26 root] (utils.py 283): INFO Epoch: [26] [ 510/2502] eta: 0:26:09 lr: 0.000002 loss_cls: 3.4978 (3.7724) grad_norm: 4.2704 (4.5484) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 03:23:34 root] (utils.py 283): INFO Epoch: [26] [ 520/2502] eta: 0:26:01 lr: 0.000002 loss_cls: 3.4978 (3.7710) grad_norm: 4.3483 (4.5478) time: 0.7909 data: 0.0003 max mem: 8421 +[2024-12-06 03:23:42 root] (utils.py 283): INFO Epoch: [26] [ 530/2502] eta: 0:25:53 lr: 0.000002 loss_cls: 3.9854 (3.7724) grad_norm: 4.3569 (4.5456) time: 0.7878 data: 0.0003 max mem: 8421 +[2024-12-06 03:23:50 root] (utils.py 283): INFO Epoch: [26] [ 540/2502] eta: 0:25:46 lr: 0.000002 loss_cls: 3.9956 (3.7689) grad_norm: 4.3811 (4.5462) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-06 03:24:06 root] (utils.py 283): INFO Epoch: [26] [ 550/2502] eta: 0:26:08 lr: 0.000002 loss_cls: 3.8926 (3.7717) grad_norm: 4.4524 (4.5475) time: 1.2150 data: 0.0022 max mem: 8421 +[2024-12-06 03:24:34 root] (utils.py 283): INFO Epoch: [26] [ 560/2502] eta: 0:27:08 lr: 0.000002 loss_cls: 4.0321 (3.7719) grad_norm: 4.3715 (4.5463) time: 2.2024 data: 0.0026 max mem: 8421 +[2024-12-06 03:24:42 root] (utils.py 283): INFO Epoch: [26] [ 570/2502] eta: 0:26:58 lr: 0.000002 loss_cls: 3.9391 (3.7769) grad_norm: 4.3468 (4.5454) time: 1.7766 data: 0.0007 max mem: 8421 +[2024-12-06 03:24:50 root] (utils.py 283): INFO Epoch: [26] [ 580/2502] eta: 0:26:48 lr: 0.000002 loss_cls: 3.8302 (3.7753) grad_norm: 4.2106 (4.5396) time: 0.7948 data: 0.0003 max mem: 8421 +[2024-12-06 03:25:03 root] (utils.py 283): INFO Epoch: [26] [ 590/2502] eta: 0:26:53 lr: 0.000002 loss_cls: 3.9703 (3.7756) grad_norm: 4.2106 (4.5373) time: 1.0304 data: 0.0030 max mem: 8421 +[2024-12-06 03:25:29 root] (utils.py 283): INFO Epoch: [26] [ 600/2502] eta: 0:27:43 lr: 0.000002 loss_cls: 4.0263 (3.7800) grad_norm: 4.3358 (4.5485) time: 1.9746 data: 0.0033 max mem: 8421 +[2024-12-06 03:25:42 root] (utils.py 283): INFO Epoch: [26] [ 610/2502] eta: 0:27:45 lr: 0.000002 loss_cls: 4.1078 (3.7816) grad_norm: 4.3016 (4.5438) time: 1.9530 data: 0.0007 max mem: 8421 +[2024-12-06 03:25:49 root] (utils.py 283): INFO Epoch: [26] [ 620/2502] eta: 0:27:33 lr: 0.000002 loss_cls: 3.7621 (3.7803) grad_norm: 4.3917 (4.5415) time: 0.9979 data: 0.0003 max mem: 8421 +[2024-12-06 03:25:59 root] (utils.py 283): INFO Epoch: [26] [ 630/2502] eta: 0:27:26 lr: 0.000002 loss_cls: 3.9638 (3.7791) grad_norm: 4.4746 (4.5490) time: 0.8624 data: 0.0009 max mem: 8421 +[2024-12-06 03:26:16 root] (utils.py 283): INFO Epoch: [26] [ 640/2502] eta: 0:27:41 lr: 0.000002 loss_cls: 3.9638 (3.7814) grad_norm: 4.5400 (4.5652) time: 1.3096 data: 0.0026 max mem: 8421 +[2024-12-06 03:26:39 root] (utils.py 283): INFO Epoch: [26] [ 650/2502] eta: 0:28:12 lr: 0.000002 loss_cls: 3.8705 (3.7791) grad_norm: 4.2450 (4.5607) time: 1.9978 data: 0.0024 max mem: 8421 +[2024-12-06 03:26:47 root] (utils.py 283): INFO Epoch: [26] [ 660/2502] eta: 0:28:00 lr: 0.000002 loss_cls: 3.8705 (3.7786) grad_norm: 4.1904 (4.5576) time: 1.5501 data: 0.0007 max mem: 8421 +[2024-12-06 03:26:55 root] (utils.py 283): INFO Epoch: [26] [ 670/2502] eta: 0:27:48 lr: 0.000002 loss_cls: 3.5657 (3.7752) grad_norm: 4.4704 (4.5577) time: 0.7939 data: 0.0003 max mem: 8421 +[2024-12-06 03:27:08 root] (utils.py 283): INFO Epoch: [26] [ 680/2502] eta: 0:27:49 lr: 0.000002 loss_cls: 3.4479 (3.7716) grad_norm: 4.4774 (4.5564) time: 1.0543 data: 0.0038 max mem: 8421 +[2024-12-06 03:27:34 root] (utils.py 283): INFO Epoch: [26] [ 690/2502] eta: 0:28:25 lr: 0.000002 loss_cls: 3.9580 (3.7748) grad_norm: 4.4187 (4.5555) time: 1.9657 data: 0.0041 max mem: 8421 +[2024-12-06 03:27:45 root] (utils.py 283): INFO Epoch: [26] [ 700/2502] eta: 0:28:20 lr: 0.000002 loss_cls: 4.1004 (3.7793) grad_norm: 4.4187 (4.5546) time: 1.8764 data: 0.0005 max mem: 8421 +[2024-12-06 03:27:53 root] (utils.py 283): INFO Epoch: [26] [ 710/2502] eta: 0:28:07 lr: 0.000002 loss_cls: 3.9068 (3.7779) grad_norm: 4.4800 (4.5541) time: 0.9626 data: 0.0003 max mem: 8421 +[2024-12-06 03:28:01 root] (utils.py 283): INFO Epoch: [26] [ 720/2502] eta: 0:27:54 lr: 0.000002 loss_cls: 3.9068 (3.7811) grad_norm: 4.4054 (4.5495) time: 0.8062 data: 0.0003 max mem: 8421 +[2024-12-06 03:28:10 root] (utils.py 283): INFO Epoch: [26] [ 730/2502] eta: 0:27:42 lr: 0.000002 loss_cls: 3.7685 (3.7790) grad_norm: 4.1052 (4.5496) time: 0.8139 data: 0.0003 max mem: 8421 +[2024-12-06 03:28:18 root] (utils.py 283): INFO Epoch: [26] [ 740/2502] eta: 0:27:30 lr: 0.000002 loss_cls: 3.8902 (3.7812) grad_norm: 4.2987 (4.5500) time: 0.8158 data: 0.0002 max mem: 8421 +[2024-12-06 03:28:26 root] (utils.py 283): INFO Epoch: [26] [ 750/2502] eta: 0:27:17 lr: 0.000002 loss_cls: 3.6681 (3.7750) grad_norm: 4.3406 (4.5487) time: 0.8149 data: 0.0003 max mem: 8421 +[2024-12-06 03:28:34 root] (utils.py 283): INFO Epoch: [26] [ 760/2502] eta: 0:27:06 lr: 0.000002 loss_cls: 3.5303 (3.7739) grad_norm: 4.2969 (4.5459) time: 0.8205 data: 0.0003 max mem: 8421 +[2024-12-06 03:28:46 root] (utils.py 283): INFO Epoch: [26] [ 770/2502] eta: 0:27:02 lr: 0.000002 loss_cls: 3.6531 (3.7744) grad_norm: 4.3327 (4.5482) time: 1.0113 data: 0.0017 max mem: 8421 +[2024-12-06 03:29:14 root] (utils.py 283): INFO Epoch: [26] [ 780/2502] eta: 0:27:34 lr: 0.000002 loss_cls: 3.8804 (3.7756) grad_norm: 4.3709 (4.5457) time: 1.9969 data: 0.0021 max mem: 8421 +[2024-12-06 03:29:27 root] (utils.py 283): INFO Epoch: [26] [ 790/2502] eta: 0:27:31 lr: 0.000002 loss_cls: 3.8804 (3.7771) grad_norm: 4.3830 (4.5455) time: 2.0299 data: 0.0008 max mem: 8421 +[2024-12-06 03:29:35 root] (utils.py 283): INFO Epoch: [26] [ 800/2502] eta: 0:27:17 lr: 0.000002 loss_cls: 3.8092 (3.7772) grad_norm: 4.4566 (4.5442) time: 1.0237 data: 0.0004 max mem: 8421 +[2024-12-06 03:29:43 root] (utils.py 283): INFO Epoch: [26] [ 810/2502] eta: 0:27:04 lr: 0.000002 loss_cls: 3.8092 (3.7774) grad_norm: 4.4981 (4.5640) time: 0.7911 data: 0.0003 max mem: 8421 +[2024-12-06 03:29:51 root] (utils.py 283): INFO Epoch: [26] [ 820/2502] eta: 0:26:51 lr: 0.000002 loss_cls: 3.8217 (3.7778) grad_norm: 4.4505 (4.5642) time: 0.7992 data: 0.0003 max mem: 8421 +[2024-12-06 03:29:58 root] (utils.py 283): INFO Epoch: [26] [ 830/2502] eta: 0:26:38 lr: 0.000002 loss_cls: 3.9874 (3.7819) grad_norm: 4.4333 (4.5707) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-06 03:30:06 root] (utils.py 283): INFO Epoch: [26] [ 840/2502] eta: 0:26:25 lr: 0.000002 loss_cls: 4.1748 (3.7836) grad_norm: 4.3321 (4.5701) time: 0.7857 data: 0.0003 max mem: 8421 +[2024-12-06 03:30:14 root] (utils.py 283): INFO Epoch: [26] [ 850/2502] eta: 0:26:13 lr: 0.000002 loss_cls: 4.0673 (3.7856) grad_norm: 4.5053 (4.5729) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 03:30:22 root] (utils.py 283): INFO Epoch: [26] [ 860/2502] eta: 0:26:00 lr: 0.000002 loss_cls: 3.7991 (3.7832) grad_norm: 4.3738 (4.5760) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 03:30:30 root] (utils.py 283): INFO Epoch: [26] [ 870/2502] eta: 0:25:47 lr: 0.000002 loss_cls: 3.5044 (3.7785) grad_norm: 4.1915 (4.5721) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 03:30:38 root] (utils.py 283): INFO Epoch: [26] [ 880/2502] eta: 0:25:35 lr: 0.000002 loss_cls: 3.6678 (3.7786) grad_norm: 4.2769 (4.5714) time: 0.7842 data: 0.0003 max mem: 8421 +[2024-12-06 03:30:45 root] (utils.py 283): INFO Epoch: [26] [ 890/2502] eta: 0:25:22 lr: 0.000002 loss_cls: 3.7776 (3.7756) grad_norm: 4.2786 (4.5694) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 03:30:53 root] (utils.py 283): INFO Epoch: [26] [ 900/2502] eta: 0:25:10 lr: 0.000002 loss_cls: 3.8601 (3.7777) grad_norm: 4.3682 (4.5689) time: 0.7864 data: 0.0003 max mem: 8421 +[2024-12-06 03:31:01 root] (utils.py 283): INFO Epoch: [26] [ 910/2502] eta: 0:24:58 lr: 0.000002 loss_cls: 3.9689 (3.7788) grad_norm: 4.4826 (4.5681) time: 0.7863 data: 0.0003 max mem: 8421 +[2024-12-06 03:31:09 root] (utils.py 283): INFO Epoch: [26] [ 920/2502] eta: 0:24:46 lr: 0.000002 loss_cls: 3.9632 (3.7793) grad_norm: 4.4865 (4.5686) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 03:31:17 root] (utils.py 283): INFO Epoch: [26] [ 930/2502] eta: 0:24:34 lr: 0.000002 loss_cls: 3.6274 (3.7761) grad_norm: 4.2674 (4.5643) time: 0.7952 data: 0.0003 max mem: 8421 +[2024-12-06 03:31:25 root] (utils.py 283): INFO Epoch: [26] [ 940/2502] eta: 0:24:22 lr: 0.000002 loss_cls: 3.6274 (3.7767) grad_norm: 4.2248 (4.5611) time: 0.7972 data: 0.0003 max mem: 8421 +[2024-12-06 03:31:33 root] (utils.py 283): INFO Epoch: [26] [ 950/2502] eta: 0:24:10 lr: 0.000002 loss_cls: 3.8221 (3.7775) grad_norm: 4.3559 (4.5632) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-06 03:31:41 root] (utils.py 283): INFO Epoch: [26] [ 960/2502] eta: 0:23:59 lr: 0.000002 loss_cls: 3.8221 (3.7773) grad_norm: 4.3699 (4.5712) time: 0.7926 data: 0.0003 max mem: 8421 +[2024-12-06 03:31:49 root] (utils.py 283): INFO Epoch: [26] [ 970/2502] eta: 0:23:47 lr: 0.000002 loss_cls: 3.6241 (3.7743) grad_norm: 4.2832 (4.5702) time: 0.7969 data: 0.0003 max mem: 8421 +[2024-12-06 03:31:57 root] (utils.py 283): INFO Epoch: [26] [ 980/2502] eta: 0:23:36 lr: 0.000002 loss_cls: 3.5607 (3.7731) grad_norm: 4.2847 (4.5714) time: 0.7907 data: 0.0003 max mem: 8421 +[2024-12-06 03:32:04 root] (utils.py 283): INFO Epoch: [26] [ 990/2502] eta: 0:23:24 lr: 0.000002 loss_cls: 3.8109 (3.7740) grad_norm: 4.3227 (4.5698) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 03:32:12 root] (utils.py 283): INFO Epoch: [26] [1000/2502] eta: 0:23:13 lr: 0.000002 loss_cls: 3.9395 (3.7733) grad_norm: 4.3316 (4.5667) time: 0.7915 data: 0.0003 max mem: 8421 +[2024-12-06 03:32:20 root] (utils.py 283): INFO Epoch: [26] [1010/2502] eta: 0:23:02 lr: 0.000002 loss_cls: 3.9511 (3.7744) grad_norm: 4.4353 (4.5680) time: 0.7912 data: 0.0003 max mem: 8421 +[2024-12-06 03:32:28 root] (utils.py 283): INFO Epoch: [26] [1020/2502] eta: 0:22:50 lr: 0.000002 loss_cls: 3.9779 (3.7748) grad_norm: 4.5732 (4.5698) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 03:32:36 root] (utils.py 283): INFO Epoch: [26] [1030/2502] eta: 0:22:39 lr: 0.000002 loss_cls: 3.7274 (3.7733) grad_norm: 4.4495 (4.5683) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-06 03:32:44 root] (utils.py 283): INFO Epoch: [26] [1040/2502] eta: 0:22:28 lr: 0.000002 loss_cls: 3.7274 (3.7726) grad_norm: 4.2539 (4.5689) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 03:32:52 root] (utils.py 283): INFO Epoch: [26] [1050/2502] eta: 0:22:17 lr: 0.000002 loss_cls: 3.8636 (3.7706) grad_norm: 4.4501 (4.5696) time: 0.7985 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:00 root] (utils.py 283): INFO Epoch: [26] [1060/2502] eta: 0:22:06 lr: 0.000002 loss_cls: 3.8107 (3.7705) grad_norm: 4.3728 (4.5663) time: 0.7930 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:08 root] (utils.py 283): INFO Epoch: [26] [1070/2502] eta: 0:21:55 lr: 0.000002 loss_cls: 3.8107 (3.7729) grad_norm: 4.3728 (4.5681) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:16 root] (utils.py 283): INFO Epoch: [26] [1080/2502] eta: 0:21:44 lr: 0.000002 loss_cls: 3.7866 (3.7720) grad_norm: 4.4328 (4.5692) time: 0.7914 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:24 root] (utils.py 283): INFO Epoch: [26] [1090/2502] eta: 0:21:33 lr: 0.000002 loss_cls: 3.7866 (3.7725) grad_norm: 4.5004 (4.5712) time: 0.7904 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:31 root] (utils.py 283): INFO Epoch: [26] [1100/2502] eta: 0:21:22 lr: 0.000002 loss_cls: 3.7273 (3.7701) grad_norm: 4.3640 (4.5702) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:39 root] (utils.py 283): INFO Epoch: [26] [1110/2502] eta: 0:21:12 lr: 0.000002 loss_cls: 3.6651 (3.7698) grad_norm: 4.3640 (4.5689) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:47 root] (utils.py 283): INFO Epoch: [26] [1120/2502] eta: 0:21:01 lr: 0.000002 loss_cls: 3.8641 (3.7713) grad_norm: 4.5147 (4.5694) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 03:33:55 root] (utils.py 283): INFO Epoch: [26] [1130/2502] eta: 0:20:50 lr: 0.000002 loss_cls: 3.8087 (3.7697) grad_norm: 4.4182 (4.5661) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:03 root] (utils.py 283): INFO Epoch: [26] [1140/2502] eta: 0:20:40 lr: 0.000002 loss_cls: 3.3652 (3.7679) grad_norm: 4.2624 (4.5646) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:10 root] (utils.py 283): INFO Epoch: [26] [1150/2502] eta: 0:20:29 lr: 0.000002 loss_cls: 4.0758 (3.7691) grad_norm: 4.3803 (4.5686) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:18 root] (utils.py 283): INFO Epoch: [26] [1160/2502] eta: 0:20:18 lr: 0.000002 loss_cls: 4.0917 (3.7705) grad_norm: 4.4332 (4.5678) time: 0.7817 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:26 root] (utils.py 283): INFO Epoch: [26] [1170/2502] eta: 0:20:08 lr: 0.000002 loss_cls: 3.9522 (3.7701) grad_norm: 4.3627 (4.5666) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:34 root] (utils.py 283): INFO Epoch: [26] [1180/2502] eta: 0:19:57 lr: 0.000002 loss_cls: 3.8801 (3.7709) grad_norm: 4.2302 (4.5648) time: 0.7824 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:42 root] (utils.py 283): INFO Epoch: [26] [1190/2502] eta: 0:19:47 lr: 0.000002 loss_cls: 3.6613 (3.7688) grad_norm: 4.1833 (4.5624) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:50 root] (utils.py 283): INFO Epoch: [26] [1200/2502] eta: 0:19:37 lr: 0.000002 loss_cls: 3.7915 (3.7709) grad_norm: 4.2100 (4.5626) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-06 03:34:58 root] (utils.py 283): INFO Epoch: [26] [1210/2502] eta: 0:19:26 lr: 0.000002 loss_cls: 4.0290 (3.7702) grad_norm: 4.2100 (4.5599) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-06 03:35:06 root] (utils.py 283): INFO Epoch: [26] [1220/2502] eta: 0:19:16 lr: 0.000002 loss_cls: 4.0290 (3.7702) grad_norm: 4.1630 (4.5569) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-06 03:35:14 root] (utils.py 283): INFO Epoch: [26] [1230/2502] eta: 0:19:06 lr: 0.000002 loss_cls: 4.0732 (3.7718) grad_norm: 4.3949 (4.5592) time: 0.8118 data: 0.0003 max mem: 8421 +[2024-12-06 03:35:22 root] (utils.py 283): INFO Epoch: [26] [1240/2502] eta: 0:18:56 lr: 0.000002 loss_cls: 4.1283 (3.7744) grad_norm: 4.4289 (4.5587) time: 0.8123 data: 0.0003 max mem: 8421 +[2024-12-06 03:35:30 root] (utils.py 283): INFO Epoch: [26] [1250/2502] eta: 0:18:46 lr: 0.000002 loss_cls: 4.1006 (3.7755) grad_norm: 4.2795 (4.5569) time: 0.7943 data: 0.0003 max mem: 8421 +[2024-12-06 03:35:38 root] (utils.py 283): INFO Epoch: [26] [1260/2502] eta: 0:18:36 lr: 0.000002 loss_cls: 3.9778 (3.7766) grad_norm: 4.4907 (4.5588) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 03:35:46 root] (utils.py 283): INFO Epoch: [26] [1270/2502] eta: 0:18:26 lr: 0.000002 loss_cls: 4.0293 (3.7782) grad_norm: 4.5774 (4.5583) time: 0.7986 data: 0.0003 max mem: 8421 +[2024-12-06 03:35:54 root] (utils.py 283): INFO Epoch: [26] [1280/2502] eta: 0:18:16 lr: 0.000002 loss_cls: 3.9430 (3.7786) grad_norm: 4.3450 (4.5581) time: 0.7999 data: 0.0003 max mem: 8421 +[2024-12-06 03:36:01 root] (utils.py 283): INFO Epoch: [26] [1290/2502] eta: 0:18:06 lr: 0.000002 loss_cls: 3.8203 (3.7785) grad_norm: 4.3905 (4.5580) time: 0.7875 data: 0.0003 max mem: 8421 +[2024-12-06 03:36:09 root] (utils.py 283): INFO Epoch: [26] [1300/2502] eta: 0:17:56 lr: 0.000002 loss_cls: 3.7956 (3.7785) grad_norm: 4.3905 (4.5623) time: 0.7844 data: 0.0003 max mem: 8421 +[2024-12-06 03:36:17 root] (utils.py 283): INFO Epoch: [26] [1310/2502] eta: 0:17:46 lr: 0.000002 loss_cls: 4.0030 (3.7788) grad_norm: 4.2858 (4.5622) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 03:36:25 root] (utils.py 283): INFO Epoch: [26] [1320/2502] eta: 0:17:36 lr: 0.000002 loss_cls: 4.0769 (3.7805) grad_norm: 4.2933 (4.5618) time: 0.7815 data: 0.0003 max mem: 8421 +[2024-12-06 03:36:33 root] (utils.py 283): INFO Epoch: [26] [1330/2502] eta: 0:17:26 lr: 0.000002 loss_cls: 3.8815 (3.7809) grad_norm: 4.2820 (4.5648) time: 0.7837 data: 0.0003 max mem: 8421 +[2024-12-06 03:36:41 root] (utils.py 283): INFO Epoch: [26] [1340/2502] eta: 0:17:17 lr: 0.000002 loss_cls: 3.8291 (3.7810) grad_norm: 4.2080 (4.5624) time: 0.7903 data: 0.0002 max mem: 8421 +[2024-12-06 03:36:49 root] (utils.py 283): INFO Epoch: [26] [1350/2502] eta: 0:17:07 lr: 0.000002 loss_cls: 3.8988 (3.7818) grad_norm: 4.3592 (4.5628) time: 0.8028 data: 0.0003 max mem: 8421 +[2024-12-06 03:36:57 root] (utils.py 283): INFO Epoch: [26] [1360/2502] eta: 0:16:57 lr: 0.000002 loss_cls: 4.2696 (3.7837) grad_norm: 4.4489 (4.5621) time: 0.8133 data: 0.0003 max mem: 8421 +[2024-12-06 03:37:05 root] (utils.py 283): INFO Epoch: [26] [1370/2502] eta: 0:16:48 lr: 0.000002 loss_cls: 3.9986 (3.7846) grad_norm: 4.4122 (4.5605) time: 0.8217 data: 0.0003 max mem: 8421 +[2024-12-06 03:37:13 root] (utils.py 283): INFO Epoch: [26] [1380/2502] eta: 0:16:38 lr: 0.000002 loss_cls: 3.8697 (3.7853) grad_norm: 4.2573 (4.5585) time: 0.8221 data: 0.0003 max mem: 8421 +[2024-12-06 03:37:21 root] (utils.py 283): INFO Epoch: [26] [1390/2502] eta: 0:16:29 lr: 0.000002 loss_cls: 3.7843 (3.7840) grad_norm: 4.1861 (4.5573) time: 0.8126 data: 0.0003 max mem: 8421 +[2024-12-06 03:37:30 root] (utils.py 283): INFO Epoch: [26] [1400/2502] eta: 0:16:19 lr: 0.000002 loss_cls: 3.9703 (3.7855) grad_norm: 4.3819 (4.5567) time: 0.8129 data: 0.0003 max mem: 8421 +[2024-12-06 03:37:38 root] (utils.py 283): INFO Epoch: [26] [1410/2502] eta: 0:16:10 lr: 0.000002 loss_cls: 3.9775 (3.7861) grad_norm: 4.4037 (4.5602) time: 0.8141 data: 0.0003 max mem: 8421 +[2024-12-06 03:37:46 root] (utils.py 283): INFO Epoch: [26] [1420/2502] eta: 0:16:00 lr: 0.000002 loss_cls: 3.9428 (3.7871) grad_norm: 4.3577 (4.5587) time: 0.8138 data: 0.0003 max mem: 8421 +[2024-12-06 03:37:54 root] (utils.py 283): INFO Epoch: [26] [1430/2502] eta: 0:15:51 lr: 0.000002 loss_cls: 3.9267 (3.7876) grad_norm: 4.3519 (4.5622) time: 0.8141 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:02 root] (utils.py 283): INFO Epoch: [26] [1440/2502] eta: 0:15:42 lr: 0.000002 loss_cls: 3.9815 (3.7885) grad_norm: 4.3351 (4.5605) time: 0.8131 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:10 root] (utils.py 283): INFO Epoch: [26] [1450/2502] eta: 0:15:32 lr: 0.000002 loss_cls: 3.9004 (3.7886) grad_norm: 4.3790 (4.5593) time: 0.8122 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:18 root] (utils.py 283): INFO Epoch: [26] [1460/2502] eta: 0:15:23 lr: 0.000002 loss_cls: 3.6943 (3.7869) grad_norm: 4.4490 (4.5581) time: 0.8120 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:27 root] (utils.py 283): INFO Epoch: [26] [1470/2502] eta: 0:15:13 lr: 0.000002 loss_cls: 3.7044 (3.7869) grad_norm: 4.2872 (4.5575) time: 0.8121 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:35 root] (utils.py 283): INFO Epoch: [26] [1480/2502] eta: 0:15:04 lr: 0.000002 loss_cls: 3.7866 (3.7875) grad_norm: 4.3317 (4.5572) time: 0.8149 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:43 root] (utils.py 283): INFO Epoch: [26] [1490/2502] eta: 0:14:55 lr: 0.000002 loss_cls: 4.0953 (3.7878) grad_norm: 4.2966 (4.5552) time: 0.8155 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:51 root] (utils.py 283): INFO Epoch: [26] [1500/2502] eta: 0:14:45 lr: 0.000002 loss_cls: 3.9306 (3.7873) grad_norm: 4.1701 (4.5540) time: 0.8142 data: 0.0003 max mem: 8421 +[2024-12-06 03:38:59 root] (utils.py 283): INFO Epoch: [26] [1510/2502] eta: 0:14:36 lr: 0.000002 loss_cls: 3.7358 (3.7875) grad_norm: 4.2411 (4.5530) time: 0.8148 data: 0.0003 max mem: 8421 +[2024-12-06 03:39:07 root] (utils.py 283): INFO Epoch: [26] [1520/2502] eta: 0:14:27 lr: 0.000002 loss_cls: 3.8876 (3.7897) grad_norm: 4.3851 (4.5530) time: 0.8148 data: 0.0003 max mem: 8421 +[2024-12-06 03:39:15 root] (utils.py 283): INFO Epoch: [26] [1530/2502] eta: 0:14:18 lr: 0.000002 loss_cls: 4.0862 (3.7904) grad_norm: 4.3974 (4.5518) time: 0.8135 data: 0.0003 max mem: 8421 +[2024-12-06 03:39:24 root] (utils.py 283): INFO Epoch: [26] [1540/2502] eta: 0:14:08 lr: 0.000002 loss_cls: 3.9670 (3.7906) grad_norm: 4.3974 (4.5510) time: 0.8132 data: 0.0003 max mem: 8421 +[2024-12-06 03:39:32 root] (utils.py 283): INFO Epoch: [26] [1550/2502] eta: 0:13:59 lr: 0.000002 loss_cls: 3.9994 (3.7912) grad_norm: 4.1811 (4.5481) time: 0.8148 data: 0.0003 max mem: 8421 +[2024-12-06 03:39:40 root] (utils.py 283): INFO Epoch: [26] [1560/2502] eta: 0:13:50 lr: 0.000002 loss_cls: 3.7694 (3.7906) grad_norm: 4.1536 (4.5468) time: 0.8133 data: 0.0003 max mem: 8421 +[2024-12-06 03:39:48 root] (utils.py 283): INFO Epoch: [26] [1570/2502] eta: 0:13:41 lr: 0.000002 loss_cls: 3.7227 (3.7909) grad_norm: 4.2680 (4.5467) time: 0.8114 data: 0.0003 max mem: 8421 +[2024-12-06 03:39:56 root] (utils.py 283): INFO Epoch: [26] [1580/2502] eta: 0:13:31 lr: 0.000002 loss_cls: 3.9697 (3.7905) grad_norm: 4.3617 (4.5456) time: 0.8120 data: 0.0003 max mem: 8421 +[2024-12-06 03:40:04 root] (utils.py 283): INFO Epoch: [26] [1590/2502] eta: 0:13:22 lr: 0.000002 loss_cls: 3.8726 (3.7895) grad_norm: 4.3818 (4.5462) time: 0.8111 data: 0.0003 max mem: 8421 +[2024-12-06 03:40:12 root] (utils.py 283): INFO Epoch: [26] [1600/2502] eta: 0:13:13 lr: 0.000002 loss_cls: 3.5282 (3.7889) grad_norm: 4.4777 (4.5465) time: 0.8120 data: 0.0003 max mem: 8421 +[2024-12-06 03:40:20 root] (utils.py 283): INFO Epoch: [26] [1610/2502] eta: 0:13:04 lr: 0.000002 loss_cls: 3.5597 (3.7880) grad_norm: 4.5660 (4.5469) time: 0.8131 data: 0.0003 max mem: 8421 +[2024-12-06 03:40:29 root] (utils.py 283): INFO Epoch: [26] [1620/2502] eta: 0:12:55 lr: 0.000002 loss_cls: 3.5597 (3.7858) grad_norm: 4.3435 (4.5452) time: 0.8127 data: 0.0003 max mem: 8421 +[2024-12-06 03:40:37 root] (utils.py 283): INFO Epoch: [26] [1630/2502] eta: 0:12:46 lr: 0.000002 loss_cls: 3.6566 (3.7862) grad_norm: 4.3330 (4.5489) time: 0.8133 data: 0.0003 max mem: 8421 +[2024-12-06 03:40:45 root] (utils.py 283): INFO Epoch: [26] [1640/2502] eta: 0:12:36 lr: 0.000002 loss_cls: 3.9848 (3.7883) grad_norm: 4.4472 (4.5494) time: 0.8149 data: 0.0003 max mem: 8421 +[2024-12-06 03:40:53 root] (utils.py 283): INFO Epoch: [26] [1650/2502] eta: 0:12:27 lr: 0.000002 loss_cls: 4.1844 (3.7900) grad_norm: 4.5828 (4.5570) time: 0.8142 data: 0.0003 max mem: 8421 +[2024-12-06 03:41:01 root] (utils.py 283): INFO Epoch: [26] [1660/2502] eta: 0:12:18 lr: 0.000002 loss_cls: 4.1669 (3.7914) grad_norm: 4.4805 (4.5560) time: 0.8119 data: 0.0003 max mem: 8421 +[2024-12-06 03:41:09 root] (utils.py 283): INFO Epoch: [26] [1670/2502] eta: 0:12:09 lr: 0.000002 loss_cls: 3.8472 (3.7910) grad_norm: 4.3379 (4.5558) time: 0.8122 data: 0.0003 max mem: 8421 +[2024-12-06 03:41:17 root] (utils.py 283): INFO Epoch: [26] [1680/2502] eta: 0:12:00 lr: 0.000002 loss_cls: 3.8241 (3.7917) grad_norm: 4.3253 (4.5550) time: 0.8133 data: 0.0003 max mem: 8421 +[2024-12-06 03:41:26 root] (utils.py 283): INFO Epoch: [26] [1690/2502] eta: 0:11:51 lr: 0.000002 loss_cls: 3.9112 (3.7921) grad_norm: 4.3461 (4.5559) time: 0.8128 data: 0.0003 max mem: 8421 +[2024-12-06 03:41:34 root] (utils.py 283): INFO Epoch: [26] [1700/2502] eta: 0:11:42 lr: 0.000002 loss_cls: 3.9030 (3.7921) grad_norm: 4.3010 (4.5558) time: 0.8143 data: 0.0003 max mem: 8421 +[2024-12-06 03:41:42 root] (utils.py 283): INFO Epoch: [26] [1710/2502] eta: 0:11:33 lr: 0.000002 loss_cls: 3.9165 (3.7931) grad_norm: 4.2038 (4.5601) time: 0.8150 data: 0.0003 max mem: 8421 +[2024-12-06 03:41:50 root] (utils.py 283): INFO Epoch: [26] [1720/2502] eta: 0:11:24 lr: 0.000002 loss_cls: 3.8390 (3.7929) grad_norm: 4.1607 (4.5591) time: 0.8125 data: 0.0002 max mem: 8421 +[2024-12-06 03:41:58 root] (utils.py 283): INFO Epoch: [26] [1730/2502] eta: 0:11:15 lr: 0.000002 loss_cls: 3.5708 (3.7930) grad_norm: 4.3514 (4.5588) time: 0.8125 data: 0.0003 max mem: 8421 +[2024-12-06 03:42:06 root] (utils.py 283): INFO Epoch: [26] [1740/2502] eta: 0:11:06 lr: 0.000002 loss_cls: 3.7620 (3.7912) grad_norm: 4.4163 (4.5581) time: 0.8125 data: 0.0003 max mem: 8421 +[2024-12-06 03:42:14 root] (utils.py 283): INFO Epoch: [26] [1750/2502] eta: 0:10:57 lr: 0.000002 loss_cls: 3.7620 (3.7920) grad_norm: 4.3664 (4.5567) time: 0.8124 data: 0.0003 max mem: 8421 +[2024-12-06 03:42:23 root] (utils.py 283): INFO Epoch: [26] [1760/2502] eta: 0:10:48 lr: 0.000002 loss_cls: 3.9690 (3.7927) grad_norm: 4.2995 (4.5554) time: 0.8186 data: 0.0003 max mem: 8421 +[2024-12-06 03:42:32 root] (utils.py 283): INFO Epoch: [26] [1770/2502] eta: 0:10:39 lr: 0.000002 loss_cls: 3.8764 (3.7937) grad_norm: 4.3600 (4.5549) time: 0.8662 data: 0.0004 max mem: 8421 +[2024-12-06 03:42:40 root] (utils.py 283): INFO Epoch: [26] [1780/2502] eta: 0:10:30 lr: 0.000002 loss_cls: 3.8447 (3.7934) grad_norm: 4.3600 (4.5553) time: 0.8791 data: 0.0004 max mem: 8421 +[2024-12-06 03:42:48 root] (utils.py 283): INFO Epoch: [26] [1790/2502] eta: 0:10:21 lr: 0.000002 loss_cls: 3.9276 (3.7941) grad_norm: 4.4169 (4.5544) time: 0.8143 data: 0.0003 max mem: 8421 +[2024-12-06 03:42:56 root] (utils.py 283): INFO Epoch: [26] [1800/2502] eta: 0:10:12 lr: 0.000002 loss_cls: 3.9276 (3.7942) grad_norm: 4.4816 (4.5550) time: 0.7748 data: 0.0003 max mem: 8421 +[2024-12-06 03:43:03 root] (utils.py 283): INFO Epoch: [26] [1810/2502] eta: 0:10:03 lr: 0.000002 loss_cls: 3.9573 (3.7954) grad_norm: 4.5227 (4.5551) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-06 03:43:11 root] (utils.py 283): INFO Epoch: [26] [1820/2502] eta: 0:09:54 lr: 0.000002 loss_cls: 3.8885 (3.7939) grad_norm: 4.4116 (4.5546) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-06 03:43:18 root] (utils.py 283): INFO Epoch: [26] [1830/2502] eta: 0:09:45 lr: 0.000002 loss_cls: 3.7769 (3.7940) grad_norm: 4.4364 (4.5546) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-06 03:43:26 root] (utils.py 283): INFO Epoch: [26] [1840/2502] eta: 0:09:36 lr: 0.000002 loss_cls: 4.0667 (3.7958) grad_norm: 4.5462 (4.5543) time: 0.7609 data: 0.0003 max mem: 8421 +[2024-12-06 03:43:34 root] (utils.py 283): INFO Epoch: [26] [1850/2502] eta: 0:09:27 lr: 0.000002 loss_cls: 4.1337 (3.7962) grad_norm: 4.5686 (4.5544) time: 0.7648 data: 0.0003 max mem: 8421 +[2024-12-06 03:43:41 root] (utils.py 283): INFO Epoch: [26] [1860/2502] eta: 0:09:18 lr: 0.000002 loss_cls: 3.9798 (3.7969) grad_norm: 4.4078 (4.5537) time: 0.7671 data: 0.0002 max mem: 8421 +[2024-12-06 03:43:49 root] (utils.py 283): INFO Epoch: [26] [1870/2502] eta: 0:09:08 lr: 0.000002 loss_cls: 3.9798 (3.7965) grad_norm: 4.3152 (4.5540) time: 0.7665 data: 0.0003 max mem: 8421 +[2024-12-06 03:43:57 root] (utils.py 283): INFO Epoch: [26] [1880/2502] eta: 0:08:59 lr: 0.000002 loss_cls: 3.9732 (3.7974) grad_norm: 4.4291 (4.5540) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-06 03:44:04 root] (utils.py 283): INFO Epoch: [26] [1890/2502] eta: 0:08:50 lr: 0.000002 loss_cls: 4.0395 (3.7973) grad_norm: 4.4099 (4.5531) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-06 03:44:12 root] (utils.py 283): INFO Epoch: [26] [1900/2502] eta: 0:08:41 lr: 0.000002 loss_cls: 4.0402 (3.7980) grad_norm: 4.3881 (4.5534) time: 0.7628 data: 0.0003 max mem: 8421 +[2024-12-06 03:44:20 root] (utils.py 283): INFO Epoch: [26] [1910/2502] eta: 0:08:32 lr: 0.000002 loss_cls: 3.9407 (3.7975) grad_norm: 4.3858 (4.5523) time: 0.7659 data: 0.0003 max mem: 8421 +[2024-12-06 03:44:27 root] (utils.py 283): INFO Epoch: [26] [1920/2502] eta: 0:08:23 lr: 0.000002 loss_cls: 3.8855 (3.7983) grad_norm: 4.2685 (4.5511) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-06 03:44:35 root] (utils.py 283): INFO Epoch: [26] [1930/2502] eta: 0:08:15 lr: 0.000002 loss_cls: 4.1235 (3.7988) grad_norm: 4.3545 (4.5518) time: 0.7662 data: 0.0003 max mem: 8421 +[2024-12-06 03:44:43 root] (utils.py 283): INFO Epoch: [26] [1940/2502] eta: 0:08:06 lr: 0.000002 loss_cls: 4.1235 (3.7997) grad_norm: 4.4477 (4.5516) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-06 03:44:50 root] (utils.py 283): INFO Epoch: [26] [1950/2502] eta: 0:07:57 lr: 0.000002 loss_cls: 3.9880 (3.7998) grad_norm: 4.2233 (4.5503) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-06 03:44:58 root] (utils.py 283): INFO Epoch: [26] [1960/2502] eta: 0:07:48 lr: 0.000002 loss_cls: 3.8158 (3.7994) grad_norm: 4.4131 (4.5501) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-06 03:45:05 root] (utils.py 283): INFO Epoch: [26] [1970/2502] eta: 0:07:39 lr: 0.000002 loss_cls: 3.6684 (3.7984) grad_norm: 4.4324 (4.5489) time: 0.7591 data: 0.0003 max mem: 8421 +[2024-12-06 03:45:13 root] (utils.py 283): INFO Epoch: [26] [1980/2502] eta: 0:07:30 lr: 0.000002 loss_cls: 3.9420 (3.7995) grad_norm: 4.3165 (4.5478) time: 0.7608 data: 0.0003 max mem: 8421 +[2024-12-06 03:45:21 root] (utils.py 283): INFO Epoch: [26] [1990/2502] eta: 0:07:21 lr: 0.000002 loss_cls: 3.7973 (3.7984) grad_norm: 4.3359 (4.5479) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-06 03:45:28 root] (utils.py 283): INFO Epoch: [26] [2000/2502] eta: 0:07:12 lr: 0.000002 loss_cls: 3.6218 (3.7979) grad_norm: 4.2917 (4.5473) time: 0.7639 data: 0.0003 max mem: 8421 +[2024-12-06 03:45:36 root] (utils.py 283): INFO Epoch: [26] [2010/2502] eta: 0:07:03 lr: 0.000002 loss_cls: 3.9555 (3.7991) grad_norm: 4.2917 (4.5483) time: 0.7738 data: 0.0003 max mem: 8421 +[2024-12-06 03:45:44 root] (utils.py 283): INFO Epoch: [26] [2020/2502] eta: 0:06:54 lr: 0.000002 loss_cls: 3.8277 (3.7984) grad_norm: 4.3671 (4.5475) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-06 03:45:52 root] (utils.py 283): INFO Epoch: [26] [2030/2502] eta: 0:06:46 lr: 0.000002 loss_cls: 3.8392 (3.7998) grad_norm: 4.2256 (4.5466) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-06 03:45:59 root] (utils.py 283): INFO Epoch: [26] [2040/2502] eta: 0:06:37 lr: 0.000002 loss_cls: 3.9415 (3.7994) grad_norm: 4.4009 (4.5483) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-06 03:46:07 root] (utils.py 283): INFO Epoch: [26] [2050/2502] eta: 0:06:28 lr: 0.000002 loss_cls: 3.8106 (3.7996) grad_norm: 4.4009 (4.5472) time: 0.7597 data: 0.0002 max mem: 8421 +[2024-12-06 03:46:14 root] (utils.py 283): INFO Epoch: [26] [2060/2502] eta: 0:06:19 lr: 0.000002 loss_cls: 3.8106 (3.7987) grad_norm: 4.4687 (4.5467) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-06 03:46:22 root] (utils.py 283): INFO Epoch: [26] [2070/2502] eta: 0:06:10 lr: 0.000002 loss_cls: 4.0220 (3.8010) grad_norm: 4.2741 (4.5472) time: 0.7713 data: 0.0003 max mem: 8421 +[2024-12-06 03:46:30 root] (utils.py 283): INFO Epoch: [26] [2080/2502] eta: 0:06:02 lr: 0.000002 loss_cls: 4.1949 (3.8016) grad_norm: 4.2707 (4.5456) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-06 03:46:38 root] (utils.py 283): INFO Epoch: [26] [2090/2502] eta: 0:05:53 lr: 0.000002 loss_cls: 4.0752 (3.8027) grad_norm: 4.3525 (4.5476) time: 0.7687 data: 0.0003 max mem: 8421 +[2024-12-06 03:46:45 root] (utils.py 283): INFO Epoch: [26] [2100/2502] eta: 0:05:44 lr: 0.000002 loss_cls: 3.9321 (3.8035) grad_norm: 4.3781 (4.5467) time: 0.7732 data: 0.0003 max mem: 8421 +[2024-12-06 03:46:53 root] (utils.py 283): INFO Epoch: [26] [2110/2502] eta: 0:05:35 lr: 0.000002 loss_cls: 3.8837 (3.8040) grad_norm: 4.4057 (4.5477) time: 0.7725 data: 0.0003 max mem: 8421 +[2024-12-06 03:47:01 root] (utils.py 283): INFO Epoch: [26] [2120/2502] eta: 0:05:27 lr: 0.000002 loss_cls: 3.8837 (3.8039) grad_norm: 4.4027 (4.5467) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-06 03:47:08 root] (utils.py 283): INFO Epoch: [26] [2130/2502] eta: 0:05:18 lr: 0.000002 loss_cls: 4.0366 (3.8045) grad_norm: 4.3965 (4.5465) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-06 03:47:16 root] (utils.py 283): INFO Epoch: [26] [2140/2502] eta: 0:05:09 lr: 0.000002 loss_cls: 4.0203 (3.8056) grad_norm: 4.2217 (4.5450) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-06 03:47:24 root] (utils.py 283): INFO Epoch: [26] [2150/2502] eta: 0:05:01 lr: 0.000002 loss_cls: 4.0203 (3.8057) grad_norm: 4.2289 (4.5450) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-06 03:47:31 root] (utils.py 283): INFO Epoch: [26] [2160/2502] eta: 0:04:52 lr: 0.000002 loss_cls: 3.7494 (3.8057) grad_norm: 4.3551 (4.5452) time: 0.7638 data: 0.0003 max mem: 8421 +[2024-12-06 03:47:39 root] (utils.py 283): INFO Epoch: [26] [2170/2502] eta: 0:04:43 lr: 0.000002 loss_cls: 3.7494 (3.8054) grad_norm: 4.3776 (4.5441) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-06 03:47:47 root] (utils.py 283): INFO Epoch: [26] [2180/2502] eta: 0:04:34 lr: 0.000002 loss_cls: 4.0808 (3.8064) grad_norm: 4.1944 (4.5428) time: 0.7609 data: 0.0002 max mem: 8421 +[2024-12-06 03:47:54 root] (utils.py 283): INFO Epoch: [26] [2190/2502] eta: 0:04:26 lr: 0.000002 loss_cls: 3.8321 (3.8051) grad_norm: 4.3061 (4.5426) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-06 03:48:02 root] (utils.py 283): INFO Epoch: [26] [2200/2502] eta: 0:04:17 lr: 0.000002 loss_cls: 3.5940 (3.8047) grad_norm: 4.3453 (4.5418) time: 0.7604 data: 0.0002 max mem: 8421 +[2024-12-06 03:48:09 root] (utils.py 283): INFO Epoch: [26] [2210/2502] eta: 0:04:09 lr: 0.000002 loss_cls: 3.4662 (3.8023) grad_norm: 4.2690 (4.5408) time: 0.7621 data: 0.0003 max mem: 8421 +[2024-12-06 03:48:17 root] (utils.py 283): INFO Epoch: [26] [2220/2502] eta: 0:04:00 lr: 0.000002 loss_cls: 3.6146 (3.8029) grad_norm: 4.2721 (4.5400) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-06 03:48:25 root] (utils.py 283): INFO Epoch: [26] [2230/2502] eta: 0:03:51 lr: 0.000002 loss_cls: 3.8705 (3.8019) grad_norm: 4.2091 (4.5393) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-06 03:48:32 root] (utils.py 283): INFO Epoch: [26] [2240/2502] eta: 0:03:43 lr: 0.000002 loss_cls: 3.9691 (3.8034) grad_norm: 4.2091 (4.5391) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-06 03:48:40 root] (utils.py 283): INFO Epoch: [26] [2250/2502] eta: 0:03:34 lr: 0.000002 loss_cls: 4.1063 (3.8039) grad_norm: 4.5602 (4.5394) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-06 03:48:48 root] (utils.py 283): INFO Epoch: [26] [2260/2502] eta: 0:03:25 lr: 0.000002 loss_cls: 3.9140 (3.8035) grad_norm: 4.4415 (4.5383) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-06 03:48:55 root] (utils.py 283): INFO Epoch: [26] [2270/2502] eta: 0:03:17 lr: 0.000002 loss_cls: 3.9410 (3.8039) grad_norm: 4.1942 (4.5375) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:03 root] (utils.py 283): INFO Epoch: [26] [2280/2502] eta: 0:03:08 lr: 0.000002 loss_cls: 3.8830 (3.8030) grad_norm: 4.2146 (4.5383) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:11 root] (utils.py 283): INFO Epoch: [26] [2290/2502] eta: 0:03:00 lr: 0.000002 loss_cls: 3.7305 (3.8025) grad_norm: 4.2569 (4.5378) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:18 root] (utils.py 283): INFO Epoch: [26] [2300/2502] eta: 0:02:51 lr: 0.000002 loss_cls: 3.9107 (3.8030) grad_norm: 4.2478 (4.5370) time: 0.7703 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:26 root] (utils.py 283): INFO Epoch: [26] [2310/2502] eta: 0:02:43 lr: 0.000002 loss_cls: 4.0175 (3.8033) grad_norm: 4.2934 (4.5368) time: 0.7769 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:34 root] (utils.py 283): INFO Epoch: [26] [2320/2502] eta: 0:02:34 lr: 0.000002 loss_cls: 3.7898 (3.8028) grad_norm: 4.4061 (4.5366) time: 0.7745 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:41 root] (utils.py 283): INFO Epoch: [26] [2330/2502] eta: 0:02:25 lr: 0.000002 loss_cls: 3.7898 (3.8030) grad_norm: 4.4834 (4.5366) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:49 root] (utils.py 283): INFO Epoch: [26] [2340/2502] eta: 0:02:17 lr: 0.000002 loss_cls: 3.9890 (3.8030) grad_norm: 4.5292 (4.5366) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-06 03:49:57 root] (utils.py 283): INFO Epoch: [26] [2350/2502] eta: 0:02:08 lr: 0.000002 loss_cls: 3.6686 (3.8022) grad_norm: 4.3042 (4.5355) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-06 03:50:04 root] (utils.py 283): INFO Epoch: [26] [2360/2502] eta: 0:02:00 lr: 0.000002 loss_cls: 3.5227 (3.8026) grad_norm: 4.4101 (4.5349) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-06 03:50:12 root] (utils.py 283): INFO Epoch: [26] [2370/2502] eta: 0:01:51 lr: 0.000002 loss_cls: 3.8521 (3.8029) grad_norm: 4.5105 (4.5352) time: 0.7618 data: 0.0002 max mem: 8421 +[2024-12-06 03:50:20 root] (utils.py 283): INFO Epoch: [26] [2380/2502] eta: 0:01:43 lr: 0.000002 loss_cls: 3.9634 (3.8035) grad_norm: 4.4145 (4.5363) time: 0.7612 data: 0.0003 max mem: 8421 +[2024-12-06 03:50:27 root] (utils.py 283): INFO Epoch: [26] [2390/2502] eta: 0:01:34 lr: 0.000002 loss_cls: 3.9844 (3.8032) grad_norm: 4.3894 (4.5370) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-06 03:50:35 root] (utils.py 283): INFO Epoch: [26] [2400/2502] eta: 0:01:26 lr: 0.000002 loss_cls: 3.9829 (3.8029) grad_norm: 4.3701 (4.5361) time: 0.7672 data: 0.0003 max mem: 8421 +[2024-12-06 03:50:43 root] (utils.py 283): INFO Epoch: [26] [2410/2502] eta: 0:01:17 lr: 0.000002 loss_cls: 3.8655 (3.8024) grad_norm: 4.2776 (4.5356) time: 0.7676 data: 0.0003 max mem: 8421 +[2024-12-06 03:50:50 root] (utils.py 283): INFO Epoch: [26] [2420/2502] eta: 0:01:09 lr: 0.000002 loss_cls: 3.8655 (3.8031) grad_norm: 4.3215 (4.5360) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-06 03:50:58 root] (utils.py 283): INFO Epoch: [26] [2430/2502] eta: 0:01:00 lr: 0.000002 loss_cls: 4.0030 (3.8026) grad_norm: 4.2975 (4.5450) time: 0.7699 data: 0.0003 max mem: 8421 +[2024-12-06 03:51:06 root] (utils.py 283): INFO Epoch: [26] [2440/2502] eta: 0:00:52 lr: 0.000002 loss_cls: 3.9021 (3.8021) grad_norm: 4.2870 (4.5459) time: 0.7743 data: 0.0002 max mem: 8421 +[2024-12-06 03:51:13 root] (utils.py 283): INFO Epoch: [26] [2450/2502] eta: 0:00:43 lr: 0.000002 loss_cls: 3.8075 (3.8015) grad_norm: 4.2698 (4.5448) time: 0.7703 data: 0.0003 max mem: 8421 +[2024-12-06 03:51:21 root] (utils.py 283): INFO Epoch: [26] [2460/2502] eta: 0:00:35 lr: 0.000002 loss_cls: 3.9084 (3.8019) grad_norm: 4.2698 (4.5446) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-06 03:51:29 root] (utils.py 283): INFO Epoch: [26] [2470/2502] eta: 0:00:26 lr: 0.000002 loss_cls: 3.9795 (3.8021) grad_norm: 4.2909 (4.5454) time: 0.7634 data: 0.0003 max mem: 8421 +[2024-12-06 03:51:36 root] (utils.py 283): INFO Epoch: [26] [2480/2502] eta: 0:00:18 lr: 0.000002 loss_cls: 3.9259 (3.8026) grad_norm: 4.2909 (4.5455) time: 0.7626 data: 0.0003 max mem: 8421 +[2024-12-06 03:51:44 root] (utils.py 283): INFO Epoch: [26] [2490/2502] eta: 0:00:10 lr: 0.000002 loss_cls: 3.7844 (3.8020) grad_norm: 4.5336 (4.5456) time: 0.7838 data: 0.0227 max mem: 8421 +[2024-12-06 03:51:52 root] (utils.py 283): INFO Epoch: [26] [2500/2502] eta: 0:00:01 lr: 0.000002 loss_cls: 3.7723 (3.8017) grad_norm: 4.4621 (4.5452) time: 0.7846 data: 0.0227 max mem: 8421 +[2024-12-06 03:51:53 root] (utils.py 283): INFO Epoch: [26] [2501/2502] eta: 0:00:00 lr: 0.000002 loss_cls: 3.7723 (3.8015) grad_norm: 4.4332 (4.5450) time: 0.7860 data: 0.0227 max mem: 8421 +[2024-12-06 03:51:53 root] (utils.py 297): INFO Epoch: [26] Total time: 0:35:09 (0.8430 s / it) +[2024-12-06 03:51:53 root] (engine.py 179): INFO Averaged stats:lr: 0.000002 loss_cls: 3.7723 (3.8103) grad_norm: 4.4332 (4.5450) +[2024-12-06 03:51:53 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7060 (0.7060) acc1: 85.1562 (85.1562) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1316 data: 0.0005 max mem: 8421 +[2024-12-06 03:51:55 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7867 (0.8365) acc1: 84.3750 (82.5994) acc3: 92.9688 (93.1108) acc5: 96.8750 (96.2358) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-06 03:51:56 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8713 (0.8932) acc1: 79.6875 (81.1756) acc3: 92.9688 (92.4479) acc5: 96.0938 (95.4613) time: 0.1315 data: 0.0004 max mem: 8421 +[2024-12-06 03:51:57 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9613 (0.9004) acc1: 78.9062 (80.3679) acc3: 92.1875 (92.7923) acc5: 95.3125 (95.5141) time: 0.1316 data: 0.0004 max mem: 8421 +[2024-12-06 03:51:59 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8251 (0.8954) acc1: 79.6875 (80.6402) acc3: 93.7500 (92.9116) acc5: 96.0938 (95.5030) time: 0.1331 data: 0.0005 max mem: 8421 +[2024-12-06 03:52:00 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9885 (0.9884) acc1: 75.7812 (78.4926) acc3: 89.0625 (91.2837) acc5: 91.4062 (94.3627) time: 0.1333 data: 0.0005 max mem: 8421 +[2024-12-06 03:52:01 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3166 (1.0315) acc1: 72.6562 (77.6639) acc3: 86.7188 (90.6378) acc5: 89.8438 (93.6603) time: 0.1321 data: 0.0005 max mem: 8421 +[2024-12-06 03:52:03 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2525 (1.0751) acc1: 74.2188 (76.5405) acc3: 88.2812 (90.1298) acc5: 89.8438 (93.2438) time: 0.1331 data: 0.0005 max mem: 8421 +[2024-12-06 03:52:04 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2830 (1.1120) acc1: 71.0938 (75.7330) acc3: 85.1562 (89.5255) acc5: 89.8438 (92.7373) time: 0.1358 data: 0.0009 max mem: 8421 +[2024-12-06 03:52:05 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3244 (1.1425) acc1: 68.7500 (74.9141) acc3: 85.1562 (88.9852) acc5: 89.8438 (92.3249) time: 0.1351 data: 0.0009 max mem: 8421 +[2024-12-06 03:52:06 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2079 (1.1317) acc1: 72.6562 (75.0640) acc3: 87.5000 (89.1920) acc5: 91.4062 (92.5040) time: 0.1313 data: 0.0008 max mem: 8421 +[2024-12-06 03:52:06 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1328 s / it) +[2024-12-06 03:52:07 root] (engine.py 264): INFO * Acc@1 75.100 Acc@3 89.072 Acc@5 92.432 loss 1.132 flops 1.285 layer_flops 1.251 +[2024-12-06 03:52:07 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.1% +[2024-12-06 03:52:07 root] (main.py 551): INFO Max accuracy: 75.10% +[2024-12-06 03:52:08 root] (utils.py 283): INFO Epoch: [27] [ 0/2502] eta: 0:34:34 lr: 0.000002 loss_cls: 4.9754 (4.9754) grad_norm: 7.1446 (7.1446) time: 0.8292 data: 0.0007 max mem: 8421 +[2024-12-06 03:52:16 root] (utils.py 283): INFO Epoch: [27] [ 10/2502] eta: 0:32:32 lr: 0.000002 loss_cls: 4.1133 (4.0055) grad_norm: 4.4919 (4.7299) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 03:52:23 root] (utils.py 283): INFO Epoch: [27] [ 20/2502] eta: 0:32:05 lr: 0.000002 loss_cls: 4.2078 (4.0728) grad_norm: 4.4919 (4.8124) time: 0.7730 data: 0.0003 max mem: 8421 +[2024-12-06 03:52:31 root] (utils.py 283): INFO Epoch: [27] [ 30/2502] eta: 0:31:54 lr: 0.000002 loss_cls: 4.2160 (3.9887) grad_norm: 4.6052 (4.7296) time: 0.7696 data: 0.0002 max mem: 8421 +[2024-12-06 03:52:39 root] (utils.py 283): INFO Epoch: [27] [ 40/2502] eta: 0:31:39 lr: 0.000002 loss_cls: 4.0428 (3.9620) grad_norm: 4.3780 (4.7379) time: 0.7670 data: 0.0002 max mem: 8421 +[2024-12-06 03:52:46 root] (utils.py 283): INFO Epoch: [27] [ 50/2502] eta: 0:31:27 lr: 0.000002 loss_cls: 3.9300 (3.8890) grad_norm: 4.2754 (4.6631) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-06 03:52:54 root] (utils.py 283): INFO Epoch: [27] [ 60/2502] eta: 0:31:15 lr: 0.000002 loss_cls: 3.7387 (3.8657) grad_norm: 4.3256 (4.6457) time: 0.7605 data: 0.0002 max mem: 8421 +[2024-12-06 03:53:02 root] (utils.py 283): INFO Epoch: [27] [ 70/2502] eta: 0:31:06 lr: 0.000002 loss_cls: 3.7960 (3.8604) grad_norm: 4.5353 (4.6688) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-06 03:53:09 root] (utils.py 283): INFO Epoch: [27] [ 80/2502] eta: 0:30:57 lr: 0.000002 loss_cls: 3.8163 (3.8620) grad_norm: 4.3871 (4.6261) time: 0.7635 data: 0.0002 max mem: 8421 +[2024-12-06 03:53:17 root] (utils.py 283): INFO Epoch: [27] [ 90/2502] eta: 0:30:49 lr: 0.000002 loss_cls: 3.9088 (3.8581) grad_norm: 4.1906 (4.6264) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-06 03:53:25 root] (utils.py 283): INFO Epoch: [27] [ 100/2502] eta: 0:30:46 lr: 0.000002 loss_cls: 3.6162 (3.8252) grad_norm: 4.3384 (4.6187) time: 0.7768 data: 0.0002 max mem: 8421 +[2024-12-06 03:53:32 root] (utils.py 283): INFO Epoch: [27] [ 110/2502] eta: 0:30:39 lr: 0.000002 loss_cls: 3.7562 (3.8405) grad_norm: 4.4175 (4.6165) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 03:53:40 root] (utils.py 283): INFO Epoch: [27] [ 120/2502] eta: 0:30:33 lr: 0.000002 loss_cls: 3.8446 (3.8295) grad_norm: 4.4175 (4.6381) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-06 03:53:48 root] (utils.py 283): INFO Epoch: [27] [ 130/2502] eta: 0:30:24 lr: 0.000002 loss_cls: 3.7978 (3.8218) grad_norm: 4.3422 (4.6187) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-06 03:53:55 root] (utils.py 283): INFO Epoch: [27] [ 140/2502] eta: 0:30:15 lr: 0.000002 loss_cls: 3.9410 (3.8400) grad_norm: 4.3836 (4.6161) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-06 03:54:03 root] (utils.py 283): INFO Epoch: [27] [ 150/2502] eta: 0:30:07 lr: 0.000002 loss_cls: 4.0604 (3.8402) grad_norm: 4.4573 (4.6560) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-06 03:54:11 root] (utils.py 283): INFO Epoch: [27] [ 160/2502] eta: 0:29:58 lr: 0.000002 loss_cls: 3.9140 (3.8233) grad_norm: 4.4822 (4.6469) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-06 03:54:18 root] (utils.py 283): INFO Epoch: [27] [ 170/2502] eta: 0:29:49 lr: 0.000002 loss_cls: 3.8942 (3.8223) grad_norm: 4.1628 (4.6210) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-06 03:54:26 root] (utils.py 283): INFO Epoch: [27] [ 180/2502] eta: 0:29:41 lr: 0.000002 loss_cls: 3.9461 (3.8268) grad_norm: 4.2363 (4.6248) time: 0.7602 data: 0.0002 max mem: 8421 +[2024-12-06 03:54:34 root] (utils.py 283): INFO Epoch: [27] [ 190/2502] eta: 0:29:33 lr: 0.000002 loss_cls: 3.9628 (3.8256) grad_norm: 4.5607 (4.6138) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-06 03:54:41 root] (utils.py 283): INFO Epoch: [27] [ 200/2502] eta: 0:29:24 lr: 0.000002 loss_cls: 3.8178 (3.8264) grad_norm: 4.1898 (4.6001) time: 0.7630 data: 0.0003 max mem: 8421 +[2024-12-06 03:54:49 root] (utils.py 283): INFO Epoch: [27] [ 210/2502] eta: 0:29:16 lr: 0.000002 loss_cls: 3.9475 (3.8245) grad_norm: 4.2615 (4.6009) time: 0.7619 data: 0.0003 max mem: 8421 +[2024-12-06 03:54:56 root] (utils.py 283): INFO Epoch: [27] [ 220/2502] eta: 0:29:08 lr: 0.000002 loss_cls: 3.6877 (3.8174) grad_norm: 4.2995 (4.5930) time: 0.7617 data: 0.0003 max mem: 8421 +[2024-12-06 03:55:04 root] (utils.py 283): INFO Epoch: [27] [ 230/2502] eta: 0:29:00 lr: 0.000002 loss_cls: 3.6353 (3.8126) grad_norm: 4.3727 (4.5789) time: 0.7606 data: 0.0002 max mem: 8421 +[2024-12-06 03:55:12 root] (utils.py 283): INFO Epoch: [27] [ 240/2502] eta: 0:28:52 lr: 0.000002 loss_cls: 3.8693 (3.8124) grad_norm: 4.3260 (4.5744) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-06 03:55:19 root] (utils.py 283): INFO Epoch: [27] [ 250/2502] eta: 0:28:44 lr: 0.000002 loss_cls: 3.9446 (3.8161) grad_norm: 4.3862 (4.5687) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-06 03:55:27 root] (utils.py 283): INFO Epoch: [27] [ 260/2502] eta: 0:28:36 lr: 0.000002 loss_cls: 3.9575 (3.8149) grad_norm: 4.3862 (4.5617) time: 0.7630 data: 0.0002 max mem: 8421 +[2024-12-06 03:55:35 root] (utils.py 283): INFO Epoch: [27] [ 270/2502] eta: 0:28:28 lr: 0.000002 loss_cls: 3.6737 (3.8086) grad_norm: 4.5008 (4.5732) time: 0.7644 data: 0.0003 max mem: 8421 +[2024-12-06 03:55:42 root] (utils.py 283): INFO Epoch: [27] [ 280/2502] eta: 0:28:20 lr: 0.000002 loss_cls: 3.8152 (3.8180) grad_norm: 4.6379 (4.5759) time: 0.7616 data: 0.0003 max mem: 8421 +[2024-12-06 03:55:50 root] (utils.py 283): INFO Epoch: [27] [ 290/2502] eta: 0:28:12 lr: 0.000002 loss_cls: 4.0722 (3.8187) grad_norm: 4.3395 (4.5680) time: 0.7603 data: 0.0002 max mem: 8421 +[2024-12-06 03:55:57 root] (utils.py 283): INFO Epoch: [27] [ 300/2502] eta: 0:28:05 lr: 0.000002 loss_cls: 4.0722 (3.8224) grad_norm: 4.2916 (4.5824) time: 0.7668 data: 0.0002 max mem: 8421 +[2024-12-06 03:56:05 root] (utils.py 283): INFO Epoch: [27] [ 310/2502] eta: 0:27:57 lr: 0.000002 loss_cls: 3.9021 (3.8197) grad_norm: 4.4182 (4.5885) time: 0.7689 data: 0.0002 max mem: 8421 +[2024-12-06 03:56:13 root] (utils.py 283): INFO Epoch: [27] [ 320/2502] eta: 0:27:50 lr: 0.000002 loss_cls: 3.9273 (3.8243) grad_norm: 4.3395 (4.5838) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-06 03:56:21 root] (utils.py 283): INFO Epoch: [27] [ 330/2502] eta: 0:27:43 lr: 0.000002 loss_cls: 4.1091 (3.8310) grad_norm: 4.2942 (4.5766) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-06 03:56:28 root] (utils.py 283): INFO Epoch: [27] [ 340/2502] eta: 0:27:36 lr: 0.000002 loss_cls: 4.1622 (3.8316) grad_norm: 4.2942 (4.5838) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-06 03:56:36 root] (utils.py 283): INFO Epoch: [27] [ 350/2502] eta: 0:27:30 lr: 0.000002 loss_cls: 4.1281 (3.8368) grad_norm: 4.3911 (4.5815) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-06 03:56:44 root] (utils.py 283): INFO Epoch: [27] [ 360/2502] eta: 0:27:23 lr: 0.000002 loss_cls: 4.1281 (3.8370) grad_norm: 4.5007 (4.5819) time: 0.7821 data: 0.0002 max mem: 8421 +[2024-12-06 03:56:52 root] (utils.py 283): INFO Epoch: [27] [ 370/2502] eta: 0:27:15 lr: 0.000002 loss_cls: 3.8763 (3.8366) grad_norm: 4.4221 (4.5807) time: 0.7708 data: 0.0003 max mem: 8421 +[2024-12-06 03:56:59 root] (utils.py 283): INFO Epoch: [27] [ 380/2502] eta: 0:27:07 lr: 0.000002 loss_cls: 3.7523 (3.8307) grad_norm: 4.3686 (4.5807) time: 0.7623 data: 0.0003 max mem: 8421 +[2024-12-06 03:57:07 root] (utils.py 283): INFO Epoch: [27] [ 390/2502] eta: 0:26:59 lr: 0.000002 loss_cls: 3.6624 (3.8246) grad_norm: 4.3880 (4.5763) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-06 03:57:15 root] (utils.py 283): INFO Epoch: [27] [ 400/2502] eta: 0:26:51 lr: 0.000002 loss_cls: 3.6624 (3.8259) grad_norm: 4.2927 (4.5712) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-06 03:57:22 root] (utils.py 283): INFO Epoch: [27] [ 410/2502] eta: 0:26:43 lr: 0.000002 loss_cls: 3.8859 (3.8261) grad_norm: 4.2480 (4.5656) time: 0.7621 data: 0.0002 max mem: 8421 +[2024-12-06 03:57:30 root] (utils.py 283): INFO Epoch: [27] [ 420/2502] eta: 0:26:35 lr: 0.000002 loss_cls: 3.5956 (3.8184) grad_norm: 4.2623 (4.5656) time: 0.7599 data: 0.0002 max mem: 8421 +[2024-12-06 03:57:38 root] (utils.py 283): INFO Epoch: [27] [ 430/2502] eta: 0:26:28 lr: 0.000002 loss_cls: 3.6081 (3.8207) grad_norm: 4.2426 (4.5639) time: 0.7689 data: 0.0003 max mem: 8421 +[2024-12-06 03:57:45 root] (utils.py 283): INFO Epoch: [27] [ 440/2502] eta: 0:26:21 lr: 0.000002 loss_cls: 3.8688 (3.8186) grad_norm: 4.2142 (4.5716) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-06 03:57:53 root] (utils.py 283): INFO Epoch: [27] [ 450/2502] eta: 0:26:14 lr: 0.000002 loss_cls: 4.0093 (3.8242) grad_norm: 4.2765 (4.5689) time: 0.7734 data: 0.0003 max mem: 8421 +[2024-12-06 03:58:01 root] (utils.py 283): INFO Epoch: [27] [ 460/2502] eta: 0:26:06 lr: 0.000002 loss_cls: 4.0490 (3.8195) grad_norm: 4.3537 (4.5655) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-06 03:58:08 root] (utils.py 283): INFO Epoch: [27] [ 470/2502] eta: 0:25:58 lr: 0.000002 loss_cls: 3.5588 (3.8200) grad_norm: 4.3539 (4.5607) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-06 03:58:16 root] (utils.py 283): INFO Epoch: [27] [ 480/2502] eta: 0:25:50 lr: 0.000002 loss_cls: 3.9260 (3.8223) grad_norm: 4.2908 (4.5616) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-06 03:58:24 root] (utils.py 283): INFO Epoch: [27] [ 490/2502] eta: 0:25:43 lr: 0.000002 loss_cls: 3.9260 (3.8179) grad_norm: 4.2376 (4.5612) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-06 03:58:31 root] (utils.py 283): INFO Epoch: [27] [ 500/2502] eta: 0:25:35 lr: 0.000002 loss_cls: 3.7595 (3.8173) grad_norm: 4.2196 (4.5541) time: 0.7690 data: 0.0003 max mem: 8421 +[2024-12-06 03:58:39 root] (utils.py 283): INFO Epoch: [27] [ 510/2502] eta: 0:25:27 lr: 0.000002 loss_cls: 3.7595 (3.8161) grad_norm: 4.2389 (4.5596) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-06 03:58:47 root] (utils.py 283): INFO Epoch: [27] [ 520/2502] eta: 0:25:20 lr: 0.000002 loss_cls: 3.8980 (3.8195) grad_norm: 4.2199 (4.5540) time: 0.7670 data: 0.0003 max mem: 8421 +[2024-12-06 03:58:54 root] (utils.py 283): INFO Epoch: [27] [ 530/2502] eta: 0:25:12 lr: 0.000002 loss_cls: 3.9834 (3.8166) grad_norm: 4.1993 (4.5516) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-06 03:59:02 root] (utils.py 283): INFO Epoch: [27] [ 540/2502] eta: 0:25:04 lr: 0.000002 loss_cls: 3.8121 (3.8159) grad_norm: 4.4908 (4.5518) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-06 03:59:10 root] (utils.py 283): INFO Epoch: [27] [ 550/2502] eta: 0:24:56 lr: 0.000002 loss_cls: 3.7560 (3.8118) grad_norm: 4.5320 (4.5518) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-06 03:59:17 root] (utils.py 283): INFO Epoch: [27] [ 560/2502] eta: 0:24:49 lr: 0.000002 loss_cls: 3.9367 (3.8159) grad_norm: 4.6031 (4.5539) time: 0.7665 data: 0.0002 max mem: 8421 +[2024-12-06 03:59:25 root] (utils.py 283): INFO Epoch: [27] [ 570/2502] eta: 0:24:41 lr: 0.000002 loss_cls: 4.1208 (3.8188) grad_norm: 4.3193 (4.5535) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-06 03:59:33 root] (utils.py 283): INFO Epoch: [27] [ 580/2502] eta: 0:24:33 lr: 0.000002 loss_cls: 3.7688 (3.8107) grad_norm: 4.3337 (4.5546) time: 0.7645 data: 0.0002 max mem: 8421 +[2024-12-06 03:59:40 root] (utils.py 283): INFO Epoch: [27] [ 590/2502] eta: 0:24:26 lr: 0.000002 loss_cls: 3.7865 (3.8138) grad_norm: 4.3863 (4.5538) time: 0.7712 data: 0.0003 max mem: 8421 +[2024-12-06 03:59:48 root] (utils.py 283): INFO Epoch: [27] [ 600/2502] eta: 0:24:18 lr: 0.000002 loss_cls: 3.8503 (3.8119) grad_norm: 4.3487 (4.5526) time: 0.7691 data: 0.0003 max mem: 8421 +[2024-12-06 03:59:56 root] (utils.py 283): INFO Epoch: [27] [ 610/2502] eta: 0:24:10 lr: 0.000002 loss_cls: 3.4706 (3.8076) grad_norm: 4.3085 (4.5523) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-06 04:00:03 root] (utils.py 283): INFO Epoch: [27] [ 620/2502] eta: 0:24:02 lr: 0.000002 loss_cls: 3.5961 (3.8057) grad_norm: 4.2704 (4.5469) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-06 04:00:11 root] (utils.py 283): INFO Epoch: [27] [ 630/2502] eta: 0:23:54 lr: 0.000002 loss_cls: 3.9094 (3.8110) grad_norm: 4.3289 (4.5446) time: 0.7592 data: 0.0002 max mem: 8421 +[2024-12-06 04:00:19 root] (utils.py 283): INFO Epoch: [27] [ 640/2502] eta: 0:23:47 lr: 0.000002 loss_cls: 3.9731 (3.8071) grad_norm: 4.4653 (4.5475) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-06 04:00:26 root] (utils.py 283): INFO Epoch: [27] [ 650/2502] eta: 0:23:39 lr: 0.000002 loss_cls: 3.8449 (3.8084) grad_norm: 4.4329 (4.5463) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-06 04:00:34 root] (utils.py 283): INFO Epoch: [27] [ 660/2502] eta: 0:23:31 lr: 0.000002 loss_cls: 3.8712 (3.8090) grad_norm: 4.1817 (4.5438) time: 0.7612 data: 0.0002 max mem: 8421 +[2024-12-06 04:00:42 root] (utils.py 283): INFO Epoch: [27] [ 670/2502] eta: 0:23:24 lr: 0.000002 loss_cls: 3.8158 (3.8052) grad_norm: 4.3088 (4.5491) time: 0.7737 data: 0.0002 max mem: 8421 +[2024-12-06 04:00:49 root] (utils.py 283): INFO Epoch: [27] [ 680/2502] eta: 0:23:16 lr: 0.000002 loss_cls: 3.8158 (3.8072) grad_norm: 4.6215 (4.5478) time: 0.7737 data: 0.0002 max mem: 8421 +[2024-12-06 04:00:57 root] (utils.py 283): INFO Epoch: [27] [ 690/2502] eta: 0:23:09 lr: 0.000002 loss_cls: 3.8709 (3.8052) grad_norm: 4.4776 (4.5479) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-06 04:01:04 root] (utils.py 283): INFO Epoch: [27] [ 700/2502] eta: 0:23:01 lr: 0.000002 loss_cls: 3.7566 (3.8046) grad_norm: 4.2369 (4.5440) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-06 04:01:12 root] (utils.py 283): INFO Epoch: [27] [ 710/2502] eta: 0:22:53 lr: 0.000002 loss_cls: 4.0201 (3.8081) grad_norm: 4.4768 (4.5483) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-06 04:01:20 root] (utils.py 283): INFO Epoch: [27] [ 720/2502] eta: 0:22:45 lr: 0.000002 loss_cls: 4.0052 (3.8023) grad_norm: 4.5753 (4.5468) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-06 04:01:27 root] (utils.py 283): INFO Epoch: [27] [ 730/2502] eta: 0:22:37 lr: 0.000002 loss_cls: 3.6626 (3.8038) grad_norm: 4.2960 (4.5420) time: 0.7589 data: 0.0003 max mem: 8421 +[2024-12-06 04:01:35 root] (utils.py 283): INFO Epoch: [27] [ 740/2502] eta: 0:22:30 lr: 0.000002 loss_cls: 4.0376 (3.8073) grad_norm: 4.2758 (4.5418) time: 0.7592 data: 0.0003 max mem: 8421 +[2024-12-06 04:01:42 root] (utils.py 283): INFO Epoch: [27] [ 750/2502] eta: 0:22:22 lr: 0.000002 loss_cls: 3.8174 (3.8062) grad_norm: 4.4131 (4.5441) time: 0.7602 data: 0.0003 max mem: 8421 +[2024-12-06 04:01:50 root] (utils.py 283): INFO Epoch: [27] [ 760/2502] eta: 0:22:14 lr: 0.000002 loss_cls: 3.7795 (3.8065) grad_norm: 4.4131 (4.5461) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-06 04:01:58 root] (utils.py 283): INFO Epoch: [27] [ 770/2502] eta: 0:22:07 lr: 0.000002 loss_cls: 3.9484 (3.8062) grad_norm: 4.3684 (4.5431) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-06 04:02:06 root] (utils.py 283): INFO Epoch: [27] [ 780/2502] eta: 0:21:59 lr: 0.000002 loss_cls: 3.8942 (3.8047) grad_norm: 4.3373 (4.5459) time: 0.7659 data: 0.0002 max mem: 8421 +[2024-12-06 04:02:13 root] (utils.py 283): INFO Epoch: [27] [ 790/2502] eta: 0:21:51 lr: 0.000002 loss_cls: 3.9489 (3.8073) grad_norm: 4.3373 (4.5416) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-06 04:02:21 root] (utils.py 283): INFO Epoch: [27] [ 800/2502] eta: 0:21:44 lr: 0.000002 loss_cls: 3.9663 (3.8057) grad_norm: 4.3467 (4.5387) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-06 04:02:28 root] (utils.py 283): INFO Epoch: [27] [ 810/2502] eta: 0:21:36 lr: 0.000002 loss_cls: 3.6483 (3.8040) grad_norm: 4.5430 (4.5407) time: 0.7616 data: 0.0002 max mem: 8421 +[2024-12-06 04:02:36 root] (utils.py 283): INFO Epoch: [27] [ 820/2502] eta: 0:21:28 lr: 0.000002 loss_cls: 3.7100 (3.8027) grad_norm: 4.4953 (4.5385) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-06 04:02:44 root] (utils.py 283): INFO Epoch: [27] [ 830/2502] eta: 0:21:20 lr: 0.000002 loss_cls: 3.7100 (3.7996) grad_norm: 4.2349 (4.5345) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-06 04:02:51 root] (utils.py 283): INFO Epoch: [27] [ 840/2502] eta: 0:21:13 lr: 0.000002 loss_cls: 3.7989 (3.8025) grad_norm: 4.2377 (4.5323) time: 0.7681 data: 0.0003 max mem: 8421 +[2024-12-06 04:02:59 root] (utils.py 283): INFO Epoch: [27] [ 850/2502] eta: 0:21:05 lr: 0.000002 loss_cls: 4.0412 (3.8032) grad_norm: 4.3433 (4.5329) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-06 04:03:07 root] (utils.py 283): INFO Epoch: [27] [ 860/2502] eta: 0:20:57 lr: 0.000002 loss_cls: 3.8535 (3.8033) grad_norm: 4.4072 (4.5306) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-06 04:03:14 root] (utils.py 283): INFO Epoch: [27] [ 870/2502] eta: 0:20:50 lr: 0.000002 loss_cls: 3.5982 (3.7995) grad_norm: 4.2700 (4.5303) time: 0.7618 data: 0.0003 max mem: 8421 +[2024-12-06 04:03:22 root] (utils.py 283): INFO Epoch: [27] [ 880/2502] eta: 0:20:42 lr: 0.000002 loss_cls: 3.5982 (3.8004) grad_norm: 4.3830 (4.5393) time: 0.7702 data: 0.0003 max mem: 8421 +[2024-12-06 04:03:30 root] (utils.py 283): INFO Epoch: [27] [ 890/2502] eta: 0:20:35 lr: 0.000002 loss_cls: 3.7702 (3.7989) grad_norm: 4.6134 (4.5382) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 04:03:38 root] (utils.py 283): INFO Epoch: [27] [ 900/2502] eta: 0:20:28 lr: 0.000002 loss_cls: 3.7646 (3.7994) grad_norm: 4.3047 (4.5365) time: 0.7871 data: 0.0003 max mem: 8421 +[2024-12-06 04:03:46 root] (utils.py 283): INFO Epoch: [27] [ 910/2502] eta: 0:20:20 lr: 0.000002 loss_cls: 3.8602 (3.7986) grad_norm: 4.3935 (4.5361) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 04:03:53 root] (utils.py 283): INFO Epoch: [27] [ 920/2502] eta: 0:20:13 lr: 0.000002 loss_cls: 3.6050 (3.7961) grad_norm: 4.5044 (4.5360) time: 0.7731 data: 0.0003 max mem: 8421 +[2024-12-06 04:04:01 root] (utils.py 283): INFO Epoch: [27] [ 930/2502] eta: 0:20:05 lr: 0.000002 loss_cls: 3.5097 (3.7947) grad_norm: 4.4134 (4.5348) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-06 04:04:09 root] (utils.py 283): INFO Epoch: [27] [ 940/2502] eta: 0:19:57 lr: 0.000002 loss_cls: 3.8467 (3.7951) grad_norm: 4.1782 (4.5334) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-06 04:04:16 root] (utils.py 283): INFO Epoch: [27] [ 950/2502] eta: 0:19:50 lr: 0.000002 loss_cls: 3.9674 (3.7952) grad_norm: 4.2007 (4.5303) time: 0.7671 data: 0.0003 max mem: 8421 +[2024-12-06 04:04:24 root] (utils.py 283): INFO Epoch: [27] [ 960/2502] eta: 0:19:42 lr: 0.000002 loss_cls: 3.7958 (3.7943) grad_norm: 4.2888 (4.5292) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-06 04:04:32 root] (utils.py 283): INFO Epoch: [27] [ 970/2502] eta: 0:19:34 lr: 0.000002 loss_cls: 3.7958 (3.7953) grad_norm: 4.3612 (4.5301) time: 0.7657 data: 0.0002 max mem: 8421 +[2024-12-06 04:04:39 root] (utils.py 283): INFO Epoch: [27] [ 980/2502] eta: 0:19:27 lr: 0.000002 loss_cls: 4.0530 (3.7962) grad_norm: 4.3641 (4.5289) time: 0.7654 data: 0.0003 max mem: 8421 +[2024-12-06 04:04:47 root] (utils.py 283): INFO Epoch: [27] [ 990/2502] eta: 0:19:19 lr: 0.000002 loss_cls: 4.0631 (3.7976) grad_norm: 4.3378 (4.5282) time: 0.7628 data: 0.0003 max mem: 8421 +[2024-12-06 04:04:55 root] (utils.py 283): INFO Epoch: [27] [1000/2502] eta: 0:19:11 lr: 0.000002 loss_cls: 3.8468 (3.7972) grad_norm: 4.3158 (4.5262) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-06 04:05:02 root] (utils.py 283): INFO Epoch: [27] [1010/2502] eta: 0:19:04 lr: 0.000002 loss_cls: 3.8468 (3.7972) grad_norm: 4.2704 (4.5254) time: 0.7775 data: 0.0003 max mem: 8421 +[2024-12-06 04:05:10 root] (utils.py 283): INFO Epoch: [27] [1020/2502] eta: 0:18:56 lr: 0.000002 loss_cls: 3.9162 (3.7980) grad_norm: 4.2478 (4.5225) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 04:05:18 root] (utils.py 283): INFO Epoch: [27] [1030/2502] eta: 0:18:49 lr: 0.000002 loss_cls: 3.8392 (3.7976) grad_norm: 4.2478 (4.5229) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-06 04:05:26 root] (utils.py 283): INFO Epoch: [27] [1040/2502] eta: 0:18:41 lr: 0.000002 loss_cls: 3.9313 (3.7996) grad_norm: 4.4510 (4.5222) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 04:05:34 root] (utils.py 283): INFO Epoch: [27] [1050/2502] eta: 0:18:34 lr: 0.000002 loss_cls: 4.0238 (3.7978) grad_norm: 4.4076 (4.5214) time: 0.7619 data: 0.0002 max mem: 8421 +[2024-12-06 04:05:41 root] (utils.py 283): INFO Epoch: [27] [1060/2502] eta: 0:18:26 lr: 0.000002 loss_cls: 3.9253 (3.7987) grad_norm: 4.3790 (4.5252) time: 0.7611 data: 0.0002 max mem: 8421 +[2024-12-06 04:05:49 root] (utils.py 283): INFO Epoch: [27] [1070/2502] eta: 0:18:18 lr: 0.000002 loss_cls: 3.9253 (3.7987) grad_norm: 4.1879 (4.5228) time: 0.7663 data: 0.0002 max mem: 8421 +[2024-12-06 04:05:57 root] (utils.py 283): INFO Epoch: [27] [1080/2502] eta: 0:18:11 lr: 0.000002 loss_cls: 3.9220 (3.7998) grad_norm: 4.2207 (4.5276) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:04 root] (utils.py 283): INFO Epoch: [27] [1090/2502] eta: 0:18:03 lr: 0.000002 loss_cls: 3.9569 (3.8015) grad_norm: 4.5077 (4.5392) time: 0.7606 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:12 root] (utils.py 283): INFO Epoch: [27] [1100/2502] eta: 0:17:55 lr: 0.000002 loss_cls: 4.2045 (3.8048) grad_norm: 4.8176 (4.5427) time: 0.7609 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:19 root] (utils.py 283): INFO Epoch: [27] [1110/2502] eta: 0:17:47 lr: 0.000002 loss_cls: 4.1238 (3.8042) grad_norm: 4.6377 (4.5410) time: 0.7632 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:27 root] (utils.py 283): INFO Epoch: [27] [1120/2502] eta: 0:17:40 lr: 0.000002 loss_cls: 3.9652 (3.8035) grad_norm: 4.2620 (4.5413) time: 0.7614 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:35 root] (utils.py 283): INFO Epoch: [27] [1130/2502] eta: 0:17:32 lr: 0.000002 loss_cls: 4.0147 (3.8039) grad_norm: 4.6247 (4.5442) time: 0.7661 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:42 root] (utils.py 283): INFO Epoch: [27] [1140/2502] eta: 0:17:24 lr: 0.000002 loss_cls: 3.8296 (3.8058) grad_norm: 4.4844 (4.5429) time: 0.7688 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:50 root] (utils.py 283): INFO Epoch: [27] [1150/2502] eta: 0:17:17 lr: 0.000002 loss_cls: 3.7838 (3.8035) grad_norm: 4.3855 (4.5422) time: 0.7654 data: 0.0002 max mem: 8421 +[2024-12-06 04:06:58 root] (utils.py 283): INFO Epoch: [27] [1160/2502] eta: 0:17:09 lr: 0.000002 loss_cls: 3.7637 (3.8044) grad_norm: 4.4844 (4.5425) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-06 04:07:05 root] (utils.py 283): INFO Epoch: [27] [1170/2502] eta: 0:17:01 lr: 0.000002 loss_cls: 3.8798 (3.8056) grad_norm: 4.4888 (4.5462) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-06 04:07:13 root] (utils.py 283): INFO Epoch: [27] [1180/2502] eta: 0:16:54 lr: 0.000002 loss_cls: 3.9650 (3.8069) grad_norm: 4.4488 (4.5453) time: 0.7767 data: 0.0002 max mem: 8421 +[2024-12-06 04:07:21 root] (utils.py 283): INFO Epoch: [27] [1190/2502] eta: 0:16:46 lr: 0.000002 loss_cls: 4.0011 (3.8064) grad_norm: 4.3617 (4.5445) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-06 04:07:29 root] (utils.py 283): INFO Epoch: [27] [1200/2502] eta: 0:16:39 lr: 0.000002 loss_cls: 4.0376 (3.8078) grad_norm: 4.3718 (4.5465) time: 0.7729 data: 0.0003 max mem: 8421 +[2024-12-06 04:07:36 root] (utils.py 283): INFO Epoch: [27] [1210/2502] eta: 0:16:31 lr: 0.000002 loss_cls: 4.1785 (3.8099) grad_norm: 4.5159 (4.5492) time: 0.7664 data: 0.0002 max mem: 8421 +[2024-12-06 04:07:44 root] (utils.py 283): INFO Epoch: [27] [1220/2502] eta: 0:16:23 lr: 0.000002 loss_cls: 3.7768 (3.8084) grad_norm: 4.3994 (4.5476) time: 0.7736 data: 0.0002 max mem: 8421 +[2024-12-06 04:07:52 root] (utils.py 283): INFO Epoch: [27] [1230/2502] eta: 0:16:16 lr: 0.000002 loss_cls: 4.0117 (3.8102) grad_norm: 4.2784 (4.5474) time: 0.7729 data: 0.0002 max mem: 8421 +[2024-12-06 04:07:59 root] (utils.py 283): INFO Epoch: [27] [1240/2502] eta: 0:16:08 lr: 0.000002 loss_cls: 4.0363 (3.8098) grad_norm: 4.3647 (4.5458) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-06 04:08:07 root] (utils.py 283): INFO Epoch: [27] [1250/2502] eta: 0:16:00 lr: 0.000002 loss_cls: 3.7638 (3.8073) grad_norm: 4.3322 (4.5454) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-06 04:08:15 root] (utils.py 283): INFO Epoch: [27] [1260/2502] eta: 0:15:53 lr: 0.000002 loss_cls: 3.5475 (3.8062) grad_norm: 4.5441 (4.5489) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-06 04:08:22 root] (utils.py 283): INFO Epoch: [27] [1270/2502] eta: 0:15:45 lr: 0.000002 loss_cls: 3.7356 (3.8067) grad_norm: 4.4952 (4.5468) time: 0.7667 data: 0.0002 max mem: 8421 +[2024-12-06 04:08:30 root] (utils.py 283): INFO Epoch: [27] [1280/2502] eta: 0:15:37 lr: 0.000002 loss_cls: 3.8393 (3.8063) grad_norm: 4.1724 (4.5451) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-06 04:08:38 root] (utils.py 283): INFO Epoch: [27] [1290/2502] eta: 0:15:29 lr: 0.000002 loss_cls: 3.8340 (3.8055) grad_norm: 4.3048 (4.5455) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-06 04:08:45 root] (utils.py 283): INFO Epoch: [27] [1300/2502] eta: 0:15:22 lr: 0.000002 loss_cls: 3.8340 (3.8049) grad_norm: 4.3161 (4.5443) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-06 04:08:53 root] (utils.py 283): INFO Epoch: [27] [1310/2502] eta: 0:15:14 lr: 0.000002 loss_cls: 3.9242 (3.8055) grad_norm: 4.1975 (4.5446) time: 0.7752 data: 0.0002 max mem: 8421 +[2024-12-06 04:09:01 root] (utils.py 283): INFO Epoch: [27] [1320/2502] eta: 0:15:07 lr: 0.000002 loss_cls: 3.8278 (3.8039) grad_norm: 4.3112 (4.5435) time: 0.7874 data: 0.0002 max mem: 8421 +[2024-12-06 04:09:09 root] (utils.py 283): INFO Epoch: [27] [1330/2502] eta: 0:14:59 lr: 0.000002 loss_cls: 3.6862 (3.8043) grad_norm: 4.3112 (4.5441) time: 0.7876 data: 0.0002 max mem: 8421 +[2024-12-06 04:09:17 root] (utils.py 283): INFO Epoch: [27] [1340/2502] eta: 0:14:52 lr: 0.000002 loss_cls: 4.0557 (3.8058) grad_norm: 4.4382 (4.5436) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-06 04:09:24 root] (utils.py 283): INFO Epoch: [27] [1350/2502] eta: 0:14:44 lr: 0.000002 loss_cls: 4.1398 (3.8080) grad_norm: 4.3727 (4.5421) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-06 04:09:32 root] (utils.py 283): INFO Epoch: [27] [1360/2502] eta: 0:14:36 lr: 0.000002 loss_cls: 3.8300 (3.8073) grad_norm: 4.4337 (4.5412) time: 0.7707 data: 0.0003 max mem: 8421 +[2024-12-06 04:09:40 root] (utils.py 283): INFO Epoch: [27] [1370/2502] eta: 0:14:29 lr: 0.000002 loss_cls: 3.5774 (3.8048) grad_norm: 4.3730 (4.5397) time: 0.7697 data: 0.0003 max mem: 8421 +[2024-12-06 04:09:47 root] (utils.py 283): INFO Epoch: [27] [1380/2502] eta: 0:14:21 lr: 0.000002 loss_cls: 3.4789 (3.8033) grad_norm: 4.2437 (4.5380) time: 0.7694 data: 0.0003 max mem: 8421 +[2024-12-06 04:09:55 root] (utils.py 283): INFO Epoch: [27] [1390/2502] eta: 0:14:13 lr: 0.000002 loss_cls: 3.6029 (3.8014) grad_norm: 4.4477 (4.5390) time: 0.7703 data: 0.0002 max mem: 8421 +[2024-12-06 04:10:03 root] (utils.py 283): INFO Epoch: [27] [1400/2502] eta: 0:14:06 lr: 0.000002 loss_cls: 3.7856 (3.8014) grad_norm: 4.5289 (4.5384) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 04:10:11 root] (utils.py 283): INFO Epoch: [27] [1410/2502] eta: 0:13:58 lr: 0.000002 loss_cls: 3.7856 (3.8008) grad_norm: 4.4361 (4.5376) time: 0.8042 data: 0.0003 max mem: 8421 +[2024-12-06 04:10:19 root] (utils.py 283): INFO Epoch: [27] [1420/2502] eta: 0:13:51 lr: 0.000002 loss_cls: 3.5533 (3.7994) grad_norm: 4.3345 (4.5375) time: 0.8130 data: 0.0003 max mem: 8421 +[2024-12-06 04:10:27 root] (utils.py 283): INFO Epoch: [27] [1430/2502] eta: 0:13:44 lr: 0.000002 loss_cls: 3.4881 (3.7976) grad_norm: 4.2733 (4.5373) time: 0.8129 data: 0.0003 max mem: 8421 +[2024-12-06 04:10:35 root] (utils.py 283): INFO Epoch: [27] [1440/2502] eta: 0:13:36 lr: 0.000002 loss_cls: 3.9002 (3.7993) grad_norm: 4.2733 (4.5358) time: 0.8096 data: 0.0003 max mem: 8421 +[2024-12-06 04:10:43 root] (utils.py 283): INFO Epoch: [27] [1450/2502] eta: 0:13:29 lr: 0.000002 loss_cls: 4.0551 (3.7998) grad_norm: 4.2712 (4.5355) time: 0.7930 data: 0.0003 max mem: 8421 +[2024-12-06 04:10:51 root] (utils.py 283): INFO Epoch: [27] [1460/2502] eta: 0:13:21 lr: 0.000002 loss_cls: 3.8904 (3.7999) grad_norm: 4.2712 (4.5338) time: 0.7886 data: 0.0003 max mem: 8421 +[2024-12-06 04:10:59 root] (utils.py 283): INFO Epoch: [27] [1470/2502] eta: 0:13:14 lr: 0.000002 loss_cls: 3.7872 (3.7990) grad_norm: 4.2792 (4.5323) time: 0.7850 data: 0.0003 max mem: 8421 +[2024-12-06 04:11:07 root] (utils.py 283): INFO Epoch: [27] [1480/2502] eta: 0:13:06 lr: 0.000002 loss_cls: 3.8452 (3.7985) grad_norm: 4.3596 (4.5316) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 04:11:15 root] (utils.py 283): INFO Epoch: [27] [1490/2502] eta: 0:12:58 lr: 0.000002 loss_cls: 3.8507 (3.7984) grad_norm: 4.3366 (4.5326) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-06 04:11:23 root] (utils.py 283): INFO Epoch: [27] [1500/2502] eta: 0:12:51 lr: 0.000002 loss_cls: 3.7086 (3.7982) grad_norm: 4.2575 (4.5315) time: 0.7856 data: 0.0002 max mem: 8421 +[2024-12-06 04:11:30 root] (utils.py 283): INFO Epoch: [27] [1510/2502] eta: 0:12:43 lr: 0.000002 loss_cls: 3.9435 (3.7998) grad_norm: 4.4317 (4.5317) time: 0.7849 data: 0.0003 max mem: 8421 +[2024-12-06 04:11:38 root] (utils.py 283): INFO Epoch: [27] [1520/2502] eta: 0:12:36 lr: 0.000002 loss_cls: 3.9884 (3.7995) grad_norm: 4.4156 (4.5312) time: 0.7873 data: 0.0002 max mem: 8421 +[2024-12-06 04:11:46 root] (utils.py 283): INFO Epoch: [27] [1530/2502] eta: 0:12:28 lr: 0.000002 loss_cls: 3.8284 (3.8000) grad_norm: 4.3876 (4.5325) time: 0.7775 data: 0.0002 max mem: 8421 +[2024-12-06 04:11:54 root] (utils.py 283): INFO Epoch: [27] [1540/2502] eta: 0:12:20 lr: 0.000002 loss_cls: 3.8912 (3.8004) grad_norm: 4.3876 (4.5313) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-06 04:12:01 root] (utils.py 283): INFO Epoch: [27] [1550/2502] eta: 0:12:12 lr: 0.000002 loss_cls: 3.8989 (3.7993) grad_norm: 4.4025 (4.5316) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-06 04:12:09 root] (utils.py 283): INFO Epoch: [27] [1560/2502] eta: 0:12:05 lr: 0.000002 loss_cls: 3.9656 (3.8007) grad_norm: 4.2797 (4.5298) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-06 04:12:17 root] (utils.py 283): INFO Epoch: [27] [1570/2502] eta: 0:11:57 lr: 0.000002 loss_cls: 4.0455 (3.8007) grad_norm: 4.2797 (4.5314) time: 0.7798 data: 0.0002 max mem: 8421 +[2024-12-06 04:12:25 root] (utils.py 283): INFO Epoch: [27] [1580/2502] eta: 0:11:50 lr: 0.000002 loss_cls: 4.0459 (3.8007) grad_norm: 4.4168 (4.5302) time: 0.7922 data: 0.0003 max mem: 8421 +[2024-12-06 04:12:33 root] (utils.py 283): INFO Epoch: [27] [1590/2502] eta: 0:11:42 lr: 0.000002 loss_cls: 3.9667 (3.8011) grad_norm: 4.3801 (4.5305) time: 0.8014 data: 0.0003 max mem: 8421 +[2024-12-06 04:12:41 root] (utils.py 283): INFO Epoch: [27] [1600/2502] eta: 0:11:35 lr: 0.000002 loss_cls: 3.9283 (3.7998) grad_norm: 4.3801 (4.5296) time: 0.8037 data: 0.0003 max mem: 8421 +[2024-12-06 04:12:49 root] (utils.py 283): INFO Epoch: [27] [1610/2502] eta: 0:11:27 lr: 0.000002 loss_cls: 3.5395 (3.7977) grad_norm: 4.3371 (4.5278) time: 0.8023 data: 0.0003 max mem: 8421 +[2024-12-06 04:12:57 root] (utils.py 283): INFO Epoch: [27] [1620/2502] eta: 0:11:20 lr: 0.000002 loss_cls: 3.9416 (3.7991) grad_norm: 4.4078 (4.5281) time: 0.7983 data: 0.0003 max mem: 8421 +[2024-12-06 04:13:05 root] (utils.py 283): INFO Epoch: [27] [1630/2502] eta: 0:11:12 lr: 0.000002 loss_cls: 4.0706 (3.8004) grad_norm: 4.5458 (4.5294) time: 0.7980 data: 0.0003 max mem: 8421 +[2024-12-06 04:13:13 root] (utils.py 283): INFO Epoch: [27] [1640/2502] eta: 0:11:04 lr: 0.000002 loss_cls: 4.0551 (3.8013) grad_norm: 4.3940 (4.5298) time: 0.7919 data: 0.0003 max mem: 8421 +[2024-12-06 04:13:21 root] (utils.py 283): INFO Epoch: [27] [1650/2502] eta: 0:10:57 lr: 0.000002 loss_cls: 3.8308 (3.8007) grad_norm: 4.3940 (4.5291) time: 0.7790 data: 0.0002 max mem: 8421 +[2024-12-06 04:13:28 root] (utils.py 283): INFO Epoch: [27] [1660/2502] eta: 0:10:49 lr: 0.000002 loss_cls: 3.6727 (3.7999) grad_norm: 4.4342 (4.5287) time: 0.7713 data: 0.0002 max mem: 8421 +[2024-12-06 04:13:36 root] (utils.py 283): INFO Epoch: [27] [1670/2502] eta: 0:10:41 lr: 0.000002 loss_cls: 3.6473 (3.7996) grad_norm: 4.4342 (4.5278) time: 0.7836 data: 0.0002 max mem: 8421 +[2024-12-06 04:13:44 root] (utils.py 283): INFO Epoch: [27] [1680/2502] eta: 0:10:34 lr: 0.000002 loss_cls: 3.8359 (3.7989) grad_norm: 4.4688 (4.5281) time: 0.7989 data: 0.0003 max mem: 8421 +[2024-12-06 04:13:52 root] (utils.py 283): INFO Epoch: [27] [1690/2502] eta: 0:10:26 lr: 0.000002 loss_cls: 3.8820 (3.7988) grad_norm: 4.6056 (4.5294) time: 0.8092 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:01 root] (utils.py 283): INFO Epoch: [27] [1700/2502] eta: 0:10:19 lr: 0.000002 loss_cls: 3.8929 (3.7988) grad_norm: 4.4933 (4.5284) time: 0.8138 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:09 root] (utils.py 283): INFO Epoch: [27] [1710/2502] eta: 0:10:11 lr: 0.000002 loss_cls: 3.8272 (3.7982) grad_norm: 4.4933 (4.5311) time: 0.8186 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:17 root] (utils.py 283): INFO Epoch: [27] [1720/2502] eta: 0:10:04 lr: 0.000002 loss_cls: 3.9405 (3.7982) grad_norm: 4.5550 (4.5324) time: 0.8033 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:24 root] (utils.py 283): INFO Epoch: [27] [1730/2502] eta: 0:09:56 lr: 0.000002 loss_cls: 3.9807 (3.7984) grad_norm: 4.6036 (4.5338) time: 0.7736 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:32 root] (utils.py 283): INFO Epoch: [27] [1740/2502] eta: 0:09:48 lr: 0.000002 loss_cls: 3.9616 (3.7979) grad_norm: 4.6122 (4.5338) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:40 root] (utils.py 283): INFO Epoch: [27] [1750/2502] eta: 0:09:40 lr: 0.000002 loss_cls: 3.7109 (3.7978) grad_norm: 4.5008 (4.5362) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:48 root] (utils.py 283): INFO Epoch: [27] [1760/2502] eta: 0:09:33 lr: 0.000002 loss_cls: 4.0415 (3.8002) grad_norm: 4.5718 (4.5392) time: 0.7731 data: 0.0003 max mem: 8421 +[2024-12-06 04:14:55 root] (utils.py 283): INFO Epoch: [27] [1770/2502] eta: 0:09:25 lr: 0.000002 loss_cls: 4.1307 (3.8018) grad_norm: 4.4476 (4.5382) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-06 04:15:03 root] (utils.py 283): INFO Epoch: [27] [1780/2502] eta: 0:09:17 lr: 0.000002 loss_cls: 3.9826 (3.8016) grad_norm: 4.3130 (4.5378) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-06 04:15:11 root] (utils.py 283): INFO Epoch: [27] [1790/2502] eta: 0:09:09 lr: 0.000002 loss_cls: 3.8286 (3.8020) grad_norm: 4.3096 (4.5372) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-06 04:15:18 root] (utils.py 283): INFO Epoch: [27] [1800/2502] eta: 0:09:02 lr: 0.000002 loss_cls: 3.8286 (3.8024) grad_norm: 4.2891 (4.5365) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-06 04:15:26 root] (utils.py 283): INFO Epoch: [27] [1810/2502] eta: 0:08:54 lr: 0.000002 loss_cls: 3.9020 (3.8032) grad_norm: 4.3025 (4.5368) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-06 04:15:33 root] (utils.py 283): INFO Epoch: [27] [1820/2502] eta: 0:08:46 lr: 0.000002 loss_cls: 4.0478 (3.8050) grad_norm: 4.4473 (4.5371) time: 0.7623 data: 0.0002 max mem: 8421 +[2024-12-06 04:15:41 root] (utils.py 283): INFO Epoch: [27] [1830/2502] eta: 0:08:38 lr: 0.000002 loss_cls: 4.1581 (3.8066) grad_norm: 4.4524 (4.5369) time: 0.7633 data: 0.0003 max mem: 8421 +[2024-12-06 04:15:49 root] (utils.py 283): INFO Epoch: [27] [1840/2502] eta: 0:08:31 lr: 0.000002 loss_cls: 3.7572 (3.8046) grad_norm: 4.6114 (4.5369) time: 0.7647 data: 0.0003 max mem: 8421 +[2024-12-06 04:15:56 root] (utils.py 283): INFO Epoch: [27] [1850/2502] eta: 0:08:23 lr: 0.000002 loss_cls: 3.5727 (3.8043) grad_norm: 4.4577 (4.5367) time: 0.7626 data: 0.0003 max mem: 8421 +[2024-12-06 04:16:04 root] (utils.py 283): INFO Epoch: [27] [1860/2502] eta: 0:08:15 lr: 0.000002 loss_cls: 3.6043 (3.8037) grad_norm: 4.4334 (4.5370) time: 0.7637 data: 0.0002 max mem: 8421 +[2024-12-06 04:16:12 root] (utils.py 283): INFO Epoch: [27] [1870/2502] eta: 0:08:07 lr: 0.000002 loss_cls: 3.7846 (3.8037) grad_norm: 4.3909 (4.5360) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-06 04:16:19 root] (utils.py 283): INFO Epoch: [27] [1880/2502] eta: 0:08:00 lr: 0.000002 loss_cls: 4.0970 (3.8045) grad_norm: 4.3668 (4.5366) time: 0.7622 data: 0.0002 max mem: 8421 +[2024-12-06 04:16:27 root] (utils.py 283): INFO Epoch: [27] [1890/2502] eta: 0:07:52 lr: 0.000002 loss_cls: 3.8310 (3.8043) grad_norm: 4.5047 (4.5378) time: 0.7641 data: 0.0003 max mem: 8421 +[2024-12-06 04:16:35 root] (utils.py 283): INFO Epoch: [27] [1900/2502] eta: 0:07:44 lr: 0.000002 loss_cls: 3.7627 (3.8038) grad_norm: 4.4443 (4.5385) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-06 04:16:42 root] (utils.py 283): INFO Epoch: [27] [1910/2502] eta: 0:07:36 lr: 0.000002 loss_cls: 3.9403 (3.8032) grad_norm: 4.3610 (4.5379) time: 0.7646 data: 0.0003 max mem: 8421 +[2024-12-06 04:16:50 root] (utils.py 283): INFO Epoch: [27] [1920/2502] eta: 0:07:29 lr: 0.000002 loss_cls: 4.0314 (3.8034) grad_norm: 4.3537 (4.5384) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-06 04:16:57 root] (utils.py 283): INFO Epoch: [27] [1930/2502] eta: 0:07:21 lr: 0.000002 loss_cls: 3.6254 (3.8018) grad_norm: 4.3351 (4.5391) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-06 04:17:05 root] (utils.py 283): INFO Epoch: [27] [1940/2502] eta: 0:07:13 lr: 0.000002 loss_cls: 3.7461 (3.8024) grad_norm: 4.3595 (4.5398) time: 0.7650 data: 0.0003 max mem: 8421 +[2024-12-06 04:17:13 root] (utils.py 283): INFO Epoch: [27] [1950/2502] eta: 0:07:05 lr: 0.000002 loss_cls: 3.9441 (3.8023) grad_norm: 4.4953 (4.5418) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-06 04:17:21 root] (utils.py 283): INFO Epoch: [27] [1960/2502] eta: 0:06:58 lr: 0.000002 loss_cls: 3.9733 (3.8036) grad_norm: 4.4959 (4.5425) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-06 04:17:28 root] (utils.py 283): INFO Epoch: [27] [1970/2502] eta: 0:06:50 lr: 0.000002 loss_cls: 4.0232 (3.8035) grad_norm: 4.5965 (4.5442) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-06 04:17:36 root] (utils.py 283): INFO Epoch: [27] [1980/2502] eta: 0:06:42 lr: 0.000002 loss_cls: 4.0280 (3.8048) grad_norm: 4.5653 (4.5454) time: 0.7720 data: 0.0002 max mem: 8421 +[2024-12-06 04:17:44 root] (utils.py 283): INFO Epoch: [27] [1990/2502] eta: 0:06:35 lr: 0.000002 loss_cls: 3.9478 (3.8052) grad_norm: 4.5221 (4.5453) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-06 04:17:51 root] (utils.py 283): INFO Epoch: [27] [2000/2502] eta: 0:06:27 lr: 0.000002 loss_cls: 3.7274 (3.8041) grad_norm: 4.4045 (4.5448) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-06 04:17:59 root] (utils.py 283): INFO Epoch: [27] [2010/2502] eta: 0:06:19 lr: 0.000002 loss_cls: 3.5904 (3.8029) grad_norm: 4.3911 (4.5441) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-06 04:18:06 root] (utils.py 283): INFO Epoch: [27] [2020/2502] eta: 0:06:11 lr: 0.000002 loss_cls: 3.8797 (3.8035) grad_norm: 4.1946 (4.5422) time: 0.7622 data: 0.0002 max mem: 8421 +[2024-12-06 04:18:14 root] (utils.py 283): INFO Epoch: [27] [2030/2502] eta: 0:06:04 lr: 0.000002 loss_cls: 3.9592 (3.8024) grad_norm: 4.1479 (4.5410) time: 0.7633 data: 0.0002 max mem: 8421 +[2024-12-06 04:18:22 root] (utils.py 283): INFO Epoch: [27] [2040/2502] eta: 0:05:56 lr: 0.000002 loss_cls: 3.7938 (3.8028) grad_norm: 4.2488 (4.5401) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-06 04:18:29 root] (utils.py 283): INFO Epoch: [27] [2050/2502] eta: 0:05:48 lr: 0.000002 loss_cls: 3.8742 (3.8030) grad_norm: 4.3433 (4.5397) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-06 04:18:37 root] (utils.py 283): INFO Epoch: [27] [2060/2502] eta: 0:05:40 lr: 0.000002 loss_cls: 3.8742 (3.8036) grad_norm: 4.2667 (4.5377) time: 0.7656 data: 0.0003 max mem: 8421 +[2024-12-06 04:18:45 root] (utils.py 283): INFO Epoch: [27] [2070/2502] eta: 0:05:33 lr: 0.000002 loss_cls: 3.8507 (3.8029) grad_norm: 4.2667 (4.5369) time: 0.7701 data: 0.0003 max mem: 8421 +[2024-12-06 04:18:53 root] (utils.py 283): INFO Epoch: [27] [2080/2502] eta: 0:05:25 lr: 0.000002 loss_cls: 3.5821 (3.8018) grad_norm: 4.2801 (4.5362) time: 0.7710 data: 0.0002 max mem: 8421 +[2024-12-06 04:19:00 root] (utils.py 283): INFO Epoch: [27] [2090/2502] eta: 0:05:17 lr: 0.000002 loss_cls: 3.7098 (3.8010) grad_norm: 4.3348 (4.5358) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-06 04:19:08 root] (utils.py 283): INFO Epoch: [27] [2100/2502] eta: 0:05:10 lr: 0.000002 loss_cls: 3.7098 (3.8004) grad_norm: 4.3348 (4.5351) time: 0.7700 data: 0.0002 max mem: 8421 +[2024-12-06 04:19:16 root] (utils.py 283): INFO Epoch: [27] [2110/2502] eta: 0:05:02 lr: 0.000002 loss_cls: 3.6567 (3.7998) grad_norm: 4.3142 (4.5355) time: 0.7686 data: 0.0002 max mem: 8421 +[2024-12-06 04:19:23 root] (utils.py 283): INFO Epoch: [27] [2120/2502] eta: 0:04:54 lr: 0.000002 loss_cls: 3.9511 (3.8006) grad_norm: 4.4135 (4.5352) time: 0.7708 data: 0.0002 max mem: 8421 +[2024-12-06 04:19:31 root] (utils.py 283): INFO Epoch: [27] [2130/2502] eta: 0:04:46 lr: 0.000002 loss_cls: 3.9511 (3.8012) grad_norm: 4.4752 (4.5354) time: 0.7714 data: 0.0002 max mem: 8421 +[2024-12-06 04:19:39 root] (utils.py 283): INFO Epoch: [27] [2140/2502] eta: 0:04:39 lr: 0.000002 loss_cls: 3.9497 (3.8011) grad_norm: 4.4646 (4.5346) time: 0.7678 data: 0.0003 max mem: 8421 +[2024-12-06 04:19:46 root] (utils.py 283): INFO Epoch: [27] [2150/2502] eta: 0:04:31 lr: 0.000002 loss_cls: 3.9929 (3.8016) grad_norm: 4.4641 (4.5383) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-06 04:19:54 root] (utils.py 283): INFO Epoch: [27] [2160/2502] eta: 0:04:23 lr: 0.000002 loss_cls: 3.9037 (3.8025) grad_norm: 4.4899 (4.5394) time: 0.7755 data: 0.0003 max mem: 8421 +[2024-12-06 04:20:02 root] (utils.py 283): INFO Epoch: [27] [2170/2502] eta: 0:04:16 lr: 0.000002 loss_cls: 4.0625 (3.8031) grad_norm: 4.3582 (4.5490) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-06 04:20:10 root] (utils.py 283): INFO Epoch: [27] [2180/2502] eta: 0:04:08 lr: 0.000002 loss_cls: 4.1249 (3.8036) grad_norm: 4.3582 (4.5486) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 04:20:17 root] (utils.py 283): INFO Epoch: [27] [2190/2502] eta: 0:04:00 lr: 0.000002 loss_cls: 3.8717 (3.8033) grad_norm: 4.3008 (4.5506) time: 0.7675 data: 0.0003 max mem: 8421 +[2024-12-06 04:20:25 root] (utils.py 283): INFO Epoch: [27] [2200/2502] eta: 0:03:52 lr: 0.000002 loss_cls: 3.9342 (3.8043) grad_norm: 4.4652 (4.5554) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-06 04:20:33 root] (utils.py 283): INFO Epoch: [27] [2210/2502] eta: 0:03:45 lr: 0.000002 loss_cls: 3.9342 (3.8042) grad_norm: 4.7426 (4.5564) time: 0.7607 data: 0.0002 max mem: 8421 +[2024-12-06 04:20:40 root] (utils.py 283): INFO Epoch: [27] [2220/2502] eta: 0:03:37 lr: 0.000002 loss_cls: 3.8545 (3.8032) grad_norm: 4.4705 (4.5560) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-06 04:20:48 root] (utils.py 283): INFO Epoch: [27] [2230/2502] eta: 0:03:29 lr: 0.000002 loss_cls: 3.6967 (3.8033) grad_norm: 4.3679 (4.5551) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-06 04:20:56 root] (utils.py 283): INFO Epoch: [27] [2240/2502] eta: 0:03:22 lr: 0.000002 loss_cls: 3.6967 (3.8023) grad_norm: 4.2004 (4.5537) time: 0.7644 data: 0.0002 max mem: 8421 +[2024-12-06 04:21:03 root] (utils.py 283): INFO Epoch: [27] [2250/2502] eta: 0:03:14 lr: 0.000002 loss_cls: 3.8542 (3.8026) grad_norm: 4.2531 (4.5532) time: 0.7633 data: 0.0003 max mem: 8421 +[2024-12-06 04:21:11 root] (utils.py 283): INFO Epoch: [27] [2260/2502] eta: 0:03:06 lr: 0.000002 loss_cls: 3.7373 (3.8011) grad_norm: 4.2220 (4.5513) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-06 04:21:19 root] (utils.py 283): INFO Epoch: [27] [2270/2502] eta: 0:02:58 lr: 0.000002 loss_cls: 3.7133 (3.8013) grad_norm: 4.2039 (4.5503) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-06 04:21:26 root] (utils.py 283): INFO Epoch: [27] [2280/2502] eta: 0:02:51 lr: 0.000002 loss_cls: 3.8940 (3.8022) grad_norm: 4.2344 (4.5497) time: 0.7674 data: 0.0002 max mem: 8421 +[2024-12-06 04:21:34 root] (utils.py 283): INFO Epoch: [27] [2290/2502] eta: 0:02:43 lr: 0.000002 loss_cls: 3.9781 (3.8026) grad_norm: 4.3322 (4.5493) time: 0.7628 data: 0.0002 max mem: 8421 +[2024-12-06 04:21:42 root] (utils.py 283): INFO Epoch: [27] [2300/2502] eta: 0:02:35 lr: 0.000002 loss_cls: 4.1241 (3.8031) grad_norm: 4.3251 (4.5486) time: 0.7750 data: 0.0002 max mem: 8421 +[2024-12-06 04:21:50 root] (utils.py 283): INFO Epoch: [27] [2310/2502] eta: 0:02:28 lr: 0.000002 loss_cls: 4.0635 (3.8031) grad_norm: 4.1720 (4.5467) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 04:21:58 root] (utils.py 283): INFO Epoch: [27] [2320/2502] eta: 0:02:20 lr: 0.000002 loss_cls: 4.0446 (3.8035) grad_norm: 4.1273 (4.5466) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:05 root] (utils.py 283): INFO Epoch: [27] [2330/2502] eta: 0:02:12 lr: 0.000002 loss_cls: 3.8828 (3.8033) grad_norm: 4.4149 (4.5469) time: 0.7734 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:13 root] (utils.py 283): INFO Epoch: [27] [2340/2502] eta: 0:02:04 lr: 0.000002 loss_cls: 3.7688 (3.8035) grad_norm: 4.7022 (4.5503) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:20 root] (utils.py 283): INFO Epoch: [27] [2350/2502] eta: 0:01:57 lr: 0.000002 loss_cls: 3.9781 (3.8034) grad_norm: 4.7156 (4.5518) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:28 root] (utils.py 283): INFO Epoch: [27] [2360/2502] eta: 0:01:49 lr: 0.000002 loss_cls: 3.7917 (3.8027) grad_norm: 4.5931 (4.5519) time: 0.7651 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:36 root] (utils.py 283): INFO Epoch: [27] [2370/2502] eta: 0:01:41 lr: 0.000002 loss_cls: 3.5000 (3.8014) grad_norm: 4.3959 (4.5511) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:43 root] (utils.py 283): INFO Epoch: [27] [2380/2502] eta: 0:01:34 lr: 0.000002 loss_cls: 3.5711 (3.8011) grad_norm: 4.1927 (4.5498) time: 0.7622 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:51 root] (utils.py 283): INFO Epoch: [27] [2390/2502] eta: 0:01:26 lr: 0.000002 loss_cls: 3.9644 (3.8014) grad_norm: 4.2094 (4.5488) time: 0.7625 data: 0.0003 max mem: 8421 +[2024-12-06 04:22:59 root] (utils.py 283): INFO Epoch: [27] [2400/2502] eta: 0:01:18 lr: 0.000002 loss_cls: 3.9644 (3.8011) grad_norm: 4.4799 (4.5486) time: 0.7635 data: 0.0003 max mem: 8421 +[2024-12-06 04:23:06 root] (utils.py 283): INFO Epoch: [27] [2410/2502] eta: 0:01:10 lr: 0.000002 loss_cls: 3.7399 (3.8003) grad_norm: 4.2635 (4.5476) time: 0.7657 data: 0.0003 max mem: 8421 +[2024-12-06 04:23:14 root] (utils.py 283): INFO Epoch: [27] [2420/2502] eta: 0:01:03 lr: 0.000002 loss_cls: 3.8400 (3.8007) grad_norm: 4.3417 (4.5476) time: 0.7776 data: 0.0002 max mem: 8421 +[2024-12-06 04:23:22 root] (utils.py 283): INFO Epoch: [27] [2430/2502] eta: 0:00:55 lr: 0.000002 loss_cls: 4.0895 (3.8015) grad_norm: 4.3647 (4.5470) time: 0.7831 data: 0.0002 max mem: 8421 +[2024-12-06 04:23:30 root] (utils.py 283): INFO Epoch: [27] [2440/2502] eta: 0:00:47 lr: 0.000002 loss_cls: 4.1162 (3.8018) grad_norm: 4.3046 (4.5469) time: 0.7737 data: 0.0002 max mem: 8421 +[2024-12-06 04:23:37 root] (utils.py 283): INFO Epoch: [27] [2450/2502] eta: 0:00:40 lr: 0.000002 loss_cls: 4.1426 (3.8026) grad_norm: 4.5158 (4.5470) time: 0.7672 data: 0.0002 max mem: 8421 +[2024-12-06 04:23:45 root] (utils.py 283): INFO Epoch: [27] [2460/2502] eta: 0:00:32 lr: 0.000002 loss_cls: 3.9768 (3.8013) grad_norm: 4.4601 (4.5465) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-06 04:23:53 root] (utils.py 283): INFO Epoch: [27] [2470/2502] eta: 0:00:24 lr: 0.000002 loss_cls: 3.8924 (3.8017) grad_norm: 4.3054 (4.5459) time: 0.7709 data: 0.0002 max mem: 8421 +[2024-12-06 04:24:01 root] (utils.py 283): INFO Epoch: [27] [2480/2502] eta: 0:00:16 lr: 0.000002 loss_cls: 4.0579 (3.8022) grad_norm: 4.5203 (4.5461) time: 0.7876 data: 0.0003 max mem: 8421 +[2024-12-06 04:24:09 root] (utils.py 283): INFO Epoch: [27] [2490/2502] eta: 0:00:09 lr: 0.000002 loss_cls: 3.8896 (3.8022) grad_norm: 4.5211 (4.5458) time: 0.8277 data: 0.0236 max mem: 8421 +[2024-12-06 04:24:18 root] (utils.py 283): INFO Epoch: [27] [2500/2502] eta: 0:00:01 lr: 0.000002 loss_cls: 3.9703 (3.8023) grad_norm: 4.5347 (4.5460) time: 0.8450 data: 0.0236 max mem: 8421 +[2024-12-06 04:24:18 root] (utils.py 283): INFO Epoch: [27] [2501/2502] eta: 0:00:00 lr: 0.000002 loss_cls: 3.9703 (3.8021) grad_norm: 4.5416 (4.5462) time: 0.8449 data: 0.0236 max mem: 8421 +[2024-12-06 04:24:18 root] (utils.py 297): INFO Epoch: [27] Total time: 0:32:11 (0.7719 s / it) +[2024-12-06 04:24:18 root] (engine.py 179): INFO Averaged stats:lr: 0.000002 loss_cls: 3.9703 (3.8042) grad_norm: 4.5416 (4.5462) +[2024-12-06 04:24:19 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7062 (0.7062) acc1: 86.7188 (86.7188) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1311 data: 0.0004 max mem: 8421 +[2024-12-06 04:24:20 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7944 (0.8450) acc1: 83.5938 (82.4574) acc3: 93.7500 (93.2528) acc5: 96.0938 (95.8097) time: 0.1313 data: 0.0004 max mem: 8421 +[2024-12-06 04:24:22 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8600 (0.8952) acc1: 80.4688 (81.2500) acc3: 92.9688 (92.7083) acc5: 96.0938 (95.3869) time: 0.1335 data: 0.0005 max mem: 8421 +[2024-12-06 04:24:23 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9298 (0.9006) acc1: 80.4688 (80.5948) acc3: 92.9688 (92.9435) acc5: 96.0938 (95.5141) time: 0.1502 data: 0.0147 max mem: 8421 +[2024-12-06 04:24:25 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8335 (0.8942) acc1: 80.4688 (80.6974) acc3: 92.9688 (92.9878) acc5: 96.0938 (95.5412) time: 0.1793 data: 0.0427 max mem: 8421 +[2024-12-06 04:24:27 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0182 (0.9867) acc1: 76.5625 (78.5233) acc3: 86.7188 (91.3756) acc5: 92.1875 (94.4393) time: 0.1918 data: 0.0526 max mem: 8421 +[2024-12-06 04:24:29 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2935 (1.0298) acc1: 71.8750 (77.6511) acc3: 85.1562 (90.5610) acc5: 89.0625 (93.7500) time: 0.1949 data: 0.0581 max mem: 8421 +[2024-12-06 04:24:31 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2105 (1.0723) acc1: 72.6562 (76.6395) acc3: 86.7188 (90.0198) acc5: 90.6250 (93.3319) time: 0.2138 data: 0.0812 max mem: 8421 +[2024-12-06 04:24:33 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2843 (1.1083) acc1: 70.3125 (75.8005) acc3: 85.9375 (89.4387) acc5: 89.8438 (92.7951) time: 0.1900 data: 0.0575 max mem: 8421 +[2024-12-06 04:24:34 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3160 (1.1380) acc1: 68.7500 (75.0515) acc3: 85.9375 (88.9681) acc5: 89.0625 (92.3506) time: 0.1475 data: 0.0160 max mem: 8421 +[2024-12-06 04:24:36 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.1838 (1.1279) acc1: 74.2188 (75.1840) acc3: 87.5000 (89.1360) acc5: 91.4062 (92.5440) time: 0.1668 data: 0.0382 max mem: 8421 +[2024-12-06 04:24:36 root] (utils.py 297): INFO Test: Total time: 0:00:16 (0.1720 s / it) +[2024-12-06 04:24:36 root] (engine.py 264): INFO * Acc@1 75.180 Acc@3 89.168 Acc@5 92.436 loss 1.126 flops 1.285 layer_flops 1.251 +[2024-12-06 04:24:36 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.2% +[2024-12-06 04:24:36 root] (main.py 551): INFO Max accuracy: 75.18% +[2024-12-06 04:24:37 root] (utils.py 283): INFO Epoch: [28] [ 0/2502] eta: 0:32:09 lr: 0.000001 loss_cls: 3.9609 (3.9609) grad_norm: 3.9996 (3.9996) time: 0.7713 data: 0.0003 max mem: 8421 +[2024-12-06 04:24:45 root] (utils.py 283): INFO Epoch: [28] [ 10/2502] eta: 0:32:55 lr: 0.000001 loss_cls: 3.9609 (3.6894) grad_norm: 4.5920 (4.4854) time: 0.7927 data: 0.0003 max mem: 8421 +[2024-12-06 04:24:52 root] (utils.py 283): INFO Epoch: [28] [ 20/2502] eta: 0:32:15 lr: 0.000001 loss_cls: 3.6640 (3.7397) grad_norm: 4.6652 (4.7760) time: 0.7802 data: 0.0002 max mem: 8421 +[2024-12-06 04:25:00 root] (utils.py 283): INFO Epoch: [28] [ 30/2502] eta: 0:31:53 lr: 0.000001 loss_cls: 3.9582 (3.8083) grad_norm: 4.5602 (4.6661) time: 0.7637 data: 0.0003 max mem: 8421 +[2024-12-06 04:25:08 root] (utils.py 283): INFO Epoch: [28] [ 40/2502] eta: 0:31:49 lr: 0.000001 loss_cls: 3.9739 (3.8069) grad_norm: 4.2720 (4.5811) time: 0.7708 data: 0.0003 max mem: 8421 +[2024-12-06 04:25:15 root] (utils.py 283): INFO Epoch: [28] [ 50/2502] eta: 0:31:35 lr: 0.000001 loss_cls: 4.0097 (3.8271) grad_norm: 4.3155 (4.5578) time: 0.7712 data: 0.0002 max mem: 8421 +[2024-12-06 04:25:23 root] (utils.py 283): INFO Epoch: [28] [ 60/2502] eta: 0:31:25 lr: 0.000001 loss_cls: 3.6271 (3.7772) grad_norm: 4.3594 (4.5210) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-06 04:25:31 root] (utils.py 283): INFO Epoch: [28] [ 70/2502] eta: 0:31:14 lr: 0.000001 loss_cls: 3.7958 (3.7662) grad_norm: 4.3272 (4.5000) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-06 04:25:38 root] (utils.py 283): INFO Epoch: [28] [ 80/2502] eta: 0:31:08 lr: 0.000001 loss_cls: 3.9647 (3.8024) grad_norm: 4.3169 (4.4801) time: 0.7699 data: 0.0003 max mem: 8421 +[2024-12-06 04:25:46 root] (utils.py 283): INFO Epoch: [28] [ 90/2502] eta: 0:30:59 lr: 0.000001 loss_cls: 4.2193 (3.8209) grad_norm: 4.2286 (4.4700) time: 0.7715 data: 0.0002 max mem: 8421 +[2024-12-06 04:25:54 root] (utils.py 283): INFO Epoch: [28] [ 100/2502] eta: 0:30:50 lr: 0.000001 loss_cls: 4.0638 (3.8245) grad_norm: 4.2284 (4.4533) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-06 04:26:01 root] (utils.py 283): INFO Epoch: [28] [ 110/2502] eta: 0:30:42 lr: 0.000001 loss_cls: 4.0607 (3.8463) grad_norm: 4.2335 (4.4693) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-06 04:26:09 root] (utils.py 283): INFO Epoch: [28] [ 120/2502] eta: 0:30:32 lr: 0.000001 loss_cls: 3.9184 (3.8325) grad_norm: 4.2906 (4.4594) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-06 04:26:17 root] (utils.py 283): INFO Epoch: [28] [ 130/2502] eta: 0:30:24 lr: 0.000001 loss_cls: 3.9606 (3.8417) grad_norm: 4.3320 (4.4848) time: 0.7645 data: 0.0003 max mem: 8421 +[2024-12-06 04:26:24 root] (utils.py 283): INFO Epoch: [28] [ 140/2502] eta: 0:30:15 lr: 0.000001 loss_cls: 3.9482 (3.8287) grad_norm: 4.3532 (4.4762) time: 0.7655 data: 0.0003 max mem: 8421 +[2024-12-06 04:26:32 root] (utils.py 283): INFO Epoch: [28] [ 150/2502] eta: 0:30:07 lr: 0.000001 loss_cls: 3.9348 (3.8367) grad_norm: 4.2416 (4.4592) time: 0.7652 data: 0.0003 max mem: 8421 +[2024-12-06 04:26:40 root] (utils.py 283): INFO Epoch: [28] [ 160/2502] eta: 0:29:59 lr: 0.000001 loss_cls: 3.9348 (3.8271) grad_norm: 4.4084 (4.4781) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-06 04:26:47 root] (utils.py 283): INFO Epoch: [28] [ 170/2502] eta: 0:29:51 lr: 0.000001 loss_cls: 3.7821 (3.8316) grad_norm: 4.6986 (4.5133) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-06 04:26:55 root] (utils.py 283): INFO Epoch: [28] [ 180/2502] eta: 0:29:43 lr: 0.000001 loss_cls: 3.8319 (3.8367) grad_norm: 4.6523 (4.5223) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-06 04:27:03 root] (utils.py 283): INFO Epoch: [28] [ 190/2502] eta: 0:29:35 lr: 0.000001 loss_cls: 3.7467 (3.8381) grad_norm: 4.6603 (4.5371) time: 0.7643 data: 0.0003 max mem: 8421 +[2024-12-06 04:27:10 root] (utils.py 283): INFO Epoch: [28] [ 200/2502] eta: 0:29:27 lr: 0.000001 loss_cls: 3.4253 (3.8216) grad_norm: 4.3472 (4.5234) time: 0.7651 data: 0.0002 max mem: 8421 +[2024-12-06 04:27:18 root] (utils.py 283): INFO Epoch: [28] [ 210/2502] eta: 0:29:19 lr: 0.000001 loss_cls: 3.4675 (3.8153) grad_norm: 4.2799 (4.5141) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-06 04:27:26 root] (utils.py 283): INFO Epoch: [28] [ 220/2502] eta: 0:29:12 lr: 0.000001 loss_cls: 3.7037 (3.8165) grad_norm: 4.3288 (4.5104) time: 0.7691 data: 0.0002 max mem: 8421 +[2024-12-06 04:27:33 root] (utils.py 283): INFO Epoch: [28] [ 230/2502] eta: 0:29:04 lr: 0.000001 loss_cls: 4.1269 (3.8333) grad_norm: 4.3285 (4.5167) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-06 04:27:41 root] (utils.py 283): INFO Epoch: [28] [ 240/2502] eta: 0:28:56 lr: 0.000001 loss_cls: 4.0058 (3.8364) grad_norm: 4.3370 (4.5127) time: 0.7660 data: 0.0003 max mem: 8421 +[2024-12-06 04:27:49 root] (utils.py 283): INFO Epoch: [28] [ 250/2502] eta: 0:28:48 lr: 0.000001 loss_cls: 3.9065 (3.8307) grad_norm: 4.1939 (4.5028) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-06 04:27:56 root] (utils.py 283): INFO Epoch: [28] [ 260/2502] eta: 0:28:40 lr: 0.000001 loss_cls: 3.7039 (3.8214) grad_norm: 4.0914 (4.5005) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-06 04:28:04 root] (utils.py 283): INFO Epoch: [28] [ 270/2502] eta: 0:28:32 lr: 0.000001 loss_cls: 3.7303 (3.8175) grad_norm: 4.3316 (4.4984) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-06 04:28:12 root] (utils.py 283): INFO Epoch: [28] [ 280/2502] eta: 0:28:25 lr: 0.000001 loss_cls: 3.8707 (3.8227) grad_norm: 4.4616 (4.5121) time: 0.7658 data: 0.0003 max mem: 8421 +[2024-12-06 04:28:19 root] (utils.py 283): INFO Epoch: [28] [ 290/2502] eta: 0:28:18 lr: 0.000001 loss_cls: 3.9932 (3.8210) grad_norm: 4.4788 (4.5156) time: 0.7731 data: 0.0003 max mem: 8421 +[2024-12-06 04:28:27 root] (utils.py 283): INFO Epoch: [28] [ 300/2502] eta: 0:28:11 lr: 0.000001 loss_cls: 3.8042 (3.8253) grad_norm: 4.4067 (4.5174) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 04:28:35 root] (utils.py 283): INFO Epoch: [28] [ 310/2502] eta: 0:28:04 lr: 0.000001 loss_cls: 3.8560 (3.8235) grad_norm: 4.4311 (4.5223) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 04:28:43 root] (utils.py 283): INFO Epoch: [28] [ 320/2502] eta: 0:27:56 lr: 0.000001 loss_cls: 4.0798 (3.8260) grad_norm: 4.3880 (4.5172) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-06 04:28:50 root] (utils.py 283): INFO Epoch: [28] [ 330/2502] eta: 0:27:48 lr: 0.000001 loss_cls: 3.9519 (3.8189) grad_norm: 4.3961 (4.5213) time: 0.7631 data: 0.0003 max mem: 8421 +[2024-12-06 04:28:58 root] (utils.py 283): INFO Epoch: [28] [ 340/2502] eta: 0:27:41 lr: 0.000001 loss_cls: 3.7770 (3.8238) grad_norm: 4.4534 (4.5587) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-06 04:29:06 root] (utils.py 283): INFO Epoch: [28] [ 350/2502] eta: 0:27:33 lr: 0.000001 loss_cls: 3.7496 (3.8212) grad_norm: 4.3963 (4.5581) time: 0.7737 data: 0.0002 max mem: 8421 +[2024-12-06 04:29:13 root] (utils.py 283): INFO Epoch: [28] [ 360/2502] eta: 0:27:26 lr: 0.000001 loss_cls: 3.7879 (3.8188) grad_norm: 4.2609 (4.5554) time: 0.7729 data: 0.0002 max mem: 8421 +[2024-12-06 04:29:21 root] (utils.py 283): INFO Epoch: [28] [ 370/2502] eta: 0:27:18 lr: 0.000001 loss_cls: 3.8470 (3.8174) grad_norm: 4.3245 (4.5564) time: 0.7693 data: 0.0002 max mem: 8421 +[2024-12-06 04:29:29 root] (utils.py 283): INFO Epoch: [28] [ 380/2502] eta: 0:27:10 lr: 0.000001 loss_cls: 3.9596 (3.8126) grad_norm: 4.5140 (4.5600) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-06 04:29:36 root] (utils.py 283): INFO Epoch: [28] [ 390/2502] eta: 0:27:02 lr: 0.000001 loss_cls: 3.3788 (3.8053) grad_norm: 4.4513 (4.5577) time: 0.7629 data: 0.0002 max mem: 8421 +[2024-12-06 04:29:44 root] (utils.py 283): INFO Epoch: [28] [ 400/2502] eta: 0:26:55 lr: 0.000001 loss_cls: 3.7080 (3.8103) grad_norm: 4.2505 (4.5604) time: 0.7677 data: 0.0002 max mem: 8421 +[2024-12-06 04:29:52 root] (utils.py 283): INFO Epoch: [28] [ 410/2502] eta: 0:26:48 lr: 0.000001 loss_cls: 4.1524 (3.8134) grad_norm: 4.5137 (4.5639) time: 0.7840 data: 0.0002 max mem: 8421 +[2024-12-06 04:30:00 root] (utils.py 283): INFO Epoch: [28] [ 420/2502] eta: 0:26:42 lr: 0.000001 loss_cls: 4.0904 (3.8179) grad_norm: 4.6710 (4.5688) time: 0.7918 data: 0.0002 max mem: 8421 +[2024-12-06 04:30:08 root] (utils.py 283): INFO Epoch: [28] [ 430/2502] eta: 0:26:35 lr: 0.000001 loss_cls: 3.7738 (3.8112) grad_norm: 4.5561 (4.5755) time: 0.7899 data: 0.0002 max mem: 8421 +[2024-12-06 04:30:16 root] (utils.py 283): INFO Epoch: [28] [ 440/2502] eta: 0:26:27 lr: 0.000001 loss_cls: 3.7633 (3.8129) grad_norm: 4.3090 (4.5698) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 04:30:23 root] (utils.py 283): INFO Epoch: [28] [ 450/2502] eta: 0:26:19 lr: 0.000001 loss_cls: 4.0575 (3.8147) grad_norm: 4.3090 (4.5680) time: 0.7694 data: 0.0002 max mem: 8421 +[2024-12-06 04:30:31 root] (utils.py 283): INFO Epoch: [28] [ 460/2502] eta: 0:26:12 lr: 0.000001 loss_cls: 4.0978 (3.8194) grad_norm: 4.3430 (4.5631) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-06 04:30:39 root] (utils.py 283): INFO Epoch: [28] [ 470/2502] eta: 0:26:04 lr: 0.000001 loss_cls: 4.0748 (3.8199) grad_norm: 4.2690 (4.5658) time: 0.7695 data: 0.0003 max mem: 8421 +[2024-12-06 04:30:46 root] (utils.py 283): INFO Epoch: [28] [ 480/2502] eta: 0:25:56 lr: 0.000001 loss_cls: 3.6623 (3.8181) grad_norm: 4.4651 (4.5640) time: 0.7700 data: 0.0003 max mem: 8421 +[2024-12-06 04:30:54 root] (utils.py 283): INFO Epoch: [28] [ 490/2502] eta: 0:25:48 lr: 0.000001 loss_cls: 3.5559 (3.8103) grad_norm: 4.4362 (4.5644) time: 0.7647 data: 0.0002 max mem: 8421 +[2024-12-06 04:31:02 root] (utils.py 283): INFO Epoch: [28] [ 500/2502] eta: 0:25:40 lr: 0.000001 loss_cls: 3.3617 (3.8042) grad_norm: 4.4362 (4.5613) time: 0.7660 data: 0.0002 max mem: 8421 +[2024-12-06 04:31:09 root] (utils.py 283): INFO Epoch: [28] [ 510/2502] eta: 0:25:33 lr: 0.000001 loss_cls: 3.4567 (3.7997) grad_norm: 4.4577 (4.5607) time: 0.7722 data: 0.0002 max mem: 8421 +[2024-12-06 04:31:17 root] (utils.py 283): INFO Epoch: [28] [ 520/2502] eta: 0:25:25 lr: 0.000001 loss_cls: 3.4883 (3.7995) grad_norm: 4.4995 (4.5601) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-06 04:31:25 root] (utils.py 283): INFO Epoch: [28] [ 530/2502] eta: 0:25:17 lr: 0.000001 loss_cls: 3.8391 (3.8006) grad_norm: 4.5350 (4.5609) time: 0.7621 data: 0.0003 max mem: 8421 +[2024-12-06 04:31:32 root] (utils.py 283): INFO Epoch: [28] [ 540/2502] eta: 0:25:09 lr: 0.000001 loss_cls: 3.7697 (3.7950) grad_norm: 4.4034 (4.5551) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-06 04:31:40 root] (utils.py 283): INFO Epoch: [28] [ 550/2502] eta: 0:25:01 lr: 0.000001 loss_cls: 3.4263 (3.7893) grad_norm: 4.3243 (4.5552) time: 0.7636 data: 0.0003 max mem: 8421 +[2024-12-06 04:31:47 root] (utils.py 283): INFO Epoch: [28] [ 560/2502] eta: 0:24:53 lr: 0.000001 loss_cls: 3.7395 (3.7909) grad_norm: 4.5047 (4.5547) time: 0.7611 data: 0.0003 max mem: 8421 +[2024-12-06 04:31:55 root] (utils.py 283): INFO Epoch: [28] [ 570/2502] eta: 0:24:45 lr: 0.000001 loss_cls: 3.8564 (3.7923) grad_norm: 4.2783 (4.5526) time: 0.7613 data: 0.0002 max mem: 8421 +[2024-12-06 04:32:03 root] (utils.py 283): INFO Epoch: [28] [ 580/2502] eta: 0:24:37 lr: 0.000001 loss_cls: 3.8503 (3.7908) grad_norm: 4.0974 (4.5461) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-06 04:32:10 root] (utils.py 283): INFO Epoch: [28] [ 590/2502] eta: 0:24:30 lr: 0.000001 loss_cls: 3.8591 (3.7927) grad_norm: 4.1321 (4.5460) time: 0.7662 data: 0.0003 max mem: 8421 +[2024-12-06 04:32:18 root] (utils.py 283): INFO Epoch: [28] [ 600/2502] eta: 0:24:22 lr: 0.000001 loss_cls: 3.8591 (3.7937) grad_norm: 4.5517 (4.5488) time: 0.7749 data: 0.0003 max mem: 8421 +[2024-12-06 04:32:26 root] (utils.py 283): INFO Epoch: [28] [ 610/2502] eta: 0:24:14 lr: 0.000001 loss_cls: 3.7927 (3.7933) grad_norm: 4.5863 (4.5504) time: 0.7719 data: 0.0002 max mem: 8421 +[2024-12-06 04:32:33 root] (utils.py 283): INFO Epoch: [28] [ 620/2502] eta: 0:24:07 lr: 0.000001 loss_cls: 3.8611 (3.7948) grad_norm: 4.3490 (4.5483) time: 0.7640 data: 0.0002 max mem: 8421 +[2024-12-06 04:32:41 root] (utils.py 283): INFO Epoch: [28] [ 630/2502] eta: 0:23:59 lr: 0.000001 loss_cls: 4.1285 (3.7975) grad_norm: 4.3172 (4.5460) time: 0.7666 data: 0.0002 max mem: 8421 +[2024-12-06 04:32:49 root] (utils.py 283): INFO Epoch: [28] [ 640/2502] eta: 0:23:51 lr: 0.000001 loss_cls: 4.1389 (3.8015) grad_norm: 4.2388 (4.5413) time: 0.7676 data: 0.0002 max mem: 8421 +[2024-12-06 04:32:56 root] (utils.py 283): INFO Epoch: [28] [ 650/2502] eta: 0:23:43 lr: 0.000001 loss_cls: 4.0576 (3.7994) grad_norm: 4.3112 (4.5436) time: 0.7666 data: 0.0003 max mem: 8421 +[2024-12-06 04:33:04 root] (utils.py 283): INFO Epoch: [28] [ 660/2502] eta: 0:23:36 lr: 0.000001 loss_cls: 3.6849 (3.7955) grad_norm: 4.4958 (4.5447) time: 0.7661 data: 0.0003 max mem: 8421 +[2024-12-06 04:33:12 root] (utils.py 283): INFO Epoch: [28] [ 670/2502] eta: 0:23:28 lr: 0.000001 loss_cls: 3.6845 (3.7923) grad_norm: 4.3940 (4.5428) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-06 04:33:19 root] (utils.py 283): INFO Epoch: [28] [ 680/2502] eta: 0:23:20 lr: 0.000001 loss_cls: 3.8464 (3.7925) grad_norm: 4.2138 (4.5377) time: 0.7646 data: 0.0002 max mem: 8421 +[2024-12-06 04:33:27 root] (utils.py 283): INFO Epoch: [28] [ 690/2502] eta: 0:23:12 lr: 0.000001 loss_cls: 3.9797 (3.7927) grad_norm: 4.2900 (4.5411) time: 0.7650 data: 0.0002 max mem: 8421 +[2024-12-06 04:33:35 root] (utils.py 283): INFO Epoch: [28] [ 700/2502] eta: 0:23:05 lr: 0.000001 loss_cls: 3.9797 (3.7930) grad_norm: 4.3663 (4.5361) time: 0.7674 data: 0.0003 max mem: 8421 +[2024-12-06 04:33:42 root] (utils.py 283): INFO Epoch: [28] [ 710/2502] eta: 0:22:57 lr: 0.000001 loss_cls: 4.0326 (3.7942) grad_norm: 4.2292 (4.5339) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-06 04:33:50 root] (utils.py 283): INFO Epoch: [28] [ 720/2502] eta: 0:22:49 lr: 0.000001 loss_cls: 3.9398 (3.7927) grad_norm: 4.1012 (4.5300) time: 0.7655 data: 0.0002 max mem: 8421 +[2024-12-06 04:33:58 root] (utils.py 283): INFO Epoch: [28] [ 730/2502] eta: 0:22:41 lr: 0.000001 loss_cls: 3.9914 (3.7960) grad_norm: 4.0833 (4.5301) time: 0.7652 data: 0.0002 max mem: 8421 +[2024-12-06 04:34:05 root] (utils.py 283): INFO Epoch: [28] [ 740/2502] eta: 0:22:34 lr: 0.000001 loss_cls: 4.1441 (3.7974) grad_norm: 4.4832 (4.5281) time: 0.7667 data: 0.0003 max mem: 8421 +[2024-12-06 04:34:13 root] (utils.py 283): INFO Epoch: [28] [ 750/2502] eta: 0:22:26 lr: 0.000001 loss_cls: 4.0789 (3.8022) grad_norm: 4.3460 (4.5257) time: 0.7658 data: 0.0002 max mem: 8421 +[2024-12-06 04:34:21 root] (utils.py 283): INFO Epoch: [28] [ 760/2502] eta: 0:22:18 lr: 0.000001 loss_cls: 3.9938 (3.8016) grad_norm: 4.1898 (4.5262) time: 0.7648 data: 0.0002 max mem: 8421 +[2024-12-06 04:34:28 root] (utils.py 283): INFO Epoch: [28] [ 770/2502] eta: 0:22:10 lr: 0.000001 loss_cls: 3.9375 (3.8041) grad_norm: 4.2651 (4.5256) time: 0.7634 data: 0.0002 max mem: 8421 +[2024-12-06 04:34:36 root] (utils.py 283): INFO Epoch: [28] [ 780/2502] eta: 0:22:02 lr: 0.000001 loss_cls: 3.9209 (3.8056) grad_norm: 4.3270 (4.5234) time: 0.7617 data: 0.0002 max mem: 8421 +[2024-12-06 04:34:44 root] (utils.py 283): INFO Epoch: [28] [ 790/2502] eta: 0:21:55 lr: 0.000001 loss_cls: 3.9318 (3.8111) grad_norm: 4.3003 (4.5213) time: 0.7615 data: 0.0002 max mem: 8421 +[2024-12-06 04:34:51 root] (utils.py 283): INFO Epoch: [28] [ 800/2502] eta: 0:21:47 lr: 0.000001 loss_cls: 4.1369 (3.8147) grad_norm: 4.4187 (4.5271) time: 0.7642 data: 0.0002 max mem: 8421 +[2024-12-06 04:34:59 root] (utils.py 283): INFO Epoch: [28] [ 810/2502] eta: 0:21:39 lr: 0.000001 loss_cls: 4.1090 (3.8153) grad_norm: 4.2847 (4.5253) time: 0.7675 data: 0.0002 max mem: 8421 +[2024-12-06 04:35:07 root] (utils.py 283): INFO Epoch: [28] [ 820/2502] eta: 0:21:32 lr: 0.000001 loss_cls: 4.0727 (3.8157) grad_norm: 4.2446 (4.5237) time: 0.7687 data: 0.0003 max mem: 8421 +[2024-12-06 04:35:15 root] (utils.py 283): INFO Epoch: [28] [ 830/2502] eta: 0:21:24 lr: 0.000001 loss_cls: 3.9834 (3.8160) grad_norm: 4.3425 (4.5251) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 04:35:22 root] (utils.py 283): INFO Epoch: [28] [ 840/2502] eta: 0:21:17 lr: 0.000001 loss_cls: 4.0038 (3.8202) grad_norm: 4.3438 (4.5227) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-06 04:35:30 root] (utils.py 283): INFO Epoch: [28] [ 850/2502] eta: 0:21:10 lr: 0.000001 loss_cls: 4.0647 (3.8218) grad_norm: 4.3409 (4.5205) time: 0.7946 data: 0.0003 max mem: 8421 +[2024-12-06 04:35:38 root] (utils.py 283): INFO Epoch: [28] [ 860/2502] eta: 0:21:03 lr: 0.000001 loss_cls: 4.0100 (3.8225) grad_norm: 4.3926 (4.5203) time: 0.7912 data: 0.0002 max mem: 8421 +[2024-12-06 04:35:46 root] (utils.py 283): INFO Epoch: [28] [ 870/2502] eta: 0:20:55 lr: 0.000001 loss_cls: 3.8561 (3.8212) grad_norm: 4.3620 (4.5171) time: 0.7859 data: 0.0003 max mem: 8421 +[2024-12-06 04:35:54 root] (utils.py 283): INFO Epoch: [28] [ 880/2502] eta: 0:20:47 lr: 0.000001 loss_cls: 3.8561 (3.8217) grad_norm: 4.3755 (4.5175) time: 0.7730 data: 0.0003 max mem: 8421 +[2024-12-06 04:36:02 root] (utils.py 283): INFO Epoch: [28] [ 890/2502] eta: 0:20:40 lr: 0.000001 loss_cls: 4.0949 (3.8242) grad_norm: 4.4946 (4.5167) time: 0.7692 data: 0.0002 max mem: 8421 +[2024-12-06 04:36:09 root] (utils.py 283): INFO Epoch: [28] [ 900/2502] eta: 0:20:32 lr: 0.000001 loss_cls: 4.0949 (3.8227) grad_norm: 4.3551 (4.5166) time: 0.7705 data: 0.0003 max mem: 8421 +[2024-12-06 04:36:17 root] (utils.py 283): INFO Epoch: [28] [ 910/2502] eta: 0:20:24 lr: 0.000001 loss_cls: 4.0499 (3.8266) grad_norm: 4.3551 (4.5163) time: 0.7649 data: 0.0003 max mem: 8421 +[2024-12-06 04:36:25 root] (utils.py 283): INFO Epoch: [28] [ 920/2502] eta: 0:20:17 lr: 0.000001 loss_cls: 3.9806 (3.8224) grad_norm: 4.3202 (4.5145) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-06 04:36:32 root] (utils.py 283): INFO Epoch: [28] [ 930/2502] eta: 0:20:09 lr: 0.000001 loss_cls: 3.8475 (3.8202) grad_norm: 4.3685 (4.5138) time: 0.7750 data: 0.0003 max mem: 8421 +[2024-12-06 04:36:40 root] (utils.py 283): INFO Epoch: [28] [ 940/2502] eta: 0:20:01 lr: 0.000001 loss_cls: 3.9778 (3.8209) grad_norm: 4.4673 (4.5171) time: 0.7706 data: 0.0003 max mem: 8421 +[2024-12-06 04:36:48 root] (utils.py 283): INFO Epoch: [28] [ 950/2502] eta: 0:19:54 lr: 0.000001 loss_cls: 3.9778 (3.8202) grad_norm: 4.3675 (4.5157) time: 0.7671 data: 0.0003 max mem: 8421 +[2024-12-06 04:36:55 root] (utils.py 283): INFO Epoch: [28] [ 960/2502] eta: 0:19:46 lr: 0.000001 loss_cls: 3.8148 (3.8208) grad_norm: 4.3420 (4.5163) time: 0.7745 data: 0.0003 max mem: 8421 +[2024-12-06 04:37:03 root] (utils.py 283): INFO Epoch: [28] [ 970/2502] eta: 0:19:38 lr: 0.000001 loss_cls: 3.8411 (3.8200) grad_norm: 4.5224 (4.5169) time: 0.7739 data: 0.0003 max mem: 8421 +[2024-12-06 04:37:11 root] (utils.py 283): INFO Epoch: [28] [ 980/2502] eta: 0:19:31 lr: 0.000001 loss_cls: 3.9144 (3.8197) grad_norm: 4.4639 (4.5155) time: 0.7677 data: 0.0003 max mem: 8421 +[2024-12-06 04:37:19 root] (utils.py 283): INFO Epoch: [28] [ 990/2502] eta: 0:19:23 lr: 0.000001 loss_cls: 3.9467 (3.8203) grad_norm: 4.1786 (4.5137) time: 0.7696 data: 0.0003 max mem: 8421 +[2024-12-06 04:37:26 root] (utils.py 283): INFO Epoch: [28] [1000/2502] eta: 0:19:15 lr: 0.000001 loss_cls: 3.7328 (3.8187) grad_norm: 4.1645 (4.5129) time: 0.7722 data: 0.0003 max mem: 8421 +[2024-12-06 04:37:34 root] (utils.py 283): INFO Epoch: [28] [1010/2502] eta: 0:19:08 lr: 0.000001 loss_cls: 3.8194 (3.8193) grad_norm: 4.2499 (4.5133) time: 0.7671 data: 0.0003 max mem: 8421 +[2024-12-06 04:37:42 root] (utils.py 283): INFO Epoch: [28] [1020/2502] eta: 0:19:00 lr: 0.000001 loss_cls: 3.9534 (3.8217) grad_norm: 4.2818 (4.5128) time: 0.7622 data: 0.0002 max mem: 8421 +[2024-12-06 04:37:49 root] (utils.py 283): INFO Epoch: [28] [1030/2502] eta: 0:18:52 lr: 0.000001 loss_cls: 4.2775 (3.8237) grad_norm: 4.5027 (4.5141) time: 0.7649 data: 0.0002 max mem: 8421 +[2024-12-06 04:37:57 root] (utils.py 283): INFO Epoch: [28] [1040/2502] eta: 0:18:45 lr: 0.000001 loss_cls: 4.0904 (3.8247) grad_norm: 4.5840 (4.5142) time: 0.7755 data: 0.0002 max mem: 8421 +[2024-12-06 04:38:05 root] (utils.py 283): INFO Epoch: [28] [1050/2502] eta: 0:18:37 lr: 0.000001 loss_cls: 3.9498 (3.8249) grad_norm: 4.3893 (4.5124) time: 0.7847 data: 0.0002 max mem: 8421 +[2024-12-06 04:38:13 root] (utils.py 283): INFO Epoch: [28] [1060/2502] eta: 0:18:29 lr: 0.000001 loss_cls: 3.9856 (3.8263) grad_norm: 4.3401 (4.5145) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-06 04:38:20 root] (utils.py 283): INFO Epoch: [28] [1070/2502] eta: 0:18:22 lr: 0.000001 loss_cls: 3.9879 (3.8277) grad_norm: 4.3558 (4.5153) time: 0.7698 data: 0.0002 max mem: 8421 +[2024-12-06 04:38:28 root] (utils.py 283): INFO Epoch: [28] [1080/2502] eta: 0:18:14 lr: 0.000001 loss_cls: 3.9030 (3.8286) grad_norm: 4.3424 (4.5145) time: 0.7692 data: 0.0003 max mem: 8421 +[2024-12-06 04:38:36 root] (utils.py 283): INFO Epoch: [28] [1090/2502] eta: 0:18:06 lr: 0.000001 loss_cls: 3.9129 (3.8297) grad_norm: 4.3619 (4.5142) time: 0.7703 data: 0.0003 max mem: 8421 +[2024-12-06 04:38:44 root] (utils.py 283): INFO Epoch: [28] [1100/2502] eta: 0:17:59 lr: 0.000001 loss_cls: 3.9164 (3.8308) grad_norm: 4.3083 (4.5137) time: 0.7784 data: 0.0002 max mem: 8421 +[2024-12-06 04:38:51 root] (utils.py 283): INFO Epoch: [28] [1110/2502] eta: 0:17:51 lr: 0.000001 loss_cls: 3.9477 (3.8325) grad_norm: 4.3083 (4.5136) time: 0.7735 data: 0.0002 max mem: 8421 +[2024-12-06 04:38:59 root] (utils.py 283): INFO Epoch: [28] [1120/2502] eta: 0:17:43 lr: 0.000001 loss_cls: 4.0514 (3.8338) grad_norm: 4.4014 (4.5147) time: 0.7642 data: 0.0003 max mem: 8421 +[2024-12-06 04:39:06 root] (utils.py 283): INFO Epoch: [28] [1130/2502] eta: 0:17:35 lr: 0.000001 loss_cls: 4.0685 (3.8350) grad_norm: 4.4014 (4.5155) time: 0.7638 data: 0.0002 max mem: 8421 +[2024-12-06 04:39:14 root] (utils.py 283): INFO Epoch: [28] [1140/2502] eta: 0:17:28 lr: 0.000001 loss_cls: 4.0895 (3.8346) grad_norm: 4.3789 (4.5285) time: 0.7653 data: 0.0002 max mem: 8421 +[2024-12-06 04:39:22 root] (utils.py 283): INFO Epoch: [28] [1150/2502] eta: 0:17:20 lr: 0.000001 loss_cls: 3.9616 (3.8346) grad_norm: 4.2482 (4.5266) time: 0.7724 data: 0.0003 max mem: 8421 +[2024-12-06 04:39:30 root] (utils.py 283): INFO Epoch: [28] [1160/2502] eta: 0:17:13 lr: 0.000001 loss_cls: 3.9380 (3.8330) grad_norm: 4.4705 (4.5268) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 04:39:38 root] (utils.py 283): INFO Epoch: [28] [1170/2502] eta: 0:17:06 lr: 0.000001 loss_cls: 3.5771 (3.8305) grad_norm: 4.4980 (4.5254) time: 0.8242 data: 0.0003 max mem: 8421 +[2024-12-06 04:39:46 root] (utils.py 283): INFO Epoch: [28] [1180/2502] eta: 0:16:59 lr: 0.000001 loss_cls: 3.4654 (3.8291) grad_norm: 4.3557 (4.5240) time: 0.8304 data: 0.0003 max mem: 8421 +[2024-12-06 04:39:55 root] (utils.py 283): INFO Epoch: [28] [1190/2502] eta: 0:16:51 lr: 0.000001 loss_cls: 4.0227 (3.8287) grad_norm: 4.2687 (4.5226) time: 0.8119 data: 0.0002 max mem: 8421 +[2024-12-06 04:40:03 root] (utils.py 283): INFO Epoch: [28] [1200/2502] eta: 0:16:44 lr: 0.000001 loss_cls: 3.7463 (3.8279) grad_norm: 4.3344 (4.5222) time: 0.8019 data: 0.0003 max mem: 8421 +[2024-12-06 04:40:10 root] (utils.py 283): INFO Epoch: [28] [1210/2502] eta: 0:16:36 lr: 0.000001 loss_cls: 3.9237 (3.8289) grad_norm: 4.3963 (4.5225) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 04:40:18 root] (utils.py 283): INFO Epoch: [28] [1220/2502] eta: 0:16:29 lr: 0.000001 loss_cls: 4.0615 (3.8299) grad_norm: 4.4862 (4.5249) time: 0.7714 data: 0.0003 max mem: 8421 +[2024-12-06 04:40:26 root] (utils.py 283): INFO Epoch: [28] [1230/2502] eta: 0:16:21 lr: 0.000001 loss_cls: 4.0304 (3.8298) grad_norm: 4.5605 (4.5240) time: 0.7686 data: 0.0003 max mem: 8421 +[2024-12-06 04:40:33 root] (utils.py 283): INFO Epoch: [28] [1240/2502] eta: 0:16:13 lr: 0.000001 loss_cls: 4.0487 (3.8313) grad_norm: 4.3501 (4.5222) time: 0.7682 data: 0.0003 max mem: 8421 +[2024-12-06 04:40:41 root] (utils.py 283): INFO Epoch: [28] [1250/2502] eta: 0:16:05 lr: 0.000001 loss_cls: 3.9541 (3.8297) grad_norm: 4.4108 (4.5229) time: 0.7687 data: 0.0002 max mem: 8421 +[2024-12-06 04:40:49 root] (utils.py 283): INFO Epoch: [28] [1260/2502] eta: 0:15:58 lr: 0.000001 loss_cls: 3.8332 (3.8311) grad_norm: 4.5203 (4.5249) time: 0.7878 data: 0.0002 max mem: 8421 +[2024-12-06 04:40:57 root] (utils.py 283): INFO Epoch: [28] [1270/2502] eta: 0:15:51 lr: 0.000001 loss_cls: 4.0165 (3.8295) grad_norm: 4.5399 (4.5250) time: 0.8077 data: 0.0002 max mem: 8421 +[2024-12-06 04:41:05 root] (utils.py 283): INFO Epoch: [28] [1280/2502] eta: 0:15:43 lr: 0.000001 loss_cls: 3.5381 (3.8270) grad_norm: 4.5037 (4.5254) time: 0.8100 data: 0.0002 max mem: 8421 +[2024-12-06 04:41:13 root] (utils.py 283): INFO Epoch: [28] [1290/2502] eta: 0:15:36 lr: 0.000001 loss_cls: 3.8500 (3.8278) grad_norm: 4.4391 (4.5263) time: 0.8048 data: 0.0003 max mem: 8421 +[2024-12-06 04:41:21 root] (utils.py 283): INFO Epoch: [28] [1300/2502] eta: 0:15:28 lr: 0.000001 loss_cls: 3.7970 (3.8252) grad_norm: 4.4535 (4.5271) time: 0.7881 data: 0.0003 max mem: 8421 +[2024-12-06 04:41:29 root] (utils.py 283): INFO Epoch: [28] [1310/2502] eta: 0:15:20 lr: 0.000001 loss_cls: 3.7660 (3.8273) grad_norm: 4.4236 (4.5266) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-06 04:41:37 root] (utils.py 283): INFO Epoch: [28] [1320/2502] eta: 0:15:13 lr: 0.000001 loss_cls: 4.0744 (3.8278) grad_norm: 4.3887 (4.5313) time: 0.7979 data: 0.0002 max mem: 8421 +[2024-12-06 04:41:45 root] (utils.py 283): INFO Epoch: [28] [1330/2502] eta: 0:15:06 lr: 0.000001 loss_cls: 3.8044 (3.8263) grad_norm: 4.3887 (4.5302) time: 0.8154 data: 0.0003 max mem: 8421 +[2024-12-06 04:41:53 root] (utils.py 283): INFO Epoch: [28] [1340/2502] eta: 0:14:58 lr: 0.000001 loss_cls: 3.8860 (3.8262) grad_norm: 4.2746 (4.5317) time: 0.8068 data: 0.0003 max mem: 8421 +[2024-12-06 04:42:01 root] (utils.py 283): INFO Epoch: [28] [1350/2502] eta: 0:14:51 lr: 0.000001 loss_cls: 3.9231 (3.8261) grad_norm: 4.4171 (4.5429) time: 0.8022 data: 0.0002 max mem: 8421 +[2024-12-06 04:42:09 root] (utils.py 283): INFO Epoch: [28] [1360/2502] eta: 0:14:43 lr: 0.000001 loss_cls: 3.9231 (3.8263) grad_norm: 4.4845 (4.5427) time: 0.7941 data: 0.0003 max mem: 8421 +[2024-12-06 04:42:17 root] (utils.py 283): INFO Epoch: [28] [1370/2502] eta: 0:14:35 lr: 0.000001 loss_cls: 4.1122 (3.8277) grad_norm: 4.3208 (4.5417) time: 0.7743 data: 0.0003 max mem: 8421 +[2024-12-06 04:42:24 root] (utils.py 283): INFO Epoch: [28] [1380/2502] eta: 0:14:27 lr: 0.000001 loss_cls: 4.0451 (3.8271) grad_norm: 4.3523 (4.5418) time: 0.7662 data: 0.0002 max mem: 8421 +[2024-12-06 04:42:32 root] (utils.py 283): INFO Epoch: [28] [1390/2502] eta: 0:14:20 lr: 0.000001 loss_cls: 3.7760 (3.8266) grad_norm: 4.3561 (4.5416) time: 0.7678 data: 0.0002 max mem: 8421 +[2024-12-06 04:42:40 root] (utils.py 283): INFO Epoch: [28] [1400/2502] eta: 0:14:12 lr: 0.000001 loss_cls: 3.8570 (3.8280) grad_norm: 4.4766 (4.5551) time: 0.7669 data: 0.0002 max mem: 8421 +[2024-12-06 04:42:47 root] (utils.py 283): INFO Epoch: [28] [1410/2502] eta: 0:14:04 lr: 0.000001 loss_cls: 4.1772 (3.8286) grad_norm: 4.4580 (4.5534) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-06 04:42:55 root] (utils.py 283): INFO Epoch: [28] [1420/2502] eta: 0:13:56 lr: 0.000001 loss_cls: 4.0908 (3.8280) grad_norm: 4.3119 (4.5525) time: 0.7624 data: 0.0003 max mem: 8421 +[2024-12-06 04:43:03 root] (utils.py 283): INFO Epoch: [28] [1430/2502] eta: 0:13:48 lr: 0.000001 loss_cls: 3.8779 (3.8274) grad_norm: 4.2582 (4.5514) time: 0.7623 data: 0.0003 max mem: 8421 +[2024-12-06 04:43:10 root] (utils.py 283): INFO Epoch: [28] [1440/2502] eta: 0:13:41 lr: 0.000001 loss_cls: 3.8975 (3.8285) grad_norm: 4.2582 (4.5631) time: 0.7627 data: 0.0002 max mem: 8421 +[2024-12-06 04:43:18 root] (utils.py 283): INFO Epoch: [28] [1450/2502] eta: 0:13:33 lr: 0.000001 loss_cls: 3.9337 (3.8294) grad_norm: 4.3758 (4.5629) time: 0.7631 data: 0.0002 max mem: 8421 +[2024-12-06 04:43:25 root] (utils.py 283): INFO Epoch: [28] [1460/2502] eta: 0:13:25 lr: 0.000001 loss_cls: 3.8573 (3.8288) grad_norm: 4.5243 (4.5657) time: 0.7608 data: 0.0002 max mem: 8421 +[2024-12-06 04:43:33 root] (utils.py 283): INFO Epoch: [28] [1470/2502] eta: 0:13:17 lr: 0.000001 loss_cls: 3.8995 (3.8285) grad_norm: 4.5143 (4.5679) time: 0.7620 data: 0.0002 max mem: 8421 +[2024-12-06 04:43:41 root] (utils.py 283): INFO Epoch: [28] [1480/2502] eta: 0:13:09 lr: 0.000001 loss_cls: 3.9952 (3.8294) grad_norm: 4.1939 (4.5668) time: 0.7641 data: 0.0002 max mem: 8421 +[2024-12-06 04:43:48 root] (utils.py 283): INFO Epoch: [28] [1490/2502] eta: 0:13:02 lr: 0.000001 loss_cls: 3.8679 (3.8281) grad_norm: 4.3046 (4.5659) time: 0.7699 data: 0.0002 max mem: 8421 +[2024-12-06 04:43:56 root] (utils.py 283): INFO Epoch: [28] [1500/2502] eta: 0:12:54 lr: 0.000001 loss_cls: 4.0423 (3.8300) grad_norm: 4.3728 (4.5656) time: 0.7681 data: 0.0002 max mem: 8421 +[2024-12-06 04:44:04 root] (utils.py 283): INFO Epoch: [28] [1510/2502] eta: 0:12:46 lr: 0.000001 loss_cls: 4.0520 (3.8300) grad_norm: 4.4253 (4.5646) time: 0.7636 data: 0.0002 max mem: 8421 +[2024-12-06 04:44:12 root] (utils.py 283): INFO Epoch: [28] [1520/2502] eta: 0:12:38 lr: 0.000001 loss_cls: 3.9502 (3.8280) grad_norm: 4.4091 (4.5636) time: 0.7738 data: 0.0002 max mem: 8421 +[2024-12-06 04:44:21 root] (utils.py 283): INFO Epoch: [28] [1530/2502] eta: 0:12:32 lr: 0.000001 loss_cls: 3.4581 (3.8262) grad_norm: 4.3339 (4.5619) time: 0.8847 data: 0.0009 max mem: 8421 +[2024-12-06 04:44:32 root] (utils.py 283): INFO Epoch: [28] [1540/2502] eta: 0:12:26 lr: 0.000001 loss_cls: 3.6158 (3.8248) grad_norm: 4.2208 (4.5599) time: 1.0052 data: 0.0025 max mem: 8421 +[2024-12-06 04:44:39 root] (utils.py 283): INFO Epoch: [28] [1550/2502] eta: 0:12:18 lr: 0.000001 loss_cls: 3.9169 (3.8260) grad_norm: 4.2731 (4.5598) time: 0.8978 data: 0.0019 max mem: 8421 +[2024-12-06 04:44:47 root] (utils.py 283): INFO Epoch: [28] [1560/2502] eta: 0:12:10 lr: 0.000001 loss_cls: 3.9983 (3.8274) grad_norm: 4.4448 (4.5589) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 04:44:55 root] (utils.py 283): INFO Epoch: [28] [1570/2502] eta: 0:12:03 lr: 0.000001 loss_cls: 4.1133 (3.8286) grad_norm: 4.4223 (4.5579) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:03 root] (utils.py 283): INFO Epoch: [28] [1580/2502] eta: 0:11:55 lr: 0.000001 loss_cls: 3.8321 (3.8269) grad_norm: 4.3823 (4.5571) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:11 root] (utils.py 283): INFO Epoch: [28] [1590/2502] eta: 0:11:47 lr: 0.000001 loss_cls: 3.5457 (3.8265) grad_norm: 4.3461 (4.5562) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:18 root] (utils.py 283): INFO Epoch: [28] [1600/2502] eta: 0:11:39 lr: 0.000001 loss_cls: 3.5857 (3.8259) grad_norm: 4.2717 (4.5545) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:26 root] (utils.py 283): INFO Epoch: [28] [1610/2502] eta: 0:11:32 lr: 0.000001 loss_cls: 3.6652 (3.8258) grad_norm: 4.3110 (4.5538) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:34 root] (utils.py 283): INFO Epoch: [28] [1620/2502] eta: 0:11:24 lr: 0.000001 loss_cls: 3.8351 (3.8256) grad_norm: 4.3926 (4.5528) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:42 root] (utils.py 283): INFO Epoch: [28] [1630/2502] eta: 0:11:16 lr: 0.000001 loss_cls: 3.9789 (3.8266) grad_norm: 4.5061 (4.5569) time: 0.7899 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:50 root] (utils.py 283): INFO Epoch: [28] [1640/2502] eta: 0:11:09 lr: 0.000001 loss_cls: 3.9789 (3.8264) grad_norm: 4.4995 (4.5553) time: 0.7906 data: 0.0003 max mem: 8421 +[2024-12-06 04:45:58 root] (utils.py 283): INFO Epoch: [28] [1650/2502] eta: 0:11:01 lr: 0.000001 loss_cls: 3.8328 (3.8262) grad_norm: 4.2748 (4.5539) time: 0.7825 data: 0.0003 max mem: 8421 +[2024-12-06 04:46:06 root] (utils.py 283): INFO Epoch: [28] [1660/2502] eta: 0:10:53 lr: 0.000001 loss_cls: 3.9473 (3.8276) grad_norm: 4.3668 (4.5538) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-06 04:46:13 root] (utils.py 283): INFO Epoch: [28] [1670/2502] eta: 0:10:45 lr: 0.000001 loss_cls: 3.9473 (3.8263) grad_norm: 4.4531 (4.5537) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 04:46:21 root] (utils.py 283): INFO Epoch: [28] [1680/2502] eta: 0:10:38 lr: 0.000001 loss_cls: 3.8859 (3.8280) grad_norm: 4.6673 (4.5553) time: 0.7898 data: 0.0003 max mem: 8421 +[2024-12-06 04:46:29 root] (utils.py 283): INFO Epoch: [28] [1690/2502] eta: 0:10:30 lr: 0.000001 loss_cls: 4.0465 (3.8281) grad_norm: 4.6433 (4.5552) time: 0.7870 data: 0.0009 max mem: 8421 +[2024-12-06 04:46:38 root] (utils.py 283): INFO Epoch: [28] [1700/2502] eta: 0:10:23 lr: 0.000001 loss_cls: 3.9510 (3.8288) grad_norm: 4.4269 (4.5578) time: 0.8300 data: 0.0009 max mem: 8421 +[2024-12-06 04:46:47 root] (utils.py 283): INFO Epoch: [28] [1710/2502] eta: 0:10:16 lr: 0.000001 loss_cls: 3.8585 (3.8268) grad_norm: 4.4392 (4.5579) time: 0.9051 data: 0.0017 max mem: 8421 +[2024-12-06 04:46:55 root] (utils.py 283): INFO Epoch: [28] [1720/2502] eta: 0:10:08 lr: 0.000001 loss_cls: 3.6714 (3.8264) grad_norm: 4.3917 (4.5601) time: 0.8611 data: 0.0017 max mem: 8421 +[2024-12-06 04:47:03 root] (utils.py 283): INFO Epoch: [28] [1730/2502] eta: 0:10:00 lr: 0.000001 loss_cls: 3.9097 (3.8268) grad_norm: 4.3228 (4.5590) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 04:47:11 root] (utils.py 283): INFO Epoch: [28] [1740/2502] eta: 0:09:52 lr: 0.000001 loss_cls: 4.0926 (3.8276) grad_norm: 4.2681 (4.5586) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 04:47:19 root] (utils.py 283): INFO Epoch: [28] [1750/2502] eta: 0:09:45 lr: 0.000001 loss_cls: 4.1429 (3.8287) grad_norm: 4.2875 (4.5574) time: 0.7921 data: 0.0003 max mem: 8421 +[2024-12-06 04:47:27 root] (utils.py 283): INFO Epoch: [28] [1760/2502] eta: 0:09:37 lr: 0.000001 loss_cls: 4.1081 (3.8301) grad_norm: 4.2875 (4.5565) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 04:47:34 root] (utils.py 283): INFO Epoch: [28] [1770/2502] eta: 0:09:29 lr: 0.000001 loss_cls: 4.0995 (3.8313) grad_norm: 4.2601 (4.5552) time: 0.7802 data: 0.0003 max mem: 8421 +[2024-12-06 04:47:42 root] (utils.py 283): INFO Epoch: [28] [1780/2502] eta: 0:09:21 lr: 0.000001 loss_cls: 3.8409 (3.8306) grad_norm: 4.2601 (4.5547) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 04:47:50 root] (utils.py 283): INFO Epoch: [28] [1790/2502] eta: 0:09:14 lr: 0.000001 loss_cls: 3.8970 (3.8307) grad_norm: 4.2985 (4.5539) time: 0.7829 data: 0.0002 max mem: 8421 +[2024-12-06 04:47:58 root] (utils.py 283): INFO Epoch: [28] [1800/2502] eta: 0:09:06 lr: 0.000001 loss_cls: 3.9810 (3.8302) grad_norm: 4.3027 (4.5534) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 04:48:06 root] (utils.py 283): INFO Epoch: [28] [1810/2502] eta: 0:08:58 lr: 0.000001 loss_cls: 3.7306 (3.8307) grad_norm: 4.6960 (4.5569) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 04:48:13 root] (utils.py 283): INFO Epoch: [28] [1820/2502] eta: 0:08:50 lr: 0.000001 loss_cls: 3.7306 (3.8299) grad_norm: 4.4698 (4.5557) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-06 04:48:21 root] (utils.py 283): INFO Epoch: [28] [1830/2502] eta: 0:08:43 lr: 0.000001 loss_cls: 3.6198 (3.8287) grad_norm: 4.3602 (4.5555) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 04:48:29 root] (utils.py 283): INFO Epoch: [28] [1840/2502] eta: 0:08:35 lr: 0.000001 loss_cls: 3.6706 (3.8286) grad_norm: 4.4219 (4.5607) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-06 04:48:37 root] (utils.py 283): INFO Epoch: [28] [1850/2502] eta: 0:08:27 lr: 0.000001 loss_cls: 4.0185 (3.8287) grad_norm: 4.5480 (4.5618) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-06 04:48:45 root] (utils.py 283): INFO Epoch: [28] [1860/2502] eta: 0:08:19 lr: 0.000001 loss_cls: 3.9010 (3.8283) grad_norm: 4.4856 (4.5615) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 04:48:52 root] (utils.py 283): INFO Epoch: [28] [1870/2502] eta: 0:08:11 lr: 0.000001 loss_cls: 3.6783 (3.8274) grad_norm: 4.3434 (4.5601) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 04:49:00 root] (utils.py 283): INFO Epoch: [28] [1880/2502] eta: 0:08:04 lr: 0.000001 loss_cls: 3.5966 (3.8270) grad_norm: 4.4322 (4.5610) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 04:49:08 root] (utils.py 283): INFO Epoch: [28] [1890/2502] eta: 0:07:56 lr: 0.000001 loss_cls: 3.9749 (3.8276) grad_norm: 4.5205 (4.5604) time: 0.7800 data: 0.0002 max mem: 8421 +[2024-12-06 04:49:16 root] (utils.py 283): INFO Epoch: [28] [1900/2502] eta: 0:07:48 lr: 0.000001 loss_cls: 4.0868 (3.8286) grad_norm: 4.3221 (4.5597) time: 0.7815 data: 0.0002 max mem: 8421 +[2024-12-06 04:49:24 root] (utils.py 283): INFO Epoch: [28] [1910/2502] eta: 0:07:40 lr: 0.000001 loss_cls: 4.0792 (3.8288) grad_norm: 4.4103 (4.5608) time: 0.7932 data: 0.0002 max mem: 8421 +[2024-12-06 04:49:32 root] (utils.py 283): INFO Epoch: [28] [1920/2502] eta: 0:07:33 lr: 0.000001 loss_cls: 4.0014 (3.8292) grad_norm: 4.4109 (4.5600) time: 0.7985 data: 0.0003 max mem: 8421 +[2024-12-06 04:49:40 root] (utils.py 283): INFO Epoch: [28] [1930/2502] eta: 0:07:25 lr: 0.000001 loss_cls: 4.0951 (3.8298) grad_norm: 4.3580 (4.5587) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 04:49:47 root] (utils.py 283): INFO Epoch: [28] [1940/2502] eta: 0:07:17 lr: 0.000001 loss_cls: 3.7742 (3.8289) grad_norm: 4.3548 (4.5580) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-06 04:49:55 root] (utils.py 283): INFO Epoch: [28] [1950/2502] eta: 0:07:09 lr: 0.000001 loss_cls: 3.7742 (3.8281) grad_norm: 4.3700 (4.5574) time: 0.7754 data: 0.0002 max mem: 8421 +[2024-12-06 04:50:03 root] (utils.py 283): INFO Epoch: [28] [1960/2502] eta: 0:07:01 lr: 0.000001 loss_cls: 3.3978 (3.8256) grad_norm: 4.4378 (4.5574) time: 0.7745 data: 0.0003 max mem: 8421 +[2024-12-06 04:50:11 root] (utils.py 283): INFO Epoch: [28] [1970/2502] eta: 0:06:54 lr: 0.000001 loss_cls: 3.4543 (3.8260) grad_norm: 4.4896 (4.5569) time: 0.7741 data: 0.0003 max mem: 8421 +[2024-12-06 04:50:18 root] (utils.py 283): INFO Epoch: [28] [1980/2502] eta: 0:06:46 lr: 0.000001 loss_cls: 3.9258 (3.8260) grad_norm: 4.3849 (4.5565) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-06 04:50:26 root] (utils.py 283): INFO Epoch: [28] [1990/2502] eta: 0:06:38 lr: 0.000001 loss_cls: 3.8077 (3.8245) grad_norm: 4.3711 (4.5553) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 04:50:34 root] (utils.py 283): INFO Epoch: [28] [2000/2502] eta: 0:06:30 lr: 0.000001 loss_cls: 3.8077 (3.8249) grad_norm: 4.4385 (4.5552) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 04:50:42 root] (utils.py 283): INFO Epoch: [28] [2010/2502] eta: 0:06:23 lr: 0.000001 loss_cls: 3.8732 (3.8247) grad_norm: 4.6134 (4.5561) time: 0.7723 data: 0.0003 max mem: 8421 +[2024-12-06 04:50:49 root] (utils.py 283): INFO Epoch: [28] [2020/2502] eta: 0:06:15 lr: 0.000001 loss_cls: 3.5869 (3.8234) grad_norm: 4.5196 (4.5554) time: 0.7733 data: 0.0003 max mem: 8421 +[2024-12-06 04:50:57 root] (utils.py 283): INFO Epoch: [28] [2030/2502] eta: 0:06:07 lr: 0.000001 loss_cls: 3.6501 (3.8229) grad_norm: 4.5196 (4.5566) time: 0.7750 data: 0.0003 max mem: 8421 +[2024-12-06 04:51:05 root] (utils.py 283): INFO Epoch: [28] [2040/2502] eta: 0:05:59 lr: 0.000001 loss_cls: 3.8888 (3.8231) grad_norm: 4.4479 (4.5557) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 04:51:13 root] (utils.py 283): INFO Epoch: [28] [2050/2502] eta: 0:05:51 lr: 0.000001 loss_cls: 3.8479 (3.8231) grad_norm: 4.3437 (4.5556) time: 0.7770 data: 0.0002 max mem: 8421 +[2024-12-06 04:51:21 root] (utils.py 283): INFO Epoch: [28] [2060/2502] eta: 0:05:44 lr: 0.000001 loss_cls: 3.8552 (3.8234) grad_norm: 4.3466 (4.5550) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-06 04:51:28 root] (utils.py 283): INFO Epoch: [28] [2070/2502] eta: 0:05:36 lr: 0.000001 loss_cls: 4.0118 (3.8237) grad_norm: 4.3743 (4.5550) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-06 04:51:36 root] (utils.py 283): INFO Epoch: [28] [2080/2502] eta: 0:05:28 lr: 0.000001 loss_cls: 4.0973 (3.8243) grad_norm: 4.3743 (4.5543) time: 0.7718 data: 0.0002 max mem: 8421 +[2024-12-06 04:51:44 root] (utils.py 283): INFO Epoch: [28] [2090/2502] eta: 0:05:20 lr: 0.000001 loss_cls: 3.8616 (3.8245) grad_norm: 4.3761 (4.5543) time: 0.7728 data: 0.0002 max mem: 8421 +[2024-12-06 04:51:52 root] (utils.py 283): INFO Epoch: [28] [2100/2502] eta: 0:05:12 lr: 0.000001 loss_cls: 3.9199 (3.8261) grad_norm: 4.3761 (4.5532) time: 0.7796 data: 0.0002 max mem: 8421 +[2024-12-06 04:51:59 root] (utils.py 283): INFO Epoch: [28] [2110/2502] eta: 0:05:05 lr: 0.000001 loss_cls: 3.9199 (3.8255) grad_norm: 4.3950 (4.5527) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 04:52:07 root] (utils.py 283): INFO Epoch: [28] [2120/2502] eta: 0:04:57 lr: 0.000001 loss_cls: 3.8482 (3.8257) grad_norm: 4.4439 (4.5528) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-06 04:52:15 root] (utils.py 283): INFO Epoch: [28] [2130/2502] eta: 0:04:49 lr: 0.000001 loss_cls: 4.0816 (3.8267) grad_norm: 4.2642 (4.5523) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 04:52:23 root] (utils.py 283): INFO Epoch: [28] [2140/2502] eta: 0:04:41 lr: 0.000001 loss_cls: 4.0085 (3.8272) grad_norm: 4.4150 (4.5578) time: 0.7851 data: 0.0003 max mem: 8421 +[2024-12-06 04:52:31 root] (utils.py 283): INFO Epoch: [28] [2150/2502] eta: 0:04:34 lr: 0.000001 loss_cls: 3.9212 (3.8272) grad_norm: 4.5510 (4.5580) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 04:52:38 root] (utils.py 283): INFO Epoch: [28] [2160/2502] eta: 0:04:26 lr: 0.000001 loss_cls: 3.8960 (3.8277) grad_norm: 4.6028 (4.5586) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-06 04:52:46 root] (utils.py 283): INFO Epoch: [28] [2170/2502] eta: 0:04:18 lr: 0.000001 loss_cls: 3.9676 (3.8282) grad_norm: 4.4421 (4.5577) time: 0.7761 data: 0.0002 max mem: 8421 +[2024-12-06 04:52:54 root] (utils.py 283): INFO Epoch: [28] [2180/2502] eta: 0:04:10 lr: 0.000001 loss_cls: 3.9450 (3.8277) grad_norm: 4.3982 (4.5575) time: 0.7734 data: 0.0003 max mem: 8421 +[2024-12-06 04:53:02 root] (utils.py 283): INFO Epoch: [28] [2190/2502] eta: 0:04:02 lr: 0.000001 loss_cls: 3.9133 (3.8284) grad_norm: 4.4312 (4.5574) time: 0.7726 data: 0.0002 max mem: 8421 +[2024-12-06 04:53:09 root] (utils.py 283): INFO Epoch: [28] [2200/2502] eta: 0:03:55 lr: 0.000001 loss_cls: 3.9364 (3.8283) grad_norm: 4.4283 (4.5568) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-06 04:53:17 root] (utils.py 283): INFO Epoch: [28] [2210/2502] eta: 0:03:47 lr: 0.000001 loss_cls: 3.7923 (3.8272) grad_norm: 4.4170 (4.5565) time: 0.7772 data: 0.0003 max mem: 8421 +[2024-12-06 04:53:25 root] (utils.py 283): INFO Epoch: [28] [2220/2502] eta: 0:03:39 lr: 0.000001 loss_cls: 3.9752 (3.8287) grad_norm: 4.5328 (4.5581) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-06 04:53:33 root] (utils.py 283): INFO Epoch: [28] [2230/2502] eta: 0:03:31 lr: 0.000001 loss_cls: 3.9752 (3.8281) grad_norm: 4.5178 (4.5578) time: 0.7853 data: 0.0002 max mem: 8421 +[2024-12-06 04:53:41 root] (utils.py 283): INFO Epoch: [28] [2240/2502] eta: 0:03:23 lr: 0.000001 loss_cls: 3.8489 (3.8279) grad_norm: 4.3840 (4.5590) time: 0.7856 data: 0.0003 max mem: 8421 +[2024-12-06 04:53:48 root] (utils.py 283): INFO Epoch: [28] [2250/2502] eta: 0:03:16 lr: 0.000001 loss_cls: 3.8873 (3.8279) grad_norm: 4.3860 (4.5600) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 04:53:56 root] (utils.py 283): INFO Epoch: [28] [2260/2502] eta: 0:03:08 lr: 0.000001 loss_cls: 4.0561 (3.8279) grad_norm: 4.3860 (4.5594) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:04 root] (utils.py 283): INFO Epoch: [28] [2270/2502] eta: 0:03:00 lr: 0.000001 loss_cls: 3.6816 (3.8266) grad_norm: 4.2832 (4.5585) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:12 root] (utils.py 283): INFO Epoch: [28] [2280/2502] eta: 0:02:52 lr: 0.000001 loss_cls: 3.7677 (3.8270) grad_norm: 4.3189 (4.5580) time: 0.7921 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:20 root] (utils.py 283): INFO Epoch: [28] [2290/2502] eta: 0:02:45 lr: 0.000001 loss_cls: 4.0603 (3.8279) grad_norm: 4.2894 (4.5570) time: 0.7882 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:28 root] (utils.py 283): INFO Epoch: [28] [2300/2502] eta: 0:02:37 lr: 0.000001 loss_cls: 4.1578 (3.8289) grad_norm: 4.2964 (4.5568) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:35 root] (utils.py 283): INFO Epoch: [28] [2310/2502] eta: 0:02:29 lr: 0.000001 loss_cls: 4.1434 (3.8298) grad_norm: 4.5364 (4.5592) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:43 root] (utils.py 283): INFO Epoch: [28] [2320/2502] eta: 0:02:21 lr: 0.000001 loss_cls: 4.1434 (3.8312) grad_norm: 4.5627 (4.5591) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:51 root] (utils.py 283): INFO Epoch: [28] [2330/2502] eta: 0:02:13 lr: 0.000001 loss_cls: 4.0698 (3.8311) grad_norm: 4.5208 (4.5599) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 04:54:59 root] (utils.py 283): INFO Epoch: [28] [2340/2502] eta: 0:02:06 lr: 0.000001 loss_cls: 3.8249 (3.8305) grad_norm: 4.4025 (4.5595) time: 0.7805 data: 0.0003 max mem: 8421 +[2024-12-06 04:55:07 root] (utils.py 283): INFO Epoch: [28] [2350/2502] eta: 0:01:58 lr: 0.000001 loss_cls: 3.7228 (3.8302) grad_norm: 4.4375 (4.5589) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-06 04:55:14 root] (utils.py 283): INFO Epoch: [28] [2360/2502] eta: 0:01:50 lr: 0.000001 loss_cls: 3.7732 (3.8303) grad_norm: 4.4399 (4.5616) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 04:55:22 root] (utils.py 283): INFO Epoch: [28] [2370/2502] eta: 0:01:42 lr: 0.000001 loss_cls: 3.9061 (3.8303) grad_norm: 4.4561 (4.5626) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 04:55:30 root] (utils.py 283): INFO Epoch: [28] [2380/2502] eta: 0:01:34 lr: 0.000001 loss_cls: 4.0724 (3.8305) grad_norm: 4.4282 (4.5634) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 04:55:38 root] (utils.py 283): INFO Epoch: [28] [2390/2502] eta: 0:01:27 lr: 0.000001 loss_cls: 4.0470 (3.8298) grad_norm: 4.2322 (4.5634) time: 0.7753 data: 0.0003 max mem: 8421 +[2024-12-06 04:55:45 root] (utils.py 283): INFO Epoch: [28] [2400/2502] eta: 0:01:19 lr: 0.000001 loss_cls: 3.9560 (3.8305) grad_norm: 4.3937 (4.5640) time: 0.7760 data: 0.0002 max mem: 8421 +[2024-12-06 04:55:53 root] (utils.py 283): INFO Epoch: [28] [2410/2502] eta: 0:01:11 lr: 0.000001 loss_cls: 3.8218 (3.8299) grad_norm: 4.3230 (4.5626) time: 0.7838 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:01 root] (utils.py 283): INFO Epoch: [28] [2420/2502] eta: 0:01:03 lr: 0.000001 loss_cls: 4.0190 (3.8307) grad_norm: 4.3047 (4.5633) time: 0.7995 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:09 root] (utils.py 283): INFO Epoch: [28] [2430/2502] eta: 0:00:56 lr: 0.000001 loss_cls: 3.8936 (3.8297) grad_norm: 4.6264 (4.5647) time: 0.7933 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:17 root] (utils.py 283): INFO Epoch: [28] [2440/2502] eta: 0:00:48 lr: 0.000001 loss_cls: 3.6647 (3.8300) grad_norm: 4.3967 (4.5629) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:25 root] (utils.py 283): INFO Epoch: [28] [2450/2502] eta: 0:00:40 lr: 0.000001 loss_cls: 3.9868 (3.8298) grad_norm: 4.1280 (4.5629) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:33 root] (utils.py 283): INFO Epoch: [28] [2460/2502] eta: 0:00:32 lr: 0.000001 loss_cls: 3.8176 (3.8289) grad_norm: 4.3893 (4.5624) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:40 root] (utils.py 283): INFO Epoch: [28] [2470/2502] eta: 0:00:24 lr: 0.000001 loss_cls: 3.8028 (3.8284) grad_norm: 4.3746 (4.5619) time: 0.7828 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:48 root] (utils.py 283): INFO Epoch: [28] [2480/2502] eta: 0:00:17 lr: 0.000001 loss_cls: 3.8190 (3.8280) grad_norm: 4.2940 (4.5603) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 04:56:57 root] (utils.py 283): INFO Epoch: [28] [2490/2502] eta: 0:00:09 lr: 0.000001 loss_cls: 3.8190 (3.8275) grad_norm: 4.1947 (4.5594) time: 0.8035 data: 0.0259 max mem: 8421 +[2024-12-06 04:57:04 root] (utils.py 283): INFO Epoch: [28] [2500/2502] eta: 0:00:01 lr: 0.000001 loss_cls: 3.7886 (3.8274) grad_norm: 4.3845 (4.5591) time: 0.8100 data: 0.0259 max mem: 8421 +[2024-12-06 04:57:05 root] (utils.py 283): INFO Epoch: [28] [2501/2502] eta: 0:00:00 lr: 0.000001 loss_cls: 3.7886 (3.8276) grad_norm: 4.3933 (4.5590) time: 0.8112 data: 0.0259 max mem: 8421 +[2024-12-06 04:57:05 root] (utils.py 297): INFO Epoch: [28] Total time: 0:32:29 (0.7791 s / it) +[2024-12-06 04:57:05 root] (engine.py 179): INFO Averaged stats:lr: 0.000001 loss_cls: 3.7886 (3.8155) grad_norm: 4.3933 (4.5590) +[2024-12-06 04:57:06 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7178 (0.7178) acc1: 85.9375 (85.9375) acc3: 95.3125 (95.3125) acc5: 97.6562 (97.6562) time: 0.1311 data: 0.0004 max mem: 8421 +[2024-12-06 04:57:07 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7715 (0.8383) acc1: 84.3750 (82.7415) acc3: 93.7500 (92.9688) acc5: 96.0938 (95.6676) time: 0.1312 data: 0.0004 max mem: 8421 +[2024-12-06 04:57:08 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8521 (0.8888) acc1: 80.4688 (81.3616) acc3: 92.9688 (92.6339) acc5: 95.3125 (95.3125) time: 0.1321 data: 0.0005 max mem: 8421 +[2024-12-06 04:57:10 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9370 (0.8948) acc1: 79.6875 (80.6704) acc3: 92.9688 (92.9435) acc5: 96.0938 (95.4889) time: 0.1325 data: 0.0005 max mem: 8421 +[2024-12-06 04:57:11 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8278 (0.8887) acc1: 81.2500 (80.7927) acc3: 93.7500 (93.0069) acc5: 96.0938 (95.5412) time: 0.1465 data: 0.0130 max mem: 8421 +[2024-12-06 04:57:14 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:07 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.0326 (0.9831) acc1: 75.7812 (78.4773) acc3: 89.0625 (91.3756) acc5: 92.1875 (94.4853) time: 0.1905 data: 0.0554 max mem: 8421 +[2024-12-06 04:57:16 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3038 (1.0267) acc1: 71.8750 (77.7152) acc3: 85.9375 (90.6762) acc5: 90.6250 (93.7756) time: 0.2093 data: 0.0757 max mem: 8421 +[2024-12-06 04:57:18 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:04 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2482 (1.0706) acc1: 72.6562 (76.5955) acc3: 87.5000 (90.0528) acc5: 89.8438 (93.3319) time: 0.2035 data: 0.0706 max mem: 8421 +[2024-12-06 04:57:20 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2980 (1.1080) acc1: 69.5312 (75.7812) acc3: 85.9375 (89.4579) acc5: 89.8438 (92.7951) time: 0.2117 data: 0.0785 max mem: 8421 +[2024-12-06 04:57:21 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3417 (1.1383) acc1: 68.7500 (75.0429) acc3: 84.3750 (88.9337) acc5: 89.8438 (92.3764) time: 0.1900 data: 0.0563 max mem: 8421 +[2024-12-06 04:57:22 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2112 (1.1275) acc1: 75.0000 (75.1360) acc3: 87.5000 (89.1440) acc5: 90.6250 (92.5760) time: 0.1873 data: 0.0562 max mem: 8421 +[2024-12-06 04:57:22 root] (utils.py 297): INFO Test: Total time: 0:00:16 (0.1701 s / it) +[2024-12-06 04:57:22 root] (engine.py 264): INFO * Acc@1 75.140 Acc@3 89.174 Acc@5 92.520 loss 1.126 flops 1.285 layer_flops 1.251 +[2024-12-06 04:57:22 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.1% +[2024-12-06 04:57:22 root] (main.py 551): INFO Max accuracy: 75.18% +[2024-12-06 04:57:23 root] (utils.py 283): INFO Epoch: [29] [ 0/2502] eta: 0:33:09 lr: 0.000001 loss_cls: 3.8213 (3.8213) grad_norm: 6.1283 (6.1283) time: 0.7952 data: 0.0004 max mem: 8421 +[2024-12-06 04:57:31 root] (utils.py 283): INFO Epoch: [29] [ 10/2502] eta: 0:32:58 lr: 0.000001 loss_cls: 3.6316 (3.6847) grad_norm: 4.3565 (4.5763) time: 0.7938 data: 0.0003 max mem: 8421 +[2024-12-06 04:57:39 root] (utils.py 283): INFO Epoch: [29] [ 20/2502] eta: 0:32:40 lr: 0.000001 loss_cls: 3.6580 (3.7449) grad_norm: 4.3163 (4.5307) time: 0.7895 data: 0.0003 max mem: 8421 +[2024-12-06 04:57:47 root] (utils.py 283): INFO Epoch: [29] [ 30/2502] eta: 0:32:38 lr: 0.000001 loss_cls: 3.7948 (3.6953) grad_norm: 4.2704 (4.4421) time: 0.7918 data: 0.0003 max mem: 8421 +[2024-12-06 04:57:55 root] (utils.py 283): INFO Epoch: [29] [ 40/2502] eta: 0:32:20 lr: 0.000001 loss_cls: 3.9470 (3.7797) grad_norm: 4.3145 (4.4496) time: 0.7868 data: 0.0003 max mem: 8421 +[2024-12-06 04:58:02 root] (utils.py 283): INFO Epoch: [29] [ 50/2502] eta: 0:32:10 lr: 0.000001 loss_cls: 3.9494 (3.7982) grad_norm: 4.4094 (4.4525) time: 0.7793 data: 0.0002 max mem: 8421 +[2024-12-06 04:58:10 root] (utils.py 283): INFO Epoch: [29] [ 60/2502] eta: 0:32:05 lr: 0.000001 loss_cls: 3.8904 (3.8119) grad_norm: 4.3788 (4.4308) time: 0.7885 data: 0.0003 max mem: 8421 +[2024-12-06 04:58:18 root] (utils.py 283): INFO Epoch: [29] [ 70/2502] eta: 0:31:52 lr: 0.000001 loss_cls: 3.9333 (3.8115) grad_norm: 4.2081 (4.4035) time: 0.7845 data: 0.0003 max mem: 8421 +[2024-12-06 04:58:26 root] (utils.py 283): INFO Epoch: [29] [ 80/2502] eta: 0:31:42 lr: 0.000001 loss_cls: 3.7686 (3.7987) grad_norm: 4.2765 (4.5953) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-06 04:58:34 root] (utils.py 283): INFO Epoch: [29] [ 90/2502] eta: 0:31:32 lr: 0.000001 loss_cls: 3.4810 (3.7966) grad_norm: 4.3043 (4.5859) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 04:58:42 root] (utils.py 283): INFO Epoch: [29] [ 100/2502] eta: 0:31:26 lr: 0.000001 loss_cls: 3.4879 (3.7830) grad_norm: 4.3530 (4.5755) time: 0.7861 data: 0.0003 max mem: 8421 +[2024-12-06 04:58:49 root] (utils.py 283): INFO Epoch: [29] [ 110/2502] eta: 0:31:18 lr: 0.000001 loss_cls: 3.8453 (3.7930) grad_norm: 4.3462 (4.5511) time: 0.7879 data: 0.0003 max mem: 8421 +[2024-12-06 04:58:57 root] (utils.py 283): INFO Epoch: [29] [ 120/2502] eta: 0:31:08 lr: 0.000001 loss_cls: 4.0646 (3.8020) grad_norm: 4.4789 (4.6120) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 04:59:05 root] (utils.py 283): INFO Epoch: [29] [ 130/2502] eta: 0:30:59 lr: 0.000001 loss_cls: 3.8818 (3.7977) grad_norm: 4.6427 (4.5896) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 04:59:13 root] (utils.py 283): INFO Epoch: [29] [ 140/2502] eta: 0:30:52 lr: 0.000001 loss_cls: 3.8427 (3.7939) grad_norm: 4.2384 (4.5761) time: 0.7827 data: 0.0003 max mem: 8421 +[2024-12-06 04:59:21 root] (utils.py 283): INFO Epoch: [29] [ 150/2502] eta: 0:30:43 lr: 0.000001 loss_cls: 3.8778 (3.7998) grad_norm: 4.3049 (4.5687) time: 0.7829 data: 0.0003 max mem: 8421 +[2024-12-06 04:59:28 root] (utils.py 283): INFO Epoch: [29] [ 160/2502] eta: 0:30:35 lr: 0.000001 loss_cls: 3.8862 (3.8085) grad_norm: 4.3492 (4.5715) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 04:59:36 root] (utils.py 283): INFO Epoch: [29] [ 170/2502] eta: 0:30:26 lr: 0.000001 loss_cls: 3.8287 (3.7942) grad_norm: 4.3128 (4.5846) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 04:59:44 root] (utils.py 283): INFO Epoch: [29] [ 180/2502] eta: 0:30:18 lr: 0.000001 loss_cls: 3.4927 (3.7731) grad_norm: 4.2584 (4.5715) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-06 04:59:52 root] (utils.py 283): INFO Epoch: [29] [ 190/2502] eta: 0:30:09 lr: 0.000001 loss_cls: 3.9311 (3.7929) grad_norm: 4.4830 (4.6459) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:00 root] (utils.py 283): INFO Epoch: [29] [ 200/2502] eta: 0:30:00 lr: 0.000001 loss_cls: 4.1449 (3.7984) grad_norm: 4.4830 (4.6307) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:07 root] (utils.py 283): INFO Epoch: [29] [ 210/2502] eta: 0:29:53 lr: 0.000001 loss_cls: 4.0364 (3.8129) grad_norm: 4.3200 (4.6256) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:15 root] (utils.py 283): INFO Epoch: [29] [ 220/2502] eta: 0:29:45 lr: 0.000001 loss_cls: 3.9583 (3.8054) grad_norm: 4.3607 (4.6161) time: 0.7823 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:23 root] (utils.py 283): INFO Epoch: [29] [ 230/2502] eta: 0:29:37 lr: 0.000001 loss_cls: 3.5887 (3.8004) grad_norm: 4.3356 (4.6027) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:31 root] (utils.py 283): INFO Epoch: [29] [ 240/2502] eta: 0:29:30 lr: 0.000001 loss_cls: 3.7075 (3.8021) grad_norm: 4.2875 (4.5949) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:39 root] (utils.py 283): INFO Epoch: [29] [ 250/2502] eta: 0:29:21 lr: 0.000001 loss_cls: 4.1572 (3.8119) grad_norm: 4.3892 (4.5974) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:46 root] (utils.py 283): INFO Epoch: [29] [ 260/2502] eta: 0:29:13 lr: 0.000001 loss_cls: 4.0799 (3.8155) grad_norm: 4.2198 (4.5881) time: 0.7747 data: 0.0003 max mem: 8421 +[2024-12-06 05:00:54 root] (utils.py 283): INFO Epoch: [29] [ 270/2502] eta: 0:29:04 lr: 0.000001 loss_cls: 3.8664 (3.8204) grad_norm: 4.4879 (4.5886) time: 0.7769 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:02 root] (utils.py 283): INFO Epoch: [29] [ 280/2502] eta: 0:28:56 lr: 0.000001 loss_cls: 3.7760 (3.8099) grad_norm: 4.3355 (4.5790) time: 0.7775 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:10 root] (utils.py 283): INFO Epoch: [29] [ 290/2502] eta: 0:28:48 lr: 0.000001 loss_cls: 3.8613 (3.8166) grad_norm: 4.2255 (4.5670) time: 0.7775 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:18 root] (utils.py 283): INFO Epoch: [29] [ 300/2502] eta: 0:28:40 lr: 0.000001 loss_cls: 3.9817 (3.8243) grad_norm: 4.3688 (4.5705) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:25 root] (utils.py 283): INFO Epoch: [29] [ 310/2502] eta: 0:28:32 lr: 0.000001 loss_cls: 4.0949 (3.8288) grad_norm: 4.4279 (4.5680) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:33 root] (utils.py 283): INFO Epoch: [29] [ 320/2502] eta: 0:28:24 lr: 0.000001 loss_cls: 3.9922 (3.8267) grad_norm: 4.3463 (4.5585) time: 0.7783 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:41 root] (utils.py 283): INFO Epoch: [29] [ 330/2502] eta: 0:28:16 lr: 0.000001 loss_cls: 4.0680 (3.8369) grad_norm: 4.4044 (4.5629) time: 0.7784 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:49 root] (utils.py 283): INFO Epoch: [29] [ 340/2502] eta: 0:28:08 lr: 0.000001 loss_cls: 4.1597 (3.8397) grad_norm: 4.5067 (4.5603) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-06 05:01:56 root] (utils.py 283): INFO Epoch: [29] [ 350/2502] eta: 0:28:00 lr: 0.000001 loss_cls: 3.9759 (3.8313) grad_norm: 4.3271 (4.5655) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-06 05:02:04 root] (utils.py 283): INFO Epoch: [29] [ 360/2502] eta: 0:27:52 lr: 0.000001 loss_cls: 3.7995 (3.8276) grad_norm: 4.2596 (4.5684) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-06 05:02:12 root] (utils.py 283): INFO Epoch: [29] [ 370/2502] eta: 0:27:44 lr: 0.000001 loss_cls: 3.8173 (3.8222) grad_norm: 4.5022 (4.5705) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 05:02:20 root] (utils.py 283): INFO Epoch: [29] [ 380/2502] eta: 0:27:36 lr: 0.000001 loss_cls: 3.8173 (3.8218) grad_norm: 4.3860 (4.5670) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 05:02:28 root] (utils.py 283): INFO Epoch: [29] [ 390/2502] eta: 0:27:28 lr: 0.000001 loss_cls: 3.8888 (3.8237) grad_norm: 4.4350 (4.5629) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 05:02:35 root] (utils.py 283): INFO Epoch: [29] [ 400/2502] eta: 0:27:20 lr: 0.000001 loss_cls: 3.8797 (3.8209) grad_norm: 4.4476 (4.5616) time: 0.7780 data: 0.0003 max mem: 8421 +[2024-12-06 05:02:43 root] (utils.py 283): INFO Epoch: [29] [ 410/2502] eta: 0:27:13 lr: 0.000001 loss_cls: 3.9063 (3.8237) grad_norm: 4.6737 (4.5768) time: 0.7861 data: 0.0002 max mem: 8421 +[2024-12-06 05:02:51 root] (utils.py 283): INFO Epoch: [29] [ 420/2502] eta: 0:27:05 lr: 0.000001 loss_cls: 3.9991 (3.8246) grad_norm: 4.9359 (4.5818) time: 0.7883 data: 0.0002 max mem: 8421 +[2024-12-06 05:02:59 root] (utils.py 283): INFO Epoch: [29] [ 430/2502] eta: 0:26:58 lr: 0.000001 loss_cls: 3.7781 (3.8225) grad_norm: 4.5091 (4.5785) time: 0.7835 data: 0.0003 max mem: 8421 +[2024-12-06 05:03:07 root] (utils.py 283): INFO Epoch: [29] [ 440/2502] eta: 0:26:50 lr: 0.000001 loss_cls: 3.7781 (3.8195) grad_norm: 4.4558 (4.5763) time: 0.7852 data: 0.0003 max mem: 8421 +[2024-12-06 05:03:15 root] (utils.py 283): INFO Epoch: [29] [ 450/2502] eta: 0:26:42 lr: 0.000001 loss_cls: 3.8607 (3.8185) grad_norm: 4.3246 (4.5716) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 05:03:22 root] (utils.py 283): INFO Epoch: [29] [ 460/2502] eta: 0:26:35 lr: 0.000001 loss_cls: 3.6359 (3.8112) grad_norm: 4.3055 (4.5705) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 05:03:30 root] (utils.py 283): INFO Epoch: [29] [ 470/2502] eta: 0:26:27 lr: 0.000001 loss_cls: 3.8100 (3.8131) grad_norm: 4.2895 (4.5678) time: 0.7840 data: 0.0003 max mem: 8421 +[2024-12-06 05:03:38 root] (utils.py 283): INFO Epoch: [29] [ 480/2502] eta: 0:26:19 lr: 0.000001 loss_cls: 3.9449 (3.8142) grad_norm: 4.3058 (4.5688) time: 0.7830 data: 0.0003 max mem: 8421 +[2024-12-06 05:03:46 root] (utils.py 283): INFO Epoch: [29] [ 490/2502] eta: 0:26:12 lr: 0.000001 loss_cls: 3.9213 (3.8146) grad_norm: 4.3629 (4.5660) time: 0.7831 data: 0.0003 max mem: 8421 +[2024-12-06 05:03:54 root] (utils.py 283): INFO Epoch: [29] [ 500/2502] eta: 0:26:04 lr: 0.000001 loss_cls: 4.0971 (3.8224) grad_norm: 4.5289 (4.5701) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 05:04:02 root] (utils.py 283): INFO Epoch: [29] [ 510/2502] eta: 0:25:56 lr: 0.000001 loss_cls: 4.0971 (3.8220) grad_norm: 4.5675 (4.5706) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-06 05:04:09 root] (utils.py 283): INFO Epoch: [29] [ 520/2502] eta: 0:25:48 lr: 0.000001 loss_cls: 4.0341 (3.8259) grad_norm: 4.4221 (4.5704) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-06 05:04:17 root] (utils.py 283): INFO Epoch: [29] [ 530/2502] eta: 0:25:40 lr: 0.000001 loss_cls: 4.0142 (3.8305) grad_norm: 4.3954 (4.5757) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 05:04:25 root] (utils.py 283): INFO Epoch: [29] [ 540/2502] eta: 0:25:32 lr: 0.000001 loss_cls: 4.0040 (3.8320) grad_norm: 4.3954 (4.5772) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-06 05:04:33 root] (utils.py 283): INFO Epoch: [29] [ 550/2502] eta: 0:25:25 lr: 0.000001 loss_cls: 3.9192 (3.8288) grad_norm: 4.4067 (4.5915) time: 0.7928 data: 0.0002 max mem: 8421 +[2024-12-06 05:04:41 root] (utils.py 283): INFO Epoch: [29] [ 560/2502] eta: 0:25:17 lr: 0.000001 loss_cls: 3.9772 (3.8316) grad_norm: 4.3526 (4.5870) time: 0.7936 data: 0.0003 max mem: 8421 +[2024-12-06 05:04:49 root] (utils.py 283): INFO Epoch: [29] [ 570/2502] eta: 0:25:09 lr: 0.000001 loss_cls: 4.0779 (3.8314) grad_norm: 4.3526 (4.5826) time: 0.7757 data: 0.0003 max mem: 8421 +[2024-12-06 05:04:56 root] (utils.py 283): INFO Epoch: [29] [ 580/2502] eta: 0:25:02 lr: 0.000001 loss_cls: 3.8876 (3.8297) grad_norm: 4.3160 (4.5793) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:04 root] (utils.py 283): INFO Epoch: [29] [ 590/2502] eta: 0:24:54 lr: 0.000001 loss_cls: 3.9569 (3.8336) grad_norm: 4.3160 (4.5782) time: 0.7853 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:12 root] (utils.py 283): INFO Epoch: [29] [ 600/2502] eta: 0:24:46 lr: 0.000001 loss_cls: 4.1435 (3.8356) grad_norm: 4.3667 (4.5760) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:20 root] (utils.py 283): INFO Epoch: [29] [ 610/2502] eta: 0:24:38 lr: 0.000001 loss_cls: 3.9172 (3.8351) grad_norm: 4.2663 (4.5717) time: 0.7750 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:27 root] (utils.py 283): INFO Epoch: [29] [ 620/2502] eta: 0:24:30 lr: 0.000001 loss_cls: 3.6596 (3.8290) grad_norm: 4.2334 (4.5692) time: 0.7760 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:35 root] (utils.py 283): INFO Epoch: [29] [ 630/2502] eta: 0:24:22 lr: 0.000001 loss_cls: 3.6596 (3.8272) grad_norm: 4.5718 (4.5726) time: 0.7728 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:43 root] (utils.py 283): INFO Epoch: [29] [ 640/2502] eta: 0:24:14 lr: 0.000001 loss_cls: 3.9471 (3.8286) grad_norm: 4.5860 (4.5712) time: 0.7799 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:51 root] (utils.py 283): INFO Epoch: [29] [ 650/2502] eta: 0:24:06 lr: 0.000001 loss_cls: 3.8051 (3.8259) grad_norm: 4.4338 (4.5699) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 05:05:59 root] (utils.py 283): INFO Epoch: [29] [ 660/2502] eta: 0:23:58 lr: 0.000001 loss_cls: 3.7712 (3.8265) grad_norm: 4.3434 (4.5720) time: 0.7820 data: 0.0003 max mem: 8421 +[2024-12-06 05:06:06 root] (utils.py 283): INFO Epoch: [29] [ 670/2502] eta: 0:23:50 lr: 0.000001 loss_cls: 4.0336 (3.8272) grad_norm: 4.4830 (4.5754) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 05:06:14 root] (utils.py 283): INFO Epoch: [29] [ 680/2502] eta: 0:23:43 lr: 0.000001 loss_cls: 3.7969 (3.8278) grad_norm: 4.4807 (4.5800) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 05:06:22 root] (utils.py 283): INFO Epoch: [29] [ 690/2502] eta: 0:23:35 lr: 0.000001 loss_cls: 3.7749 (3.8239) grad_norm: 4.3351 (4.5756) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 05:06:30 root] (utils.py 283): INFO Epoch: [29] [ 700/2502] eta: 0:23:27 lr: 0.000001 loss_cls: 3.9305 (3.8234) grad_norm: 4.2982 (4.5730) time: 0.7867 data: 0.0003 max mem: 8421 +[2024-12-06 05:06:38 root] (utils.py 283): INFO Epoch: [29] [ 710/2502] eta: 0:23:19 lr: 0.000001 loss_cls: 3.9361 (3.8227) grad_norm: 4.3262 (4.5734) time: 0.7873 data: 0.0003 max mem: 8421 +[2024-12-06 05:06:46 root] (utils.py 283): INFO Epoch: [29] [ 720/2502] eta: 0:23:12 lr: 0.000001 loss_cls: 4.0042 (3.8252) grad_norm: 4.3262 (4.5725) time: 0.7782 data: 0.0003 max mem: 8421 +[2024-12-06 05:06:53 root] (utils.py 283): INFO Epoch: [29] [ 730/2502] eta: 0:23:04 lr: 0.000001 loss_cls: 3.7395 (3.8208) grad_norm: 4.5396 (4.5763) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:01 root] (utils.py 283): INFO Epoch: [29] [ 740/2502] eta: 0:22:56 lr: 0.000001 loss_cls: 3.6135 (3.8228) grad_norm: 4.5077 (4.5750) time: 0.7758 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:09 root] (utils.py 283): INFO Epoch: [29] [ 750/2502] eta: 0:22:48 lr: 0.000001 loss_cls: 3.8141 (3.8207) grad_norm: 4.3217 (4.5782) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:17 root] (utils.py 283): INFO Epoch: [29] [ 760/2502] eta: 0:22:40 lr: 0.000001 loss_cls: 3.8641 (3.8236) grad_norm: 4.3217 (4.5783) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:24 root] (utils.py 283): INFO Epoch: [29] [ 770/2502] eta: 0:22:32 lr: 0.000001 loss_cls: 3.9768 (3.8230) grad_norm: 4.1696 (4.5768) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:32 root] (utils.py 283): INFO Epoch: [29] [ 780/2502] eta: 0:22:24 lr: 0.000001 loss_cls: 3.9533 (3.8249) grad_norm: 4.4749 (4.5800) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:40 root] (utils.py 283): INFO Epoch: [29] [ 790/2502] eta: 0:22:17 lr: 0.000001 loss_cls: 3.8969 (3.8259) grad_norm: 4.5035 (4.5779) time: 0.7874 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:48 root] (utils.py 283): INFO Epoch: [29] [ 800/2502] eta: 0:22:09 lr: 0.000001 loss_cls: 3.9609 (3.8284) grad_norm: 4.4014 (4.5764) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 05:07:56 root] (utils.py 283): INFO Epoch: [29] [ 810/2502] eta: 0:22:01 lr: 0.000001 loss_cls: 4.0536 (3.8293) grad_norm: 4.3236 (4.5753) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-06 05:08:04 root] (utils.py 283): INFO Epoch: [29] [ 820/2502] eta: 0:21:53 lr: 0.000001 loss_cls: 3.9503 (3.8286) grad_norm: 4.3236 (4.5758) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 05:08:11 root] (utils.py 283): INFO Epoch: [29] [ 830/2502] eta: 0:21:45 lr: 0.000001 loss_cls: 3.8960 (3.8302) grad_norm: 4.6198 (4.5797) time: 0.7813 data: 0.0002 max mem: 8421 +[2024-12-06 05:08:19 root] (utils.py 283): INFO Epoch: [29] [ 840/2502] eta: 0:21:38 lr: 0.000001 loss_cls: 3.6955 (3.8258) grad_norm: 4.8439 (4.5902) time: 0.7902 data: 0.0003 max mem: 8421 +[2024-12-06 05:08:27 root] (utils.py 283): INFO Epoch: [29] [ 850/2502] eta: 0:21:30 lr: 0.000001 loss_cls: 3.6193 (3.8262) grad_norm: 4.5175 (4.5920) time: 0.7883 data: 0.0003 max mem: 8421 +[2024-12-06 05:08:35 root] (utils.py 283): INFO Epoch: [29] [ 860/2502] eta: 0:21:22 lr: 0.000001 loss_cls: 3.8203 (3.8273) grad_norm: 4.5099 (4.6000) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 05:08:43 root] (utils.py 283): INFO Epoch: [29] [ 870/2502] eta: 0:21:14 lr: 0.000001 loss_cls: 3.8098 (3.8255) grad_norm: 4.4941 (4.5983) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 05:08:51 root] (utils.py 283): INFO Epoch: [29] [ 880/2502] eta: 0:21:06 lr: 0.000001 loss_cls: 3.5318 (3.8241) grad_norm: 4.4354 (4.5972) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-06 05:08:58 root] (utils.py 283): INFO Epoch: [29] [ 890/2502] eta: 0:20:59 lr: 0.000001 loss_cls: 3.6170 (3.8205) grad_norm: 4.2585 (4.5956) time: 0.7821 data: 0.0003 max mem: 8421 +[2024-12-06 05:09:06 root] (utils.py 283): INFO Epoch: [29] [ 900/2502] eta: 0:20:51 lr: 0.000001 loss_cls: 3.8657 (3.8206) grad_norm: 4.1467 (4.5929) time: 0.7832 data: 0.0003 max mem: 8421 +[2024-12-06 05:09:14 root] (utils.py 283): INFO Epoch: [29] [ 910/2502] eta: 0:20:43 lr: 0.000001 loss_cls: 3.4506 (3.8161) grad_norm: 4.2209 (4.5902) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 05:09:22 root] (utils.py 283): INFO Epoch: [29] [ 920/2502] eta: 0:20:35 lr: 0.000001 loss_cls: 3.5237 (3.8159) grad_norm: 4.2209 (4.5920) time: 0.7877 data: 0.0003 max mem: 8421 +[2024-12-06 05:09:30 root] (utils.py 283): INFO Epoch: [29] [ 930/2502] eta: 0:20:28 lr: 0.000001 loss_cls: 3.8374 (3.8158) grad_norm: 4.3680 (4.5912) time: 0.7854 data: 0.0003 max mem: 8421 +[2024-12-06 05:09:38 root] (utils.py 283): INFO Epoch: [29] [ 940/2502] eta: 0:20:20 lr: 0.000001 loss_cls: 3.8748 (3.8168) grad_norm: 4.5943 (4.5922) time: 0.7831 data: 0.0002 max mem: 8421 +[2024-12-06 05:09:45 root] (utils.py 283): INFO Epoch: [29] [ 950/2502] eta: 0:20:12 lr: 0.000001 loss_cls: 4.1754 (3.8209) grad_norm: 4.3722 (4.5896) time: 0.7848 data: 0.0003 max mem: 8421 +[2024-12-06 05:09:53 root] (utils.py 283): INFO Epoch: [29] [ 960/2502] eta: 0:20:04 lr: 0.000001 loss_cls: 4.1525 (3.8223) grad_norm: 4.3387 (4.5873) time: 0.7794 data: 0.0003 max mem: 8421 +[2024-12-06 05:10:01 root] (utils.py 283): INFO Epoch: [29] [ 970/2502] eta: 0:19:56 lr: 0.000001 loss_cls: 3.9554 (3.8221) grad_norm: 4.2908 (4.5831) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-06 05:10:09 root] (utils.py 283): INFO Epoch: [29] [ 980/2502] eta: 0:19:49 lr: 0.000001 loss_cls: 3.9448 (3.8236) grad_norm: 4.2908 (4.5821) time: 0.7858 data: 0.0003 max mem: 8421 +[2024-12-06 05:10:17 root] (utils.py 283): INFO Epoch: [29] [ 990/2502] eta: 0:19:41 lr: 0.000001 loss_cls: 3.9886 (3.8254) grad_norm: 4.4166 (4.5793) time: 0.7901 data: 0.0003 max mem: 8421 +[2024-12-06 05:10:25 root] (utils.py 283): INFO Epoch: [29] [1000/2502] eta: 0:19:33 lr: 0.000001 loss_cls: 4.0092 (3.8261) grad_norm: 4.3621 (4.5773) time: 0.7806 data: 0.0003 max mem: 8421 +[2024-12-06 05:10:32 root] (utils.py 283): INFO Epoch: [29] [1010/2502] eta: 0:19:25 lr: 0.000001 loss_cls: 3.9560 (3.8270) grad_norm: 4.4013 (4.5755) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 05:10:40 root] (utils.py 283): INFO Epoch: [29] [1020/2502] eta: 0:19:17 lr: 0.000001 loss_cls: 3.9903 (3.8282) grad_norm: 4.3910 (4.5734) time: 0.7782 data: 0.0002 max mem: 8421 +[2024-12-06 05:10:48 root] (utils.py 283): INFO Epoch: [29] [1030/2502] eta: 0:19:09 lr: 0.000001 loss_cls: 3.8872 (3.8275) grad_norm: 4.3129 (4.5757) time: 0.7775 data: 0.0002 max mem: 8421 +[2024-12-06 05:10:56 root] (utils.py 283): INFO Epoch: [29] [1040/2502] eta: 0:19:01 lr: 0.000001 loss_cls: 3.8484 (3.8285) grad_norm: 4.4439 (4.5777) time: 0.7725 data: 0.0003 max mem: 8421 +[2024-12-06 05:11:03 root] (utils.py 283): INFO Epoch: [29] [1050/2502] eta: 0:18:54 lr: 0.000001 loss_cls: 4.0152 (3.8292) grad_norm: 4.4131 (4.5757) time: 0.7733 data: 0.0003 max mem: 8421 +[2024-12-06 05:11:11 root] (utils.py 283): INFO Epoch: [29] [1060/2502] eta: 0:18:46 lr: 0.000001 loss_cls: 3.9155 (3.8292) grad_norm: 4.2095 (4.5724) time: 0.7771 data: 0.0002 max mem: 8421 +[2024-12-06 05:11:19 root] (utils.py 283): INFO Epoch: [29] [1070/2502] eta: 0:18:38 lr: 0.000001 loss_cls: 3.9875 (3.8311) grad_norm: 4.2095 (4.5707) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-06 05:11:27 root] (utils.py 283): INFO Epoch: [29] [1080/2502] eta: 0:18:30 lr: 0.000001 loss_cls: 3.9589 (3.8310) grad_norm: 4.3920 (4.5695) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-06 05:11:34 root] (utils.py 283): INFO Epoch: [29] [1090/2502] eta: 0:18:22 lr: 0.000001 loss_cls: 4.0371 (3.8337) grad_norm: 4.4082 (4.5692) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 05:11:42 root] (utils.py 283): INFO Epoch: [29] [1100/2502] eta: 0:18:14 lr: 0.000001 loss_cls: 3.9723 (3.8334) grad_norm: 4.3072 (4.5682) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 05:11:50 root] (utils.py 283): INFO Epoch: [29] [1110/2502] eta: 0:18:06 lr: 0.000001 loss_cls: 3.9701 (3.8324) grad_norm: 4.3072 (4.5674) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 05:11:58 root] (utils.py 283): INFO Epoch: [29] [1120/2502] eta: 0:17:59 lr: 0.000001 loss_cls: 3.8975 (3.8300) grad_norm: 4.4687 (4.5669) time: 0.7739 data: 0.0003 max mem: 8421 +[2024-12-06 05:12:05 root] (utils.py 283): INFO Epoch: [29] [1130/2502] eta: 0:17:51 lr: 0.000001 loss_cls: 3.6704 (3.8291) grad_norm: 4.4196 (4.5715) time: 0.7752 data: 0.0003 max mem: 8421 +[2024-12-06 05:12:13 root] (utils.py 283): INFO Epoch: [29] [1140/2502] eta: 0:17:43 lr: 0.000001 loss_cls: 3.6802 (3.8278) grad_norm: 4.3543 (4.5710) time: 0.7795 data: 0.0003 max mem: 8421 +[2024-12-06 05:12:21 root] (utils.py 283): INFO Epoch: [29] [1150/2502] eta: 0:17:35 lr: 0.000001 loss_cls: 3.6935 (3.8280) grad_norm: 4.3600 (4.5716) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 05:12:29 root] (utils.py 283): INFO Epoch: [29] [1160/2502] eta: 0:17:27 lr: 0.000001 loss_cls: 3.8554 (3.8279) grad_norm: 4.3511 (4.5700) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 05:12:37 root] (utils.py 283): INFO Epoch: [29] [1170/2502] eta: 0:17:19 lr: 0.000001 loss_cls: 3.7828 (3.8281) grad_norm: 4.3511 (4.5690) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 05:12:44 root] (utils.py 283): INFO Epoch: [29] [1180/2502] eta: 0:17:12 lr: 0.000001 loss_cls: 3.7828 (3.8276) grad_norm: 4.4208 (4.5677) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 05:12:52 root] (utils.py 283): INFO Epoch: [29] [1190/2502] eta: 0:17:04 lr: 0.000001 loss_cls: 3.7742 (3.8272) grad_norm: 4.3701 (4.5662) time: 0.7778 data: 0.0002 max mem: 8421 +[2024-12-06 05:13:00 root] (utils.py 283): INFO Epoch: [29] [1200/2502] eta: 0:16:56 lr: 0.000001 loss_cls: 3.7164 (3.8239) grad_norm: 4.3796 (4.5668) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 05:13:08 root] (utils.py 283): INFO Epoch: [29] [1210/2502] eta: 0:16:48 lr: 0.000001 loss_cls: 3.7164 (3.8233) grad_norm: 4.5681 (4.5672) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 05:13:16 root] (utils.py 283): INFO Epoch: [29] [1220/2502] eta: 0:16:40 lr: 0.000001 loss_cls: 3.8876 (3.8256) grad_norm: 4.5381 (4.5667) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-06 05:13:23 root] (utils.py 283): INFO Epoch: [29] [1230/2502] eta: 0:16:32 lr: 0.000001 loss_cls: 3.8617 (3.8234) grad_norm: 4.4022 (4.5648) time: 0.7776 data: 0.0003 max mem: 8421 +[2024-12-06 05:13:31 root] (utils.py 283): INFO Epoch: [29] [1240/2502] eta: 0:16:25 lr: 0.000001 loss_cls: 3.7222 (3.8222) grad_norm: 4.3159 (4.5630) time: 0.7762 data: 0.0003 max mem: 8421 +[2024-12-06 05:13:39 root] (utils.py 283): INFO Epoch: [29] [1250/2502] eta: 0:16:17 lr: 0.000001 loss_cls: 3.7718 (3.8221) grad_norm: 4.3159 (4.5621) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 05:13:47 root] (utils.py 283): INFO Epoch: [29] [1260/2502] eta: 0:16:09 lr: 0.000001 loss_cls: 3.7890 (3.8202) grad_norm: 4.2507 (4.5600) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 05:13:55 root] (utils.py 283): INFO Epoch: [29] [1270/2502] eta: 0:16:01 lr: 0.000001 loss_cls: 3.7890 (3.8195) grad_norm: 4.3182 (4.5596) time: 0.7862 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:02 root] (utils.py 283): INFO Epoch: [29] [1280/2502] eta: 0:15:53 lr: 0.000001 loss_cls: 4.0462 (3.8222) grad_norm: 4.5052 (4.5615) time: 0.7843 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:10 root] (utils.py 283): INFO Epoch: [29] [1290/2502] eta: 0:15:46 lr: 0.000001 loss_cls: 3.9914 (3.8223) grad_norm: 4.5282 (4.5633) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:18 root] (utils.py 283): INFO Epoch: [29] [1300/2502] eta: 0:15:38 lr: 0.000001 loss_cls: 3.9970 (3.8226) grad_norm: 4.4839 (4.5624) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:26 root] (utils.py 283): INFO Epoch: [29] [1310/2502] eta: 0:15:30 lr: 0.000001 loss_cls: 4.1341 (3.8232) grad_norm: 4.3225 (4.5612) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:33 root] (utils.py 283): INFO Epoch: [29] [1320/2502] eta: 0:15:22 lr: 0.000001 loss_cls: 3.9845 (3.8231) grad_norm: 4.4206 (4.5613) time: 0.7790 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:41 root] (utils.py 283): INFO Epoch: [29] [1330/2502] eta: 0:15:14 lr: 0.000001 loss_cls: 3.9684 (3.8236) grad_norm: 4.5075 (4.5614) time: 0.7803 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:49 root] (utils.py 283): INFO Epoch: [29] [1340/2502] eta: 0:15:06 lr: 0.000001 loss_cls: 4.0561 (3.8259) grad_norm: 4.3099 (4.5606) time: 0.7781 data: 0.0003 max mem: 8421 +[2024-12-06 05:14:57 root] (utils.py 283): INFO Epoch: [29] [1350/2502] eta: 0:14:59 lr: 0.000001 loss_cls: 4.1367 (3.8274) grad_norm: 4.3067 (4.5600) time: 0.7814 data: 0.0002 max mem: 8421 +[2024-12-06 05:15:05 root] (utils.py 283): INFO Epoch: [29] [1360/2502] eta: 0:14:51 lr: 0.000001 loss_cls: 3.8738 (3.8261) grad_norm: 4.3067 (4.5594) time: 0.7809 data: 0.0003 max mem: 8421 +[2024-12-06 05:15:13 root] (utils.py 283): INFO Epoch: [29] [1370/2502] eta: 0:14:43 lr: 0.000001 loss_cls: 3.8699 (3.8269) grad_norm: 4.2898 (4.5580) time: 0.7808 data: 0.0003 max mem: 8421 +[2024-12-06 05:15:20 root] (utils.py 283): INFO Epoch: [29] [1380/2502] eta: 0:14:35 lr: 0.000001 loss_cls: 3.8829 (3.8256) grad_norm: 4.4220 (4.5600) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 05:15:28 root] (utils.py 283): INFO Epoch: [29] [1390/2502] eta: 0:14:27 lr: 0.000001 loss_cls: 3.6954 (3.8236) grad_norm: 4.5309 (4.5602) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-06 05:15:36 root] (utils.py 283): INFO Epoch: [29] [1400/2502] eta: 0:14:20 lr: 0.000001 loss_cls: 3.6989 (3.8229) grad_norm: 4.4366 (4.5612) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-06 05:15:44 root] (utils.py 283): INFO Epoch: [29] [1410/2502] eta: 0:14:12 lr: 0.000001 loss_cls: 3.8497 (3.8239) grad_norm: 4.3392 (4.5593) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-06 05:15:51 root] (utils.py 283): INFO Epoch: [29] [1420/2502] eta: 0:14:04 lr: 0.000001 loss_cls: 3.8497 (3.8221) grad_norm: 4.2847 (4.5583) time: 0.7788 data: 0.0003 max mem: 8421 +[2024-12-06 05:15:59 root] (utils.py 283): INFO Epoch: [29] [1430/2502] eta: 0:13:56 lr: 0.000001 loss_cls: 3.2837 (3.8182) grad_norm: 4.3744 (4.5590) time: 0.7797 data: 0.0003 max mem: 8421 +[2024-12-06 05:16:07 root] (utils.py 283): INFO Epoch: [29] [1440/2502] eta: 0:13:48 lr: 0.000001 loss_cls: 3.5478 (3.8190) grad_norm: 4.4650 (4.5598) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 05:16:15 root] (utils.py 283): INFO Epoch: [29] [1450/2502] eta: 0:13:40 lr: 0.000001 loss_cls: 3.9465 (3.8203) grad_norm: 4.4111 (4.5592) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 05:16:23 root] (utils.py 283): INFO Epoch: [29] [1460/2502] eta: 0:13:33 lr: 0.000001 loss_cls: 3.9465 (3.8192) grad_norm: 4.3484 (4.5576) time: 0.7793 data: 0.0003 max mem: 8421 +[2024-12-06 05:16:30 root] (utils.py 283): INFO Epoch: [29] [1470/2502] eta: 0:13:25 lr: 0.000001 loss_cls: 3.6477 (3.8181) grad_norm: 4.3463 (4.5565) time: 0.7772 data: 0.0002 max mem: 8421 +[2024-12-06 05:16:38 root] (utils.py 283): INFO Epoch: [29] [1480/2502] eta: 0:13:17 lr: 0.000001 loss_cls: 3.9437 (3.8190) grad_norm: 4.4777 (4.5557) time: 0.7846 data: 0.0003 max mem: 8421 +[2024-12-06 05:16:46 root] (utils.py 283): INFO Epoch: [29] [1490/2502] eta: 0:13:09 lr: 0.000001 loss_cls: 3.9437 (3.8186) grad_norm: 4.4709 (4.5548) time: 0.7836 data: 0.0003 max mem: 8421 +[2024-12-06 05:16:54 root] (utils.py 283): INFO Epoch: [29] [1500/2502] eta: 0:13:01 lr: 0.000001 loss_cls: 3.8304 (3.8178) grad_norm: 4.3763 (4.5561) time: 0.7752 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:02 root] (utils.py 283): INFO Epoch: [29] [1510/2502] eta: 0:12:54 lr: 0.000001 loss_cls: 3.7339 (3.8176) grad_norm: 4.3287 (4.5557) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:09 root] (utils.py 283): INFO Epoch: [29] [1520/2502] eta: 0:12:46 lr: 0.000001 loss_cls: 3.9230 (3.8175) grad_norm: 4.5115 (4.5553) time: 0.7767 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:17 root] (utils.py 283): INFO Epoch: [29] [1530/2502] eta: 0:12:38 lr: 0.000001 loss_cls: 3.9103 (3.8161) grad_norm: 4.3529 (4.5538) time: 0.7756 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:25 root] (utils.py 283): INFO Epoch: [29] [1540/2502] eta: 0:12:30 lr: 0.000001 loss_cls: 3.9103 (3.8182) grad_norm: 4.2963 (4.5534) time: 0.7777 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:33 root] (utils.py 283): INFO Epoch: [29] [1550/2502] eta: 0:12:22 lr: 0.000001 loss_cls: 4.0182 (3.8189) grad_norm: 4.3875 (4.5520) time: 0.7771 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:40 root] (utils.py 283): INFO Epoch: [29] [1560/2502] eta: 0:12:14 lr: 0.000001 loss_cls: 3.5756 (3.8174) grad_norm: 4.3884 (4.5509) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:48 root] (utils.py 283): INFO Epoch: [29] [1570/2502] eta: 0:12:07 lr: 0.000001 loss_cls: 3.6462 (3.8180) grad_norm: 4.3935 (4.5496) time: 0.7756 data: 0.0003 max mem: 8421 +[2024-12-06 05:17:56 root] (utils.py 283): INFO Epoch: [29] [1580/2502] eta: 0:11:59 lr: 0.000001 loss_cls: 3.7902 (3.8175) grad_norm: 4.4162 (4.5500) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 05:18:04 root] (utils.py 283): INFO Epoch: [29] [1590/2502] eta: 0:11:51 lr: 0.000001 loss_cls: 3.7762 (3.8163) grad_norm: 4.4215 (4.5494) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 05:18:11 root] (utils.py 283): INFO Epoch: [29] [1600/2502] eta: 0:11:43 lr: 0.000001 loss_cls: 3.7907 (3.8168) grad_norm: 4.3277 (4.5478) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-06 05:18:19 root] (utils.py 283): INFO Epoch: [29] [1610/2502] eta: 0:11:35 lr: 0.000001 loss_cls: 4.0097 (3.8171) grad_norm: 4.1685 (4.5463) time: 0.7766 data: 0.0002 max mem: 8421 +[2024-12-06 05:18:27 root] (utils.py 283): INFO Epoch: [29] [1620/2502] eta: 0:11:28 lr: 0.000001 loss_cls: 3.9186 (3.8163) grad_norm: 4.3264 (4.5474) time: 0.7747 data: 0.0002 max mem: 8421 +[2024-12-06 05:18:35 root] (utils.py 283): INFO Epoch: [29] [1630/2502] eta: 0:11:20 lr: 0.000001 loss_cls: 3.7246 (3.8160) grad_norm: 4.4476 (4.5465) time: 0.7849 data: 0.0002 max mem: 8421 +[2024-12-06 05:18:43 root] (utils.py 283): INFO Epoch: [29] [1640/2502] eta: 0:11:12 lr: 0.000001 loss_cls: 4.0177 (3.8171) grad_norm: 4.4200 (4.5463) time: 0.8006 data: 0.0002 max mem: 8421 +[2024-12-06 05:18:51 root] (utils.py 283): INFO Epoch: [29] [1650/2502] eta: 0:11:04 lr: 0.000001 loss_cls: 3.8796 (3.8143) grad_norm: 4.3677 (4.5469) time: 0.8065 data: 0.0003 max mem: 8421 +[2024-12-06 05:18:59 root] (utils.py 283): INFO Epoch: [29] [1660/2502] eta: 0:10:57 lr: 0.000001 loss_cls: 3.6567 (3.8154) grad_norm: 4.4519 (4.5463) time: 0.8013 data: 0.0003 max mem: 8421 +[2024-12-06 05:19:07 root] (utils.py 283): INFO Epoch: [29] [1670/2502] eta: 0:10:49 lr: 0.000001 loss_cls: 3.9652 (3.8131) grad_norm: 4.4059 (4.5473) time: 0.7893 data: 0.0002 max mem: 8421 +[2024-12-06 05:19:15 root] (utils.py 283): INFO Epoch: [29] [1680/2502] eta: 0:10:41 lr: 0.000001 loss_cls: 3.4783 (3.8113) grad_norm: 4.3859 (4.5466) time: 0.7866 data: 0.0003 max mem: 8421 +[2024-12-06 05:19:23 root] (utils.py 283): INFO Epoch: [29] [1690/2502] eta: 0:10:33 lr: 0.000001 loss_cls: 3.6502 (3.8107) grad_norm: 4.3217 (4.5458) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-06 05:19:31 root] (utils.py 283): INFO Epoch: [29] [1700/2502] eta: 0:10:26 lr: 0.000001 loss_cls: 3.5355 (3.8083) grad_norm: 4.2888 (4.5450) time: 0.7875 data: 0.0005 max mem: 8421 +[2024-12-06 05:19:38 root] (utils.py 283): INFO Epoch: [29] [1710/2502] eta: 0:10:18 lr: 0.000001 loss_cls: 3.7291 (3.8088) grad_norm: 4.3525 (4.5449) time: 0.7882 data: 0.0005 max mem: 8421 +[2024-12-06 05:19:46 root] (utils.py 283): INFO Epoch: [29] [1720/2502] eta: 0:10:10 lr: 0.000001 loss_cls: 4.0668 (3.8092) grad_norm: 4.3525 (4.5450) time: 0.7882 data: 0.0003 max mem: 8421 +[2024-12-06 05:19:54 root] (utils.py 283): INFO Epoch: [29] [1730/2502] eta: 0:10:02 lr: 0.000001 loss_cls: 4.0651 (3.8083) grad_norm: 4.2970 (4.5444) time: 0.7858 data: 0.0002 max mem: 8421 +[2024-12-06 05:20:02 root] (utils.py 283): INFO Epoch: [29] [1740/2502] eta: 0:09:54 lr: 0.000001 loss_cls: 3.8164 (3.8068) grad_norm: 4.2806 (4.5432) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 05:20:10 root] (utils.py 283): INFO Epoch: [29] [1750/2502] eta: 0:09:47 lr: 0.000001 loss_cls: 3.9273 (3.8083) grad_norm: 4.3068 (4.5433) time: 0.7765 data: 0.0003 max mem: 8421 +[2024-12-06 05:20:17 root] (utils.py 283): INFO Epoch: [29] [1760/2502] eta: 0:09:39 lr: 0.000001 loss_cls: 3.9273 (3.8082) grad_norm: 4.3126 (4.5419) time: 0.7761 data: 0.0003 max mem: 8421 +[2024-12-06 05:20:25 root] (utils.py 283): INFO Epoch: [29] [1770/2502] eta: 0:09:31 lr: 0.000001 loss_cls: 3.8508 (3.8076) grad_norm: 4.3433 (4.5432) time: 0.7764 data: 0.0003 max mem: 8421 +[2024-12-06 05:20:33 root] (utils.py 283): INFO Epoch: [29] [1780/2502] eta: 0:09:23 lr: 0.000001 loss_cls: 3.9506 (3.8093) grad_norm: 4.5092 (4.5439) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-06 05:20:41 root] (utils.py 283): INFO Epoch: [29] [1790/2502] eta: 0:09:15 lr: 0.000001 loss_cls: 4.0588 (3.8090) grad_norm: 4.5106 (4.5432) time: 0.7834 data: 0.0003 max mem: 8421 +[2024-12-06 05:20:49 root] (utils.py 283): INFO Epoch: [29] [1800/2502] eta: 0:09:08 lr: 0.000001 loss_cls: 4.0998 (3.8106) grad_norm: 4.4372 (4.5423) time: 0.7847 data: 0.0003 max mem: 8421 +[2024-12-06 05:20:56 root] (utils.py 283): INFO Epoch: [29] [1810/2502] eta: 0:09:00 lr: 0.000001 loss_cls: 4.1884 (3.8111) grad_norm: 4.3898 (4.5421) time: 0.7814 data: 0.0003 max mem: 8421 +[2024-12-06 05:21:04 root] (utils.py 283): INFO Epoch: [29] [1820/2502] eta: 0:08:52 lr: 0.000001 loss_cls: 3.6159 (3.8083) grad_norm: 4.3898 (4.5423) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 05:21:12 root] (utils.py 283): INFO Epoch: [29] [1830/2502] eta: 0:08:44 lr: 0.000001 loss_cls: 3.7619 (3.8083) grad_norm: 4.4030 (4.5415) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 05:21:20 root] (utils.py 283): INFO Epoch: [29] [1840/2502] eta: 0:08:36 lr: 0.000001 loss_cls: 4.0421 (3.8099) grad_norm: 4.3917 (4.5430) time: 0.7775 data: 0.0002 max mem: 8421 +[2024-12-06 05:21:28 root] (utils.py 283): INFO Epoch: [29] [1850/2502] eta: 0:08:29 lr: 0.000001 loss_cls: 4.1167 (3.8113) grad_norm: 4.3796 (4.5426) time: 0.7791 data: 0.0003 max mem: 8421 +[2024-12-06 05:21:35 root] (utils.py 283): INFO Epoch: [29] [1860/2502] eta: 0:08:21 lr: 0.000001 loss_cls: 3.9794 (3.8114) grad_norm: 4.3202 (4.5417) time: 0.7796 data: 0.0003 max mem: 8421 +[2024-12-06 05:21:43 root] (utils.py 283): INFO Epoch: [29] [1870/2502] eta: 0:08:13 lr: 0.000001 loss_cls: 3.8938 (3.8121) grad_norm: 4.3364 (4.5421) time: 0.7766 data: 0.0003 max mem: 8421 +[2024-12-06 05:21:51 root] (utils.py 283): INFO Epoch: [29] [1880/2502] eta: 0:08:05 lr: 0.000001 loss_cls: 4.0546 (3.8132) grad_norm: 4.6178 (4.5442) time: 0.7765 data: 0.0002 max mem: 8421 +[2024-12-06 05:21:59 root] (utils.py 283): INFO Epoch: [29] [1890/2502] eta: 0:07:57 lr: 0.000001 loss_cls: 4.0546 (3.8132) grad_norm: 4.6129 (4.5444) time: 0.7788 data: 0.0002 max mem: 8421 +[2024-12-06 05:22:07 root] (utils.py 283): INFO Epoch: [29] [1900/2502] eta: 0:07:49 lr: 0.000001 loss_cls: 3.7639 (3.8117) grad_norm: 4.4284 (4.5433) time: 0.7786 data: 0.0002 max mem: 8421 +[2024-12-06 05:22:14 root] (utils.py 283): INFO Epoch: [29] [1910/2502] eta: 0:07:42 lr: 0.000001 loss_cls: 3.7639 (3.8110) grad_norm: 4.4648 (4.5462) time: 0.7763 data: 0.0003 max mem: 8421 +[2024-12-06 05:22:22 root] (utils.py 283): INFO Epoch: [29] [1920/2502] eta: 0:07:34 lr: 0.000001 loss_cls: 3.9962 (3.8114) grad_norm: 4.5649 (4.5462) time: 0.7768 data: 0.0003 max mem: 8421 +[2024-12-06 05:22:30 root] (utils.py 283): INFO Epoch: [29] [1930/2502] eta: 0:07:26 lr: 0.000001 loss_cls: 3.9962 (3.8116) grad_norm: 4.3672 (4.5461) time: 0.7779 data: 0.0002 max mem: 8421 +[2024-12-06 05:22:38 root] (utils.py 283): INFO Epoch: [29] [1940/2502] eta: 0:07:18 lr: 0.000001 loss_cls: 3.6763 (3.8106) grad_norm: 4.4808 (4.5463) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 05:22:45 root] (utils.py 283): INFO Epoch: [29] [1950/2502] eta: 0:07:10 lr: 0.000001 loss_cls: 3.8519 (3.8104) grad_norm: 4.4981 (4.5480) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 05:22:53 root] (utils.py 283): INFO Epoch: [29] [1960/2502] eta: 0:07:03 lr: 0.000001 loss_cls: 4.0285 (3.8118) grad_norm: 4.4981 (4.5481) time: 0.7775 data: 0.0003 max mem: 8421 +[2024-12-06 05:23:01 root] (utils.py 283): INFO Epoch: [29] [1970/2502] eta: 0:06:55 lr: 0.000001 loss_cls: 4.1239 (3.8119) grad_norm: 4.3738 (4.5474) time: 0.7773 data: 0.0003 max mem: 8421 +[2024-12-06 05:23:09 root] (utils.py 283): INFO Epoch: [29] [1980/2502] eta: 0:06:47 lr: 0.000001 loss_cls: 4.0110 (3.8119) grad_norm: 4.3738 (4.5481) time: 0.7778 data: 0.0003 max mem: 8421 +[2024-12-06 05:23:17 root] (utils.py 283): INFO Epoch: [29] [1990/2502] eta: 0:06:39 lr: 0.000001 loss_cls: 3.9378 (3.8119) grad_norm: 4.4712 (4.5486) time: 0.7789 data: 0.0003 max mem: 8421 +[2024-12-06 05:23:24 root] (utils.py 283): INFO Epoch: [29] [2000/2502] eta: 0:06:31 lr: 0.000001 loss_cls: 3.6802 (3.8114) grad_norm: 4.3824 (4.5475) time: 0.7781 data: 0.0002 max mem: 8421 +[2024-12-06 05:23:32 root] (utils.py 283): INFO Epoch: [29] [2010/2502] eta: 0:06:24 lr: 0.000001 loss_cls: 3.6512 (3.8111) grad_norm: 4.3824 (4.5479) time: 0.7779 data: 0.0003 max mem: 8421 +[2024-12-06 05:23:40 root] (utils.py 283): INFO Epoch: [29] [2020/2502] eta: 0:06:16 lr: 0.000001 loss_cls: 3.5793 (3.8100) grad_norm: 4.4649 (4.5467) time: 0.7800 data: 0.0003 max mem: 8421 +[2024-12-06 05:23:48 root] (utils.py 283): INFO Epoch: [29] [2030/2502] eta: 0:06:08 lr: 0.000001 loss_cls: 3.8620 (3.8105) grad_norm: 4.4089 (4.5490) time: 0.7787 data: 0.0003 max mem: 8421 +[2024-12-06 05:23:55 root] (utils.py 283): INFO Epoch: [29] [2040/2502] eta: 0:06:00 lr: 0.000001 loss_cls: 3.9953 (3.8107) grad_norm: 4.4089 (4.5487) time: 0.7786 data: 0.0003 max mem: 8421 +[2024-12-06 05:24:03 root] (utils.py 283): INFO Epoch: [29] [2050/2502] eta: 0:05:52 lr: 0.000001 loss_cls: 3.8806 (3.8110) grad_norm: 4.2565 (4.5479) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 05:24:11 root] (utils.py 283): INFO Epoch: [29] [2060/2502] eta: 0:05:45 lr: 0.000001 loss_cls: 3.9765 (3.8111) grad_norm: 4.2843 (4.5477) time: 0.7915 data: 0.0003 max mem: 8421 +[2024-12-06 05:24:19 root] (utils.py 283): INFO Epoch: [29] [2070/2502] eta: 0:05:37 lr: 0.000001 loss_cls: 3.6805 (3.8096) grad_norm: 4.2858 (4.5466) time: 0.7942 data: 0.0003 max mem: 8421 +[2024-12-06 05:24:27 root] (utils.py 283): INFO Epoch: [29] [2080/2502] eta: 0:05:29 lr: 0.000001 loss_cls: 3.6721 (3.8099) grad_norm: 4.4383 (4.5462) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 05:24:35 root] (utils.py 283): INFO Epoch: [29] [2090/2502] eta: 0:05:21 lr: 0.000001 loss_cls: 3.6871 (3.8080) grad_norm: 4.5207 (4.5469) time: 0.7810 data: 0.0002 max mem: 8421 +[2024-12-06 05:24:43 root] (utils.py 283): INFO Epoch: [29] [2100/2502] eta: 0:05:13 lr: 0.000001 loss_cls: 3.5710 (3.8077) grad_norm: 4.4260 (4.5467) time: 0.7777 data: 0.0002 max mem: 8421 +[2024-12-06 05:24:50 root] (utils.py 283): INFO Epoch: [29] [2110/2502] eta: 0:05:05 lr: 0.000001 loss_cls: 3.8464 (3.8075) grad_norm: 4.2995 (4.5463) time: 0.7783 data: 0.0002 max mem: 8421 +[2024-12-06 05:24:58 root] (utils.py 283): INFO Epoch: [29] [2120/2502] eta: 0:04:58 lr: 0.000001 loss_cls: 3.7240 (3.8060) grad_norm: 4.2995 (4.5452) time: 0.7818 data: 0.0003 max mem: 8421 +[2024-12-06 05:25:06 root] (utils.py 283): INFO Epoch: [29] [2130/2502] eta: 0:04:50 lr: 0.000001 loss_cls: 3.6527 (3.8058) grad_norm: 4.3667 (4.5453) time: 0.7774 data: 0.0003 max mem: 8421 +[2024-12-06 05:25:14 root] (utils.py 283): INFO Epoch: [29] [2140/2502] eta: 0:04:42 lr: 0.000001 loss_cls: 3.8477 (3.8059) grad_norm: 4.3667 (4.5445) time: 0.7812 data: 0.0003 max mem: 8421 +[2024-12-06 05:25:22 root] (utils.py 283): INFO Epoch: [29] [2150/2502] eta: 0:04:34 lr: 0.000001 loss_cls: 3.9247 (3.8060) grad_norm: 4.4271 (4.5445) time: 0.7893 data: 0.0003 max mem: 8421 +[2024-12-06 05:25:30 root] (utils.py 283): INFO Epoch: [29] [2160/2502] eta: 0:04:27 lr: 0.000001 loss_cls: 4.0559 (3.8068) grad_norm: 4.5489 (4.5444) time: 0.7923 data: 0.0003 max mem: 8421 +[2024-12-06 05:25:37 root] (utils.py 283): INFO Epoch: [29] [2170/2502] eta: 0:04:19 lr: 0.000001 loss_cls: 4.1499 (3.8082) grad_norm: 4.4585 (4.5442) time: 0.7860 data: 0.0002 max mem: 8421 +[2024-12-06 05:25:45 root] (utils.py 283): INFO Epoch: [29] [2180/2502] eta: 0:04:11 lr: 0.000001 loss_cls: 4.1644 (3.8102) grad_norm: 4.3811 (4.5438) time: 0.7810 data: 0.0002 max mem: 8421 +[2024-12-06 05:25:53 root] (utils.py 283): INFO Epoch: [29] [2190/2502] eta: 0:04:03 lr: 0.000001 loss_cls: 4.1752 (3.8112) grad_norm: 4.4168 (4.5440) time: 0.7843 data: 0.0002 max mem: 8421 +[2024-12-06 05:26:01 root] (utils.py 283): INFO Epoch: [29] [2200/2502] eta: 0:03:55 lr: 0.000001 loss_cls: 4.0793 (3.8114) grad_norm: 4.4852 (4.5432) time: 0.7870 data: 0.0003 max mem: 8421 +[2024-12-06 05:26:09 root] (utils.py 283): INFO Epoch: [29] [2210/2502] eta: 0:03:47 lr: 0.000001 loss_cls: 3.8254 (3.8109) grad_norm: 4.4685 (4.5440) time: 0.7855 data: 0.0003 max mem: 8421 +[2024-12-06 05:26:17 root] (utils.py 283): INFO Epoch: [29] [2220/2502] eta: 0:03:40 lr: 0.000001 loss_cls: 3.7377 (3.8106) grad_norm: 4.4037 (4.5426) time: 0.7819 data: 0.0003 max mem: 8421 +[2024-12-06 05:26:24 root] (utils.py 283): INFO Epoch: [29] [2230/2502] eta: 0:03:32 lr: 0.000001 loss_cls: 3.8723 (3.8113) grad_norm: 4.4037 (4.5425) time: 0.7801 data: 0.0003 max mem: 8421 +[2024-12-06 05:26:32 root] (utils.py 283): INFO Epoch: [29] [2240/2502] eta: 0:03:24 lr: 0.000001 loss_cls: 3.9828 (3.8119) grad_norm: 4.5947 (4.5433) time: 0.7780 data: 0.0002 max mem: 8421 +[2024-12-06 05:26:40 root] (utils.py 283): INFO Epoch: [29] [2250/2502] eta: 0:03:16 lr: 0.000001 loss_cls: 4.1442 (3.8127) grad_norm: 4.5947 (4.5436) time: 0.7746 data: 0.0002 max mem: 8421 +[2024-12-06 05:26:48 root] (utils.py 283): INFO Epoch: [29] [2260/2502] eta: 0:03:08 lr: 0.000001 loss_cls: 3.9610 (3.8126) grad_norm: 4.4342 (4.5431) time: 0.7754 data: 0.0002 max mem: 8421 +[2024-12-06 05:26:56 root] (utils.py 283): INFO Epoch: [29] [2270/2502] eta: 0:03:01 lr: 0.000001 loss_cls: 3.7143 (3.8122) grad_norm: 4.3485 (4.5432) time: 0.7820 data: 0.0002 max mem: 8421 +[2024-12-06 05:27:03 root] (utils.py 283): INFO Epoch: [29] [2280/2502] eta: 0:02:53 lr: 0.000001 loss_cls: 3.7498 (3.8111) grad_norm: 4.5169 (4.5436) time: 0.7833 data: 0.0003 max mem: 8421 +[2024-12-06 05:27:11 root] (utils.py 283): INFO Epoch: [29] [2290/2502] eta: 0:02:45 lr: 0.000001 loss_cls: 3.9377 (3.8115) grad_norm: 4.5594 (4.5438) time: 0.7811 data: 0.0003 max mem: 8421 +[2024-12-06 05:27:19 root] (utils.py 283): INFO Epoch: [29] [2300/2502] eta: 0:02:37 lr: 0.000001 loss_cls: 3.9333 (3.8109) grad_norm: 4.5186 (4.5438) time: 0.7813 data: 0.0003 max mem: 8421 +[2024-12-06 05:27:27 root] (utils.py 283): INFO Epoch: [29] [2310/2502] eta: 0:02:29 lr: 0.000001 loss_cls: 3.9334 (3.8120) grad_norm: 4.5186 (4.5455) time: 0.7792 data: 0.0002 max mem: 8421 +[2024-12-06 05:27:35 root] (utils.py 283): INFO Epoch: [29] [2320/2502] eta: 0:02:22 lr: 0.000001 loss_cls: 3.9546 (3.8119) grad_norm: 4.5450 (4.5454) time: 0.7794 data: 0.0002 max mem: 8421 +[2024-12-06 05:27:42 root] (utils.py 283): INFO Epoch: [29] [2330/2502] eta: 0:02:14 lr: 0.000001 loss_cls: 3.9546 (3.8126) grad_norm: 4.4310 (4.5446) time: 0.7777 data: 0.0002 max mem: 8421 +[2024-12-06 05:27:50 root] (utils.py 283): INFO Epoch: [29] [2340/2502] eta: 0:02:06 lr: 0.000001 loss_cls: 3.9962 (3.8127) grad_norm: 4.2949 (4.5457) time: 0.7762 data: 0.0002 max mem: 8421 +[2024-12-06 05:27:58 root] (utils.py 283): INFO Epoch: [29] [2350/2502] eta: 0:01:58 lr: 0.000001 loss_cls: 4.0368 (3.8140) grad_norm: 4.5646 (4.5468) time: 0.7807 data: 0.0003 max mem: 8421 +[2024-12-06 05:28:06 root] (utils.py 283): INFO Epoch: [29] [2360/2502] eta: 0:01:50 lr: 0.000001 loss_cls: 3.8604 (3.8128) grad_norm: 4.5646 (4.5468) time: 0.7798 data: 0.0003 max mem: 8421 +[2024-12-06 05:28:14 root] (utils.py 283): INFO Epoch: [29] [2370/2502] eta: 0:01:43 lr: 0.000001 loss_cls: 3.3832 (3.8123) grad_norm: 4.3224 (4.5470) time: 0.7756 data: 0.0002 max mem: 8421 +[2024-12-06 05:28:21 root] (utils.py 283): INFO Epoch: [29] [2380/2502] eta: 0:01:35 lr: 0.000001 loss_cls: 3.9019 (3.8124) grad_norm: 4.5480 (4.5469) time: 0.7787 data: 0.0002 max mem: 8421 +[2024-12-06 05:28:29 root] (utils.py 283): INFO Epoch: [29] [2390/2502] eta: 0:01:27 lr: 0.000001 loss_cls: 3.9127 (3.8130) grad_norm: 4.5439 (4.5492) time: 0.7870 data: 0.0002 max mem: 8421 +[2024-12-06 05:28:37 root] (utils.py 283): INFO Epoch: [29] [2400/2502] eta: 0:01:19 lr: 0.000001 loss_cls: 4.0986 (3.8135) grad_norm: 4.4315 (4.5489) time: 0.7839 data: 0.0003 max mem: 8421 +[2024-12-06 05:28:45 root] (utils.py 283): INFO Epoch: [29] [2410/2502] eta: 0:01:11 lr: 0.000001 loss_cls: 3.8851 (3.8129) grad_norm: 4.3399 (4.5487) time: 0.7754 data: 0.0003 max mem: 8421 +[2024-12-06 05:28:53 root] (utils.py 283): INFO Epoch: [29] [2420/2502] eta: 0:01:04 lr: 0.000001 loss_cls: 3.8955 (3.8135) grad_norm: 4.3399 (4.5479) time: 0.7753 data: 0.0002 max mem: 8421 +[2024-12-06 05:29:00 root] (utils.py 283): INFO Epoch: [29] [2430/2502] eta: 0:00:56 lr: 0.000001 loss_cls: 4.0770 (3.8146) grad_norm: 4.3160 (4.5477) time: 0.7795 data: 0.0002 max mem: 8421 +[2024-12-06 05:29:08 root] (utils.py 283): INFO Epoch: [29] [2440/2502] eta: 0:00:48 lr: 0.000001 loss_cls: 4.0770 (3.8151) grad_norm: 4.4333 (4.5488) time: 0.7808 data: 0.0002 max mem: 8421 +[2024-12-06 05:29:16 root] (utils.py 283): INFO Epoch: [29] [2450/2502] eta: 0:00:40 lr: 0.000001 loss_cls: 3.9227 (3.8154) grad_norm: 4.4405 (4.5482) time: 0.7878 data: 0.0002 max mem: 8421 +[2024-12-06 05:29:24 root] (utils.py 283): INFO Epoch: [29] [2460/2502] eta: 0:00:32 lr: 0.000001 loss_cls: 3.8364 (3.8139) grad_norm: 4.4049 (4.5478) time: 0.7942 data: 0.0002 max mem: 8421 +[2024-12-06 05:29:32 root] (utils.py 283): INFO Epoch: [29] [2470/2502] eta: 0:00:24 lr: 0.000001 loss_cls: 3.5227 (3.8138) grad_norm: 4.5025 (4.5508) time: 0.7842 data: 0.0002 max mem: 8421 +[2024-12-06 05:29:40 root] (utils.py 283): INFO Epoch: [29] [2480/2502] eta: 0:00:17 lr: 0.000001 loss_cls: 3.8185 (3.8137) grad_norm: 4.3989 (4.5502) time: 0.7897 data: 0.0003 max mem: 8421 +[2024-12-06 05:29:48 root] (utils.py 283): INFO Epoch: [29] [2490/2502] eta: 0:00:09 lr: 0.000001 loss_cls: 3.8094 (3.8129) grad_norm: 4.3823 (4.5500) time: 0.8242 data: 0.0264 max mem: 8421 +[2024-12-06 05:29:56 root] (utils.py 283): INFO Epoch: [29] [2500/2502] eta: 0:00:01 lr: 0.000001 loss_cls: 3.8391 (3.8129) grad_norm: 4.3999 (4.5495) time: 0.8119 data: 0.0264 max mem: 8421 +[2024-12-06 05:29:57 root] (utils.py 283): INFO Epoch: [29] [2501/2502] eta: 0:00:00 lr: 0.000001 loss_cls: 3.5060 (3.8125) grad_norm: 4.3999 (4.5494) time: 0.8103 data: 0.0264 max mem: 8421 +[2024-12-06 05:29:57 root] (utils.py 297): INFO Epoch: [29] Total time: 0:32:34 (0.7812 s / it) +[2024-12-06 05:29:57 root] (engine.py 179): INFO Averaged stats:lr: 0.000001 loss_cls: 3.5060 (3.8067) grad_norm: 4.3999 (4.5494) +[2024-12-06 05:29:57 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:12 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7202 (0.7202) acc1: 86.7188 (86.7188) acc3: 96.0938 (96.0938) acc5: 98.4375 (98.4375) time: 0.1309 data: 0.0003 max mem: 8421 +[2024-12-06 05:29:59 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:11 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.7786 (0.8443) acc1: 83.5938 (82.1023) acc3: 92.9688 (93.1108) acc5: 95.3125 (95.8807) time: 0.1310 data: 0.0004 max mem: 8421 +[2024-12-06 05:30:00 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:10 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8668 (0.8943) acc1: 78.1250 (80.9524) acc3: 92.9688 (92.5223) acc5: 95.3125 (95.3125) time: 0.1319 data: 0.0004 max mem: 8421 +[2024-12-06 05:30:01 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:09 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9488 (0.8994) acc1: 79.6875 (80.4688) acc3: 92.9688 (92.7923) acc5: 96.0938 (95.4637) time: 0.1336 data: 0.0005 max mem: 8421 +[2024-12-06 05:30:03 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:08 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.8223 (0.8928) acc1: 81.2500 (80.6974) acc3: 93.7500 (92.8354) acc5: 96.0938 (95.4649) time: 0.1539 data: 0.0202 max mem: 8421 +[2024-12-06 05:30:04 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:06 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 0.9981 (0.9848) acc1: 75.7812 (78.6458) acc3: 88.2812 (91.2377) acc5: 91.4062 (94.3168) time: 0.1533 data: 0.0202 max mem: 8421 +[2024-12-06 05:30:06 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:05 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2796 (1.0287) acc1: 71.0938 (77.7664) acc3: 85.9375 (90.5738) acc5: 89.8438 (93.6219) time: 0.1331 data: 0.0005 max mem: 8421 +[2024-12-06 05:30:07 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:03 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2331 (1.0704) acc1: 72.6562 (76.7276) acc3: 87.5000 (90.0748) acc5: 89.8438 (93.1888) time: 0.1432 data: 0.0106 max mem: 8421 +[2024-12-06 05:30:09 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:02 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2935 (1.1083) acc1: 72.6562 (75.8488) acc3: 84.3750 (89.4194) acc5: 89.8438 (92.6601) time: 0.1449 data: 0.0110 max mem: 8421 +[2024-12-06 05:30:10 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:01 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.3265 (1.1377) acc1: 70.3125 (75.1288) acc3: 84.3750 (88.9251) acc5: 89.0625 (92.2905) time: 0.1351 data: 0.0008 max mem: 8421 +[2024-12-06 05:30:11 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 1.2845 (1.2845) layer_flops: 1.2508 (1.2508) loss: 1.2111 (1.1266) acc1: 73.4375 (75.2640) acc3: 87.5000 (89.1200) acc5: 90.6250 (92.5040) time: 0.1322 data: 0.0008 max mem: 8421 +[2024-12-06 05:30:11 root] (utils.py 297): INFO Test: Total time: 0:00:13 (0.1394 s / it) +[2024-12-06 05:30:13 root] (engine.py 264): INFO * Acc@1 75.254 Acc@3 89.142 Acc@5 92.506 loss 1.126 flops 1.285 layer_flops 1.251 +[2024-12-06 05:30:13 root] (main.py 547): INFO Accuracy of the network on the 50000 test images: 75.3% +[2024-12-06 05:30:14 root] (main.py 551): INFO Max accuracy: 75.25% +[2024-12-06 05:30:14 root] (main.py 564): INFO Finetune time 7:46:45