diff --git "a/Vim-B_15epochs_81.3/logs/log_rank0.txt" "b/Vim-B_15epochs_81.3/logs/log_rank0.txt" new file mode 100644--- /dev/null +++ "b/Vim-B_15epochs_81.3/logs/log_rank0.txt" @@ -0,0 +1,4055 @@ +[2024-12-10 11:19:40 root] (main.py 238): INFO Namespace(batch_size=128, epochs=15, model='RMeeTo_base', multi_reso=False, input_size=224, drop=0.0, drop_path=0.1, model_ema_decay=0.99996, model_ema_force_cpu=False, model_ema=True, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=None, momentum=0.9, weight_decay=0.0, sched='cosine', lr=2e-05, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=1e-06, min_lr=1e-06, decay_epochs=30, warmup_epochs=5, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', repeated_aug=True, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', data_path='/datasets/imagenet/', data_set='IMNET', inat_category='name', output_dir='exp/tab2/base_ema', device='cuda', seed=0, resume='', autoresume=False, start_epoch=0, dist_eval=True, num_workers=10, pin_mem=True, world_size=4, port='15662', dist_url='env://', target_flops=3.0, granularity=4, load_compression_rate=False, warmup_compression_rate=False, distill='True', throughput=False, eval=False, merge_method='ToMe', merge_interval=2, if_pruning=False, num_prune='11', metric='X', distance='cosine', if_order=True, if_random=False, model_pth='/pretrained', if_merge_odd=False, merge_mode='sum', if_shuffle=False, shuffle_rate=0.0, choose='max', compare=11, data_ratio=1.0, data_seed=0, rank=0, gpu=0, distributed=True, dist_backend='nccl') +[2024-12-10 11:19:45 root] (main.py 304): INFO Creating model: RMeeTo_base +[2024-12-10 11:19:56 timm.utils.model_ema] (model_ema.py 64): INFO Loaded state_dict_ema +[2024-12-10 11:19:57 root] (main.py 393): INFO number of params: 97598440 +[2024-12-10 11:20:00 timm.utils.model_ema] (model_ema.py 64): INFO Loaded state_dict_ema +[2024-12-10 11:20:03 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:04:40 loss: 0.3897 (0.3897) acc1: 92.9688 (92.9688) acc5: 98.4375 (98.4375) time: 2.8657 data: 0.0012 max mem: 3465 +[2024-12-10 11:20:10 root] (utils.py 283): INFO Test: [10/98] eta: 0:01:20 loss: 0.6423 (0.5990) acc1: 85.1562 (87.0739) acc5: 98.4375 (98.0824) time: 0.9132 data: 0.0005 max mem: 3539 +[2024-12-10 11:20:17 root] (utils.py 283): INFO Test: [20/98] eta: 0:01:04 loss: 0.6205 (0.6312) acc1: 85.1562 (86.0491) acc5: 97.6562 (97.8423) time: 0.7196 data: 0.0004 max mem: 3539 +[2024-12-10 11:20:24 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:53 loss: 0.6205 (0.6533) acc1: 85.9375 (85.5343) acc5: 97.6562 (97.6562) time: 0.7219 data: 0.0004 max mem: 3539 +[2024-12-10 11:20:32 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:44 loss: 0.6672 (0.6529) acc1: 85.9375 (85.6517) acc5: 97.6562 (97.6372) time: 0.7227 data: 0.0004 max mem: 3539 +[2024-12-10 11:20:39 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:36 loss: 0.7682 (0.7215) acc1: 80.4688 (84.0533) acc5: 96.0938 (96.8137) time: 0.7235 data: 0.0005 max mem: 3539 +[2024-12-10 11:20:46 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:28 loss: 0.9657 (0.7483) acc1: 77.3438 (83.5425) acc5: 93.7500 (96.4011) time: 0.7279 data: 0.0014 max mem: 3539 +[2024-12-10 11:20:53 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:21 loss: 0.8900 (0.7725) acc1: 78.9062 (82.8345) acc5: 94.5312 (96.2478) time: 0.7280 data: 0.0014 max mem: 3539 +[2024-12-10 11:21:01 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:13 loss: 0.8900 (0.7935) acc1: 78.9062 (82.4267) acc5: 94.5312 (95.9684) time: 0.7254 data: 0.0009 max mem: 3539 +[2024-12-10 11:21:08 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:05 loss: 0.9522 (0.8186) acc1: 77.3438 (81.7308) acc5: 93.7500 (95.7503) time: 0.7267 data: 0.0010 max mem: 3539 +[2024-12-10 11:21:13 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 loss: 0.8850 (0.8153) acc1: 78.9062 (81.7280) acc5: 95.3125 (95.8640) time: 0.7223 data: 0.0008 max mem: 3539 +[2024-12-10 11:21:13 root] (utils.py 297): INFO Test: Total time: 0:01:13 (0.7456 s / it) +[2024-12-10 11:21:15 root] (engine.py 339): INFO * Acc@1 81.882 Acc@5 95.756 loss 0.811 +[2024-12-10 11:21:16 root] (main.py 513): INFO Start training for 15 epochs +[2024-12-10 11:21:21 root] (utils.py 283): INFO Epoch: [0] [ 0/2502] eta: 4:00:15 lr: 0.000020 loss_cls: 4.2059 (4.2059) grad_norm: 7.2845 (7.2845) time: 5.7615 data: 0.0003 max mem: 28069 +[2024-12-10 11:21:52 root] (utils.py 283): INFO Epoch: [0] [ 10/2502] eta: 2:16:02 lr: 0.000020 loss_cls: 4.1005 (3.9615) grad_norm: 6.2445 (6.6206) time: 3.2756 data: 0.0004 max mem: 29202 +[2024-12-10 11:22:21 root] (utils.py 283): INFO Epoch: [0] [ 20/2502] eta: 2:09:03 lr: 0.000020 loss_cls: 3.8190 (3.7703) grad_norm: 5.1714 (5.8379) time: 2.9877 data: 0.0003 max mem: 29202 +[2024-12-10 11:22:51 root] (utils.py 283): INFO Epoch: [0] [ 30/2502] eta: 2:06:12 lr: 0.000020 loss_cls: 3.7475 (3.7275) grad_norm: 3.7928 (5.0669) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-10 11:23:20 root] (utils.py 283): INFO Epoch: [0] [ 40/2502] eta: 2:04:34 lr: 0.000020 loss_cls: 3.5752 (3.6137) grad_norm: 3.4128 (4.6495) time: 2.9479 data: 0.0003 max mem: 29202 +[2024-12-10 11:23:50 root] (utils.py 283): INFO Epoch: [0] [ 50/2502] eta: 2:03:21 lr: 0.000020 loss_cls: 3.2877 (3.5580) grad_norm: 3.0982 (4.4571) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-10 11:24:19 root] (utils.py 283): INFO Epoch: [0] [ 60/2502] eta: 2:02:20 lr: 0.000020 loss_cls: 3.2877 (3.5000) grad_norm: 2.8831 (4.1997) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 11:24:48 root] (utils.py 283): INFO Epoch: [0] [ 70/2502] eta: 2:01:28 lr: 0.000020 loss_cls: 3.2523 (3.4374) grad_norm: 2.7880 (4.0800) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-10 11:25:18 root] (utils.py 283): INFO Epoch: [0] [ 80/2502] eta: 2:00:42 lr: 0.000020 loss_cls: 3.2191 (3.3924) grad_norm: 2.5474 (3.8830) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 11:25:47 root] (utils.py 283): INFO Epoch: [0] [ 90/2502] eta: 1:59:59 lr: 0.000020 loss_cls: 3.2191 (3.3595) grad_norm: 2.4236 (3.7265) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 11:26:17 root] (utils.py 283): INFO Epoch: [0] [ 100/2502] eta: 1:59:21 lr: 0.000020 loss_cls: 3.4001 (3.3537) grad_norm: 2.4404 (3.6285) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 11:26:46 root] (utils.py 283): INFO Epoch: [0] [ 110/2502] eta: 1:58:43 lr: 0.000020 loss_cls: 3.3950 (3.3307) grad_norm: 2.4404 (3.5430) time: 2.9457 data: 0.0002 max mem: 29202 +[2024-12-10 11:27:16 root] (utils.py 283): INFO Epoch: [0] [ 120/2502] eta: 1:58:07 lr: 0.000020 loss_cls: 3.1771 (3.3143) grad_norm: 2.4616 (3.4555) time: 2.9454 data: 0.0002 max mem: 29202 +[2024-12-10 11:27:45 root] (utils.py 283): INFO Epoch: [0] [ 130/2502] eta: 1:57:35 lr: 0.000020 loss_cls: 3.1552 (3.2957) grad_norm: 2.2004 (3.3760) time: 2.9563 data: 0.0003 max mem: 29202 +[2024-12-10 11:28:15 root] (utils.py 283): INFO Epoch: [0] [ 140/2502] eta: 1:57:01 lr: 0.000020 loss_cls: 3.1394 (3.2775) grad_norm: 2.3384 (3.3327) time: 2.9556 data: 0.0003 max mem: 29202 +[2024-12-10 11:28:44 root] (utils.py 283): INFO Epoch: [0] [ 150/2502] eta: 1:56:27 lr: 0.000020 loss_cls: 3.0529 (3.2609) grad_norm: 2.3963 (3.2831) time: 2.9456 data: 0.0002 max mem: 29202 +[2024-12-10 11:29:14 root] (utils.py 283): INFO Epoch: [0] [ 160/2502] eta: 1:55:56 lr: 0.000020 loss_cls: 3.0845 (3.2503) grad_norm: 2.3532 (3.2259) time: 2.9541 data: 0.0003 max mem: 29202 +[2024-12-10 11:29:43 root] (utils.py 283): INFO Epoch: [0] [ 170/2502] eta: 1:55:25 lr: 0.000020 loss_cls: 3.0845 (3.2379) grad_norm: 2.2512 (3.1747) time: 2.9614 data: 0.0003 max mem: 29202 +[2024-12-10 11:30:13 root] (utils.py 283): INFO Epoch: [0] [ 180/2502] eta: 1:54:53 lr: 0.000020 loss_cls: 3.0212 (3.2152) grad_norm: 2.0533 (3.1192) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-10 11:30:42 root] (utils.py 283): INFO Epoch: [0] [ 190/2502] eta: 1:54:20 lr: 0.000020 loss_cls: 2.8961 (3.2012) grad_norm: 2.0598 (3.0729) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 11:31:12 root] (utils.py 283): INFO Epoch: [0] [ 200/2502] eta: 1:53:47 lr: 0.000020 loss_cls: 2.8822 (3.1815) grad_norm: 2.1291 (3.0325) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-10 11:31:41 root] (utils.py 283): INFO Epoch: [0] [ 210/2502] eta: 1:53:15 lr: 0.000020 loss_cls: 2.9242 (3.1699) grad_norm: 2.1098 (3.0023) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 11:32:11 root] (utils.py 283): INFO Epoch: [0] [ 220/2502] eta: 1:52:45 lr: 0.000020 loss_cls: 2.9480 (3.1537) grad_norm: 2.0680 (2.9608) time: 2.9538 data: 0.0003 max mem: 29202 +[2024-12-10 11:32:40 root] (utils.py 283): INFO Epoch: [0] [ 230/2502] eta: 1:52:13 lr: 0.000020 loss_cls: 2.9665 (3.1450) grad_norm: 2.0836 (2.9296) time: 2.9526 data: 0.0003 max mem: 29202 +[2024-12-10 11:33:10 root] (utils.py 283): INFO Epoch: [0] [ 240/2502] eta: 1:51:41 lr: 0.000020 loss_cls: 3.0232 (3.1354) grad_norm: 2.1179 (2.8952) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-10 11:33:39 root] (utils.py 283): INFO Epoch: [0] [ 250/2502] eta: 1:51:11 lr: 0.000020 loss_cls: 2.9011 (3.1191) grad_norm: 2.0070 (2.8665) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-10 11:34:09 root] (utils.py 283): INFO Epoch: [0] [ 260/2502] eta: 1:50:40 lr: 0.000020 loss_cls: 2.8634 (3.1049) grad_norm: 2.0779 (2.8415) time: 2.9507 data: 0.0003 max mem: 29202 +[2024-12-10 11:34:38 root] (utils.py 283): INFO Epoch: [0] [ 270/2502] eta: 1:50:09 lr: 0.000020 loss_cls: 2.9128 (3.0951) grad_norm: 2.2377 (2.8201) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 11:35:08 root] (utils.py 283): INFO Epoch: [0] [ 280/2502] eta: 1:49:38 lr: 0.000020 loss_cls: 2.9730 (3.0963) grad_norm: 2.2377 (2.8005) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 11:35:37 root] (utils.py 283): INFO Epoch: [0] [ 290/2502] eta: 1:49:08 lr: 0.000020 loss_cls: 2.9105 (3.0821) grad_norm: 2.1913 (2.7787) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 11:36:07 root] (utils.py 283): INFO Epoch: [0] [ 300/2502] eta: 1:48:38 lr: 0.000020 loss_cls: 2.5378 (3.0688) grad_norm: 1.9476 (2.7520) time: 2.9532 data: 0.0003 max mem: 29202 +[2024-12-10 11:36:36 root] (utils.py 283): INFO Epoch: [0] [ 310/2502] eta: 1:48:08 lr: 0.000020 loss_cls: 2.7660 (3.0620) grad_norm: 2.0038 (2.7309) time: 2.9598 data: 0.0003 max mem: 29202 +[2024-12-10 11:37:06 root] (utils.py 283): INFO Epoch: [0] [ 320/2502] eta: 1:47:37 lr: 0.000020 loss_cls: 3.1576 (3.0700) grad_norm: 2.2301 (2.7243) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-10 11:37:35 root] (utils.py 283): INFO Epoch: [0] [ 330/2502] eta: 1:47:07 lr: 0.000020 loss_cls: 3.1606 (3.0650) grad_norm: 2.2301 (2.7088) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 11:38:05 root] (utils.py 283): INFO Epoch: [0] [ 340/2502] eta: 1:46:36 lr: 0.000020 loss_cls: 3.0884 (3.0659) grad_norm: 2.1142 (2.6945) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 11:38:34 root] (utils.py 283): INFO Epoch: [0] [ 350/2502] eta: 1:46:06 lr: 0.000020 loss_cls: 3.0426 (3.0626) grad_norm: 2.3165 (2.6885) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 11:39:04 root] (utils.py 283): INFO Epoch: [0] [ 360/2502] eta: 1:45:36 lr: 0.000020 loss_cls: 3.0197 (3.0561) grad_norm: 2.1091 (2.6728) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-10 11:39:33 root] (utils.py 283): INFO Epoch: [0] [ 370/2502] eta: 1:45:06 lr: 0.000020 loss_cls: 2.9664 (3.0503) grad_norm: 2.1091 (2.6578) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-10 11:40:03 root] (utils.py 283): INFO Epoch: [0] [ 380/2502] eta: 1:44:36 lr: 0.000020 loss_cls: 3.0114 (3.0461) grad_norm: 1.9950 (2.6396) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-10 11:40:32 root] (utils.py 283): INFO Epoch: [0] [ 390/2502] eta: 1:44:07 lr: 0.000020 loss_cls: 3.0114 (3.0406) grad_norm: 1.9950 (2.6286) time: 2.9613 data: 0.0003 max mem: 29202 +[2024-12-10 11:41:02 root] (utils.py 283): INFO Epoch: [0] [ 400/2502] eta: 1:43:37 lr: 0.000020 loss_cls: 3.1033 (3.0416) grad_norm: 2.0442 (2.6149) time: 2.9590 data: 0.0003 max mem: 29202 +[2024-12-10 11:41:31 root] (utils.py 283): INFO Epoch: [0] [ 410/2502] eta: 1:43:06 lr: 0.000020 loss_cls: 3.1033 (3.0444) grad_norm: 2.0442 (2.6053) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 11:42:01 root] (utils.py 283): INFO Epoch: [0] [ 420/2502] eta: 1:42:36 lr: 0.000020 loss_cls: 3.1032 (3.0412) grad_norm: 2.1045 (2.5947) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-10 11:42:30 root] (utils.py 283): INFO Epoch: [0] [ 430/2502] eta: 1:42:06 lr: 0.000020 loss_cls: 3.1843 (3.0432) grad_norm: 2.0665 (2.5828) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 11:43:00 root] (utils.py 283): INFO Epoch: [0] [ 440/2502] eta: 1:41:36 lr: 0.000020 loss_cls: 3.1843 (3.0462) grad_norm: 1.9829 (2.5829) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 11:43:29 root] (utils.py 283): INFO Epoch: [0] [ 450/2502] eta: 1:41:06 lr: 0.000020 loss_cls: 3.0806 (3.0442) grad_norm: 2.0207 (2.5750) time: 2.9550 data: 0.0003 max mem: 29202 +[2024-12-10 11:43:59 root] (utils.py 283): INFO Epoch: [0] [ 460/2502] eta: 1:40:37 lr: 0.000020 loss_cls: 2.9618 (3.0367) grad_norm: 2.0964 (2.5660) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-10 11:44:28 root] (utils.py 283): INFO Epoch: [0] [ 470/2502] eta: 1:40:07 lr: 0.000020 loss_cls: 2.7355 (3.0261) grad_norm: 2.0939 (2.5587) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-10 11:44:58 root] (utils.py 283): INFO Epoch: [0] [ 480/2502] eta: 1:39:37 lr: 0.000020 loss_cls: 2.7383 (3.0217) grad_norm: 1.9374 (2.5445) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-10 11:45:27 root] (utils.py 283): INFO Epoch: [0] [ 490/2502] eta: 1:39:06 lr: 0.000020 loss_cls: 2.8954 (3.0248) grad_norm: 1.8973 (2.5332) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 11:45:56 root] (utils.py 283): INFO Epoch: [0] [ 500/2502] eta: 1:38:37 lr: 0.000020 loss_cls: 3.0503 (3.0212) grad_norm: 1.9987 (2.5241) time: 2.9459 data: 0.0003 max mem: 29202 +[2024-12-10 11:46:26 root] (utils.py 283): INFO Epoch: [0] [ 510/2502] eta: 1:38:07 lr: 0.000020 loss_cls: 3.0427 (3.0229) grad_norm: 1.9987 (2.5141) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-10 11:46:55 root] (utils.py 283): INFO Epoch: [0] [ 520/2502] eta: 1:37:37 lr: 0.000020 loss_cls: 3.0898 (3.0232) grad_norm: 2.0356 (2.5072) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-10 11:47:25 root] (utils.py 283): INFO Epoch: [0] [ 530/2502] eta: 1:37:07 lr: 0.000020 loss_cls: 3.0114 (3.0154) grad_norm: 1.8999 (2.4949) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 11:47:54 root] (utils.py 283): INFO Epoch: [0] [ 540/2502] eta: 1:36:37 lr: 0.000020 loss_cls: 3.0553 (3.0121) grad_norm: 1.8999 (2.4839) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-10 11:48:24 root] (utils.py 283): INFO Epoch: [0] [ 550/2502] eta: 1:36:07 lr: 0.000020 loss_cls: 3.0553 (3.0101) grad_norm: 2.0119 (2.4771) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-10 11:48:53 root] (utils.py 283): INFO Epoch: [0] [ 560/2502] eta: 1:35:37 lr: 0.000020 loss_cls: 3.2471 (3.0123) grad_norm: 2.1635 (2.4718) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-10 11:49:23 root] (utils.py 283): INFO Epoch: [0] [ 570/2502] eta: 1:35:07 lr: 0.000020 loss_cls: 3.2471 (3.0128) grad_norm: 2.0560 (2.4628) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 11:49:52 root] (utils.py 283): INFO Epoch: [0] [ 580/2502] eta: 1:34:37 lr: 0.000020 loss_cls: 3.0372 (3.0121) grad_norm: 1.9697 (2.4568) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 11:50:22 root] (utils.py 283): INFO Epoch: [0] [ 590/2502] eta: 1:34:08 lr: 0.000020 loss_cls: 3.0363 (3.0112) grad_norm: 1.9697 (2.4476) time: 2.9513 data: 0.0003 max mem: 29202 +[2024-12-10 11:50:51 root] (utils.py 283): INFO Epoch: [0] [ 600/2502] eta: 1:33:38 lr: 0.000020 loss_cls: 3.0310 (3.0082) grad_norm: 1.8134 (2.4397) time: 2.9503 data: 0.0003 max mem: 29202 +[2024-12-10 11:51:20 root] (utils.py 283): INFO Epoch: [0] [ 610/2502] eta: 1:33:08 lr: 0.000020 loss_cls: 2.9333 (3.0069) grad_norm: 1.8134 (2.4318) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-10 11:51:50 root] (utils.py 283): INFO Epoch: [0] [ 620/2502] eta: 1:32:37 lr: 0.000020 loss_cls: 2.9128 (3.0080) grad_norm: 1.9402 (2.4264) time: 2.9303 data: 0.0003 max mem: 29202 +[2024-12-10 11:52:19 root] (utils.py 283): INFO Epoch: [0] [ 630/2502] eta: 1:32:07 lr: 0.000020 loss_cls: 2.9649 (3.0075) grad_norm: 2.1125 (2.4236) time: 2.9270 data: 0.0003 max mem: 29202 +[2024-12-10 11:52:48 root] (utils.py 283): INFO Epoch: [0] [ 640/2502] eta: 1:31:37 lr: 0.000020 loss_cls: 2.9649 (3.0049) grad_norm: 2.0574 (2.4170) time: 2.9275 data: 0.0003 max mem: 29202 +[2024-12-10 11:53:18 root] (utils.py 283): INFO Epoch: [0] [ 650/2502] eta: 1:31:07 lr: 0.000020 loss_cls: 2.8949 (3.0039) grad_norm: 1.9741 (2.4132) time: 2.9300 data: 0.0003 max mem: 29202 +[2024-12-10 11:53:47 root] (utils.py 283): INFO Epoch: [0] [ 660/2502] eta: 1:30:37 lr: 0.000020 loss_cls: 2.8144 (2.9967) grad_norm: 1.9099 (2.4056) time: 2.9302 data: 0.0003 max mem: 29202 +[2024-12-10 11:54:17 root] (utils.py 283): INFO Epoch: [0] [ 670/2502] eta: 1:30:08 lr: 0.000020 loss_cls: 2.9782 (2.9994) grad_norm: 1.9512 (2.4071) time: 2.9575 data: 0.0003 max mem: 29202 +[2024-12-10 11:54:46 root] (utils.py 283): INFO Epoch: [0] [ 680/2502] eta: 1:29:38 lr: 0.000020 loss_cls: 3.1937 (2.9993) grad_norm: 2.1316 (2.4032) time: 2.9647 data: 0.0003 max mem: 29202 +[2024-12-10 11:55:16 root] (utils.py 283): INFO Epoch: [0] [ 690/2502] eta: 1:29:09 lr: 0.000020 loss_cls: 2.7508 (2.9949) grad_norm: 2.0268 (2.3990) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-10 11:55:45 root] (utils.py 283): INFO Epoch: [0] [ 700/2502] eta: 1:28:39 lr: 0.000020 loss_cls: 2.6214 (2.9921) grad_norm: 1.9891 (2.3933) time: 2.9507 data: 0.0003 max mem: 29202 +[2024-12-10 11:56:15 root] (utils.py 283): INFO Epoch: [0] [ 710/2502] eta: 1:28:10 lr: 0.000020 loss_cls: 2.8545 (2.9893) grad_norm: 1.9891 (2.3880) time: 2.9592 data: 0.0003 max mem: 29202 +[2024-12-10 11:56:44 root] (utils.py 283): INFO Epoch: [0] [ 720/2502] eta: 1:27:40 lr: 0.000020 loss_cls: 2.8545 (2.9850) grad_norm: 1.9682 (2.3840) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 11:57:13 root] (utils.py 283): INFO Epoch: [0] [ 730/2502] eta: 1:27:10 lr: 0.000020 loss_cls: 2.8550 (2.9839) grad_norm: 1.9649 (2.3782) time: 2.9302 data: 0.0003 max mem: 29202 +[2024-12-10 11:57:43 root] (utils.py 283): INFO Epoch: [0] [ 740/2502] eta: 1:26:40 lr: 0.000020 loss_cls: 3.1152 (2.9874) grad_norm: 1.9649 (2.3737) time: 2.9305 data: 0.0003 max mem: 29202 +[2024-12-10 11:58:12 root] (utils.py 283): INFO Epoch: [0] [ 750/2502] eta: 1:26:10 lr: 0.000020 loss_cls: 3.1187 (2.9846) grad_norm: 1.8716 (2.3692) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-10 11:58:42 root] (utils.py 283): INFO Epoch: [0] [ 760/2502] eta: 1:25:40 lr: 0.000020 loss_cls: 3.0473 (2.9838) grad_norm: 1.9738 (2.3668) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 11:59:11 root] (utils.py 283): INFO Epoch: [0] [ 770/2502] eta: 1:25:10 lr: 0.000020 loss_cls: 2.9908 (2.9835) grad_norm: 2.0087 (2.3634) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 11:59:40 root] (utils.py 283): INFO Epoch: [0] [ 780/2502] eta: 1:24:41 lr: 0.000020 loss_cls: 3.0123 (2.9836) grad_norm: 1.9817 (2.3616) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 12:00:10 root] (utils.py 283): INFO Epoch: [0] [ 790/2502] eta: 1:24:11 lr: 0.000020 loss_cls: 2.8359 (2.9810) grad_norm: 1.9696 (2.3597) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-10 12:00:39 root] (utils.py 283): INFO Epoch: [0] [ 800/2502] eta: 1:23:42 lr: 0.000020 loss_cls: 2.8348 (2.9803) grad_norm: 1.9801 (2.3567) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-10 12:01:09 root] (utils.py 283): INFO Epoch: [0] [ 810/2502] eta: 1:23:12 lr: 0.000020 loss_cls: 2.6663 (2.9726) grad_norm: 1.9445 (2.3514) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 12:01:38 root] (utils.py 283): INFO Epoch: [0] [ 820/2502] eta: 1:22:43 lr: 0.000020 loss_cls: 2.6663 (2.9724) grad_norm: 1.9008 (2.3456) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-10 12:02:08 root] (utils.py 283): INFO Epoch: [0] [ 830/2502] eta: 1:22:13 lr: 0.000020 loss_cls: 2.9807 (2.9679) grad_norm: 1.8567 (2.3418) time: 2.9531 data: 0.0003 max mem: 29202 +[2024-12-10 12:02:37 root] (utils.py 283): INFO Epoch: [0] [ 840/2502] eta: 1:21:44 lr: 0.000020 loss_cls: 2.7234 (2.9669) grad_norm: 1.9488 (2.3388) time: 2.9513 data: 0.0003 max mem: 29202 +[2024-12-10 12:03:07 root] (utils.py 283): INFO Epoch: [0] [ 850/2502] eta: 1:21:14 lr: 0.000020 loss_cls: 2.9911 (2.9645) grad_norm: 2.0051 (2.3402) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 12:03:36 root] (utils.py 283): INFO Epoch: [0] [ 860/2502] eta: 1:20:44 lr: 0.000020 loss_cls: 2.9766 (2.9643) grad_norm: 1.9367 (2.3373) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 12:04:06 root] (utils.py 283): INFO Epoch: [0] [ 870/2502] eta: 1:20:15 lr: 0.000020 loss_cls: 2.9631 (2.9630) grad_norm: 1.9367 (2.3345) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-10 12:04:35 root] (utils.py 283): INFO Epoch: [0] [ 880/2502] eta: 1:19:45 lr: 0.000020 loss_cls: 2.9438 (2.9605) grad_norm: 1.9318 (2.3297) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 12:05:04 root] (utils.py 283): INFO Epoch: [0] [ 890/2502] eta: 1:19:15 lr: 0.000020 loss_cls: 2.8341 (2.9577) grad_norm: 1.8920 (2.3266) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 12:05:34 root] (utils.py 283): INFO Epoch: [0] [ 900/2502] eta: 1:18:46 lr: 0.000020 loss_cls: 2.8977 (2.9571) grad_norm: 2.0689 (2.3255) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 12:06:03 root] (utils.py 283): INFO Epoch: [0] [ 910/2502] eta: 1:18:16 lr: 0.000020 loss_cls: 3.1088 (2.9566) grad_norm: 2.0481 (2.3221) time: 2.9501 data: 0.0003 max mem: 29202 +[2024-12-10 12:06:33 root] (utils.py 283): INFO Epoch: [0] [ 920/2502] eta: 1:17:46 lr: 0.000020 loss_cls: 3.1436 (2.9575) grad_norm: 1.9300 (2.3180) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 12:07:02 root] (utils.py 283): INFO Epoch: [0] [ 930/2502] eta: 1:17:16 lr: 0.000020 loss_cls: 3.1436 (2.9587) grad_norm: 1.8382 (2.3140) time: 2.9236 data: 0.0003 max mem: 29202 +[2024-12-10 12:07:31 root] (utils.py 283): INFO Epoch: [0] [ 940/2502] eta: 1:16:47 lr: 0.000020 loss_cls: 3.0149 (2.9569) grad_norm: 1.8150 (2.3108) time: 2.9260 data: 0.0003 max mem: 29202 +[2024-12-10 12:08:00 root] (utils.py 283): INFO Epoch: [0] [ 950/2502] eta: 1:16:17 lr: 0.000020 loss_cls: 2.4341 (2.9518) grad_norm: 1.9634 (2.3090) time: 2.9258 data: 0.0003 max mem: 29202 +[2024-12-10 12:08:30 root] (utils.py 283): INFO Epoch: [0] [ 960/2502] eta: 1:15:47 lr: 0.000020 loss_cls: 2.8148 (2.9521) grad_norm: 2.0529 (2.3064) time: 2.9286 data: 0.0003 max mem: 29202 +[2024-12-10 12:08:59 root] (utils.py 283): INFO Epoch: [0] [ 970/2502] eta: 1:15:17 lr: 0.000020 loss_cls: 3.0996 (2.9523) grad_norm: 1.9920 (2.3044) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-10 12:09:28 root] (utils.py 283): INFO Epoch: [0] [ 980/2502] eta: 1:14:47 lr: 0.000020 loss_cls: 2.9955 (2.9508) grad_norm: 1.8029 (2.3003) time: 2.9263 data: 0.0003 max mem: 29202 +[2024-12-10 12:09:58 root] (utils.py 283): INFO Epoch: [0] [ 990/2502] eta: 1:14:17 lr: 0.000020 loss_cls: 3.0659 (2.9500) grad_norm: 1.8801 (2.2972) time: 2.9237 data: 0.0003 max mem: 29202 +[2024-12-10 12:10:27 root] (utils.py 283): INFO Epoch: [0] [1000/2502] eta: 1:13:48 lr: 0.000020 loss_cls: 3.0638 (2.9487) grad_norm: 1.8801 (2.2925) time: 2.9252 data: 0.0003 max mem: 29202 +[2024-12-10 12:10:56 root] (utils.py 283): INFO Epoch: [0] [1010/2502] eta: 1:13:18 lr: 0.000020 loss_cls: 2.9837 (2.9492) grad_norm: 1.9229 (2.2900) time: 2.9292 data: 0.0003 max mem: 29202 +[2024-12-10 12:11:26 root] (utils.py 283): INFO Epoch: [0] [1020/2502] eta: 1:12:48 lr: 0.000020 loss_cls: 2.9837 (2.9481) grad_norm: 1.9328 (2.2864) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-10 12:11:55 root] (utils.py 283): INFO Epoch: [0] [1030/2502] eta: 1:12:19 lr: 0.000020 loss_cls: 2.7346 (2.9443) grad_norm: 1.9328 (2.2838) time: 2.9507 data: 0.0003 max mem: 29202 +[2024-12-10 12:12:25 root] (utils.py 283): INFO Epoch: [0] [1040/2502] eta: 1:11:49 lr: 0.000020 loss_cls: 2.9180 (2.9449) grad_norm: 1.8833 (2.2797) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-10 12:12:54 root] (utils.py 283): INFO Epoch: [0] [1050/2502] eta: 1:11:20 lr: 0.000020 loss_cls: 2.9562 (2.9434) grad_norm: 1.9048 (2.2782) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 12:13:24 root] (utils.py 283): INFO Epoch: [0] [1060/2502] eta: 1:10:50 lr: 0.000020 loss_cls: 2.9027 (2.9435) grad_norm: 2.0531 (2.2762) time: 2.9514 data: 0.0003 max mem: 29202 +[2024-12-10 12:13:53 root] (utils.py 283): INFO Epoch: [0] [1070/2502] eta: 1:10:21 lr: 0.000020 loss_cls: 2.9668 (2.9425) grad_norm: 2.0301 (2.2735) time: 2.9554 data: 0.0003 max mem: 29202 +[2024-12-10 12:14:23 root] (utils.py 283): INFO Epoch: [0] [1080/2502] eta: 1:09:51 lr: 0.000020 loss_cls: 2.9668 (2.9415) grad_norm: 1.9249 (2.2716) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 12:14:52 root] (utils.py 283): INFO Epoch: [0] [1090/2502] eta: 1:09:22 lr: 0.000020 loss_cls: 2.8620 (2.9398) grad_norm: 1.8723 (2.2683) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 12:15:21 root] (utils.py 283): INFO Epoch: [0] [1100/2502] eta: 1:08:52 lr: 0.000020 loss_cls: 2.8364 (2.9384) grad_norm: 1.8874 (2.2659) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 12:15:51 root] (utils.py 283): INFO Epoch: [0] [1110/2502] eta: 1:08:23 lr: 0.000020 loss_cls: 2.8450 (2.9382) grad_norm: 1.9219 (2.2630) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-10 12:16:20 root] (utils.py 283): INFO Epoch: [0] [1120/2502] eta: 1:07:53 lr: 0.000020 loss_cls: 3.0162 (2.9387) grad_norm: 1.9288 (2.2629) time: 2.9355 data: 0.0002 max mem: 29202 +[2024-12-10 12:16:50 root] (utils.py 283): INFO Epoch: [0] [1130/2502] eta: 1:07:24 lr: 0.000020 loss_cls: 3.0750 (2.9396) grad_norm: 1.9149 (2.2598) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-10 12:17:19 root] (utils.py 283): INFO Epoch: [0] [1140/2502] eta: 1:06:54 lr: 0.000020 loss_cls: 3.0579 (2.9382) grad_norm: 1.8160 (2.2566) time: 2.9514 data: 0.0003 max mem: 29202 +[2024-12-10 12:17:48 root] (utils.py 283): INFO Epoch: [0] [1150/2502] eta: 1:06:25 lr: 0.000020 loss_cls: 2.9054 (2.9371) grad_norm: 1.7371 (2.2521) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-10 12:18:18 root] (utils.py 283): INFO Epoch: [0] [1160/2502] eta: 1:05:55 lr: 0.000020 loss_cls: 2.9175 (2.9364) grad_norm: 1.7802 (2.2494) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-10 12:18:47 root] (utils.py 283): INFO Epoch: [0] [1170/2502] eta: 1:05:26 lr: 0.000020 loss_cls: 2.8825 (2.9350) grad_norm: 1.8604 (2.2462) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-10 12:19:17 root] (utils.py 283): INFO Epoch: [0] [1180/2502] eta: 1:04:56 lr: 0.000020 loss_cls: 3.0993 (2.9367) grad_norm: 1.9760 (2.2450) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-10 12:19:46 root] (utils.py 283): INFO Epoch: [0] [1190/2502] eta: 1:04:26 lr: 0.000020 loss_cls: 3.1300 (2.9382) grad_norm: 1.8813 (2.2419) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-10 12:20:16 root] (utils.py 283): INFO Epoch: [0] [1200/2502] eta: 1:03:57 lr: 0.000020 loss_cls: 3.0898 (2.9382) grad_norm: 1.8129 (2.2393) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-10 12:20:45 root] (utils.py 283): INFO Epoch: [0] [1210/2502] eta: 1:03:27 lr: 0.000020 loss_cls: 2.9543 (2.9373) grad_norm: 1.8276 (2.2378) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 12:21:14 root] (utils.py 283): INFO Epoch: [0] [1220/2502] eta: 1:02:58 lr: 0.000020 loss_cls: 2.8361 (2.9360) grad_norm: 1.9983 (2.2359) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-10 12:21:44 root] (utils.py 283): INFO Epoch: [0] [1230/2502] eta: 1:02:28 lr: 0.000020 loss_cls: 2.8612 (2.9357) grad_norm: 1.9870 (2.2342) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 12:22:13 root] (utils.py 283): INFO Epoch: [0] [1240/2502] eta: 1:01:59 lr: 0.000020 loss_cls: 2.9596 (2.9355) grad_norm: 1.9367 (2.2322) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 12:22:43 root] (utils.py 283): INFO Epoch: [0] [1250/2502] eta: 1:01:29 lr: 0.000020 loss_cls: 2.8591 (2.9336) grad_norm: 1.8656 (2.2290) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-10 12:23:12 root] (utils.py 283): INFO Epoch: [0] [1260/2502] eta: 1:01:00 lr: 0.000020 loss_cls: 2.9199 (2.9339) grad_norm: 1.8782 (2.2277) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 12:23:42 root] (utils.py 283): INFO Epoch: [0] [1270/2502] eta: 1:00:30 lr: 0.000020 loss_cls: 2.9584 (2.9344) grad_norm: 1.8967 (2.2258) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 12:24:11 root] (utils.py 283): INFO Epoch: [0] [1280/2502] eta: 1:00:01 lr: 0.000020 loss_cls: 3.0658 (2.9356) grad_norm: 1.9023 (2.2278) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-10 12:24:41 root] (utils.py 283): INFO Epoch: [0] [1290/2502] eta: 0:59:31 lr: 0.000020 loss_cls: 3.0842 (2.9362) grad_norm: 2.0560 (2.2274) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-10 12:25:10 root] (utils.py 283): INFO Epoch: [0] [1300/2502] eta: 0:59:02 lr: 0.000020 loss_cls: 2.9911 (2.9358) grad_norm: 1.9233 (2.2252) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 12:25:40 root] (utils.py 283): INFO Epoch: [0] [1310/2502] eta: 0:58:32 lr: 0.000020 loss_cls: 2.8203 (2.9342) grad_norm: 1.9233 (2.2233) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 12:26:09 root] (utils.py 283): INFO Epoch: [0] [1320/2502] eta: 0:58:03 lr: 0.000020 loss_cls: 2.8115 (2.9321) grad_norm: 1.8333 (2.2201) time: 2.9566 data: 0.0003 max mem: 29202 +[2024-12-10 12:26:39 root] (utils.py 283): INFO Epoch: [0] [1330/2502] eta: 0:57:34 lr: 0.000020 loss_cls: 2.9601 (2.9327) grad_norm: 1.8214 (2.2180) time: 2.9574 data: 0.0003 max mem: 29202 +[2024-12-10 12:27:08 root] (utils.py 283): INFO Epoch: [0] [1340/2502] eta: 0:57:04 lr: 0.000020 loss_cls: 3.1125 (2.9327) grad_norm: 1.8214 (2.2158) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 12:27:38 root] (utils.py 283): INFO Epoch: [0] [1350/2502] eta: 0:56:35 lr: 0.000020 loss_cls: 3.0040 (2.9323) grad_norm: 1.7337 (2.2128) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 12:28:07 root] (utils.py 283): INFO Epoch: [0] [1360/2502] eta: 0:56:05 lr: 0.000020 loss_cls: 2.9058 (2.9331) grad_norm: 1.7949 (2.2110) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 12:28:36 root] (utils.py 283): INFO Epoch: [0] [1370/2502] eta: 0:55:36 lr: 0.000020 loss_cls: 3.0504 (2.9337) grad_norm: 1.7949 (2.2083) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 12:29:06 root] (utils.py 283): INFO Epoch: [0] [1380/2502] eta: 0:55:06 lr: 0.000020 loss_cls: 3.0504 (2.9341) grad_norm: 1.8626 (2.2058) time: 2.9513 data: 0.0003 max mem: 29202 +[2024-12-10 12:29:35 root] (utils.py 283): INFO Epoch: [0] [1390/2502] eta: 0:54:37 lr: 0.000020 loss_cls: 3.0862 (2.9340) grad_norm: 1.8870 (2.2072) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 12:30:05 root] (utils.py 283): INFO Epoch: [0] [1400/2502] eta: 0:54:07 lr: 0.000020 loss_cls: 3.0481 (2.9334) grad_norm: 1.9564 (2.2054) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-10 12:30:34 root] (utils.py 283): INFO Epoch: [0] [1410/2502] eta: 0:53:38 lr: 0.000020 loss_cls: 3.1481 (2.9352) grad_norm: 1.8781 (2.2031) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-10 12:31:03 root] (utils.py 283): INFO Epoch: [0] [1420/2502] eta: 0:53:08 lr: 0.000020 loss_cls: 2.9944 (2.9341) grad_norm: 1.8298 (2.2003) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 12:31:33 root] (utils.py 283): INFO Epoch: [0] [1430/2502] eta: 0:52:39 lr: 0.000020 loss_cls: 2.9148 (2.9341) grad_norm: 1.8052 (2.1978) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-10 12:32:02 root] (utils.py 283): INFO Epoch: [0] [1440/2502] eta: 0:52:09 lr: 0.000020 loss_cls: 2.9168 (2.9332) grad_norm: 1.8052 (2.1952) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-10 12:32:32 root] (utils.py 283): INFO Epoch: [0] [1450/2502] eta: 0:51:40 lr: 0.000020 loss_cls: 2.9168 (2.9334) grad_norm: 1.9251 (2.1937) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-10 12:33:01 root] (utils.py 283): INFO Epoch: [0] [1460/2502] eta: 0:51:10 lr: 0.000020 loss_cls: 3.0803 (2.9340) grad_norm: 1.9468 (2.1923) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-10 12:33:30 root] (utils.py 283): INFO Epoch: [0] [1470/2502] eta: 0:50:40 lr: 0.000020 loss_cls: 3.0499 (2.9328) grad_norm: 1.8355 (2.1906) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-10 12:34:00 root] (utils.py 283): INFO Epoch: [0] [1480/2502] eta: 0:50:11 lr: 0.000020 loss_cls: 2.7134 (2.9305) grad_norm: 1.7482 (2.1885) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-10 12:34:29 root] (utils.py 283): INFO Epoch: [0] [1490/2502] eta: 0:49:41 lr: 0.000020 loss_cls: 2.5883 (2.9269) grad_norm: 2.0229 (2.1884) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-10 12:34:58 root] (utils.py 283): INFO Epoch: [0] [1500/2502] eta: 0:49:12 lr: 0.000020 loss_cls: 2.5883 (2.9263) grad_norm: 2.0466 (2.1873) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-10 12:35:28 root] (utils.py 283): INFO Epoch: [0] [1510/2502] eta: 0:48:42 lr: 0.000020 loss_cls: 2.9000 (2.9261) grad_norm: 1.9524 (2.1856) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 12:35:57 root] (utils.py 283): INFO Epoch: [0] [1520/2502] eta: 0:48:13 lr: 0.000020 loss_cls: 2.9322 (2.9266) grad_norm: 1.9216 (2.1859) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 12:36:27 root] (utils.py 283): INFO Epoch: [0] [1530/2502] eta: 0:47:43 lr: 0.000020 loss_cls: 2.9176 (2.9259) grad_norm: 1.8318 (2.1835) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-10 12:36:56 root] (utils.py 283): INFO Epoch: [0] [1540/2502] eta: 0:47:14 lr: 0.000020 loss_cls: 3.0039 (2.9254) grad_norm: 1.7839 (2.1810) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-10 12:37:25 root] (utils.py 283): INFO Epoch: [0] [1550/2502] eta: 0:46:44 lr: 0.000020 loss_cls: 2.9039 (2.9240) grad_norm: 1.8628 (2.1794) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-10 12:37:55 root] (utils.py 283): INFO Epoch: [0] [1560/2502] eta: 0:46:15 lr: 0.000020 loss_cls: 2.8097 (2.9230) grad_norm: 1.9149 (2.1800) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 12:38:24 root] (utils.py 283): INFO Epoch: [0] [1570/2502] eta: 0:45:45 lr: 0.000020 loss_cls: 2.7823 (2.9206) grad_norm: 1.9149 (2.1784) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 12:38:54 root] (utils.py 283): INFO Epoch: [0] [1580/2502] eta: 0:45:16 lr: 0.000020 loss_cls: 2.7823 (2.9200) grad_norm: 1.8495 (2.1768) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 12:39:23 root] (utils.py 283): INFO Epoch: [0] [1590/2502] eta: 0:44:46 lr: 0.000020 loss_cls: 2.9717 (2.9206) grad_norm: 2.0043 (2.1771) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-10 12:39:53 root] (utils.py 283): INFO Epoch: [0] [1600/2502] eta: 0:44:17 lr: 0.000020 loss_cls: 3.0049 (2.9219) grad_norm: 2.0043 (2.1771) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-10 12:40:22 root] (utils.py 283): INFO Epoch: [0] [1610/2502] eta: 0:43:47 lr: 0.000020 loss_cls: 3.0125 (2.9220) grad_norm: 1.7863 (2.1747) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-10 12:40:51 root] (utils.py 283): INFO Epoch: [0] [1620/2502] eta: 0:43:18 lr: 0.000020 loss_cls: 2.8877 (2.9217) grad_norm: 1.8872 (2.1748) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 12:41:21 root] (utils.py 283): INFO Epoch: [0] [1630/2502] eta: 0:42:48 lr: 0.000020 loss_cls: 2.9949 (2.9219) grad_norm: 1.9433 (2.1732) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 12:41:50 root] (utils.py 283): INFO Epoch: [0] [1640/2502] eta: 0:42:19 lr: 0.000020 loss_cls: 2.9692 (2.9211) grad_norm: 1.8659 (2.1716) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 12:42:20 root] (utils.py 283): INFO Epoch: [0] [1650/2502] eta: 0:41:50 lr: 0.000020 loss_cls: 2.7351 (2.9182) grad_norm: 1.7866 (2.1694) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 12:42:49 root] (utils.py 283): INFO Epoch: [0] [1660/2502] eta: 0:41:20 lr: 0.000020 loss_cls: 2.4445 (2.9157) grad_norm: 1.7763 (2.1680) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 12:43:19 root] (utils.py 283): INFO Epoch: [0] [1670/2502] eta: 0:40:51 lr: 0.000020 loss_cls: 2.7034 (2.9154) grad_norm: 1.8211 (2.1674) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 12:43:48 root] (utils.py 283): INFO Epoch: [0] [1680/2502] eta: 0:40:21 lr: 0.000020 loss_cls: 2.9689 (2.9143) grad_norm: 1.9418 (2.1672) time: 2.9508 data: 0.0003 max mem: 29202 +[2024-12-10 12:44:18 root] (utils.py 283): INFO Epoch: [0] [1690/2502] eta: 0:39:52 lr: 0.000020 loss_cls: 2.9602 (2.9141) grad_norm: 1.9418 (2.1666) time: 2.9635 data: 0.0003 max mem: 29202 +[2024-12-10 12:44:47 root] (utils.py 283): INFO Epoch: [0] [1700/2502] eta: 0:39:22 lr: 0.000020 loss_cls: 3.1340 (2.9157) grad_norm: 1.8940 (2.1661) time: 2.9578 data: 0.0003 max mem: 29202 +[2024-12-10 12:45:17 root] (utils.py 283): INFO Epoch: [0] [1710/2502] eta: 0:38:53 lr: 0.000020 loss_cls: 3.0179 (2.9152) grad_norm: 1.8988 (2.1651) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 12:45:46 root] (utils.py 283): INFO Epoch: [0] [1720/2502] eta: 0:38:23 lr: 0.000020 loss_cls: 2.8234 (2.9143) grad_norm: 1.8523 (2.1636) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 12:46:16 root] (utils.py 283): INFO Epoch: [0] [1730/2502] eta: 0:37:54 lr: 0.000020 loss_cls: 3.0167 (2.9150) grad_norm: 1.8295 (2.1619) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-10 12:46:45 root] (utils.py 283): INFO Epoch: [0] [1740/2502] eta: 0:37:25 lr: 0.000020 loss_cls: 3.0167 (2.9141) grad_norm: 1.7973 (2.1602) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 12:47:15 root] (utils.py 283): INFO Epoch: [0] [1750/2502] eta: 0:36:55 lr: 0.000020 loss_cls: 2.7856 (2.9129) grad_norm: 1.7951 (2.1589) time: 2.9549 data: 0.0003 max mem: 29202 +[2024-12-10 12:47:44 root] (utils.py 283): INFO Epoch: [0] [1760/2502] eta: 0:36:26 lr: 0.000020 loss_cls: 2.5688 (2.9123) grad_norm: 1.8433 (2.1580) time: 2.9550 data: 0.0003 max mem: 29202 +[2024-12-10 12:48:14 root] (utils.py 283): INFO Epoch: [0] [1770/2502] eta: 0:35:56 lr: 0.000020 loss_cls: 2.9699 (2.9126) grad_norm: 1.8433 (2.1564) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 12:48:43 root] (utils.py 283): INFO Epoch: [0] [1780/2502] eta: 0:35:27 lr: 0.000020 loss_cls: 2.9740 (2.9117) grad_norm: 1.8344 (2.1559) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-10 12:49:13 root] (utils.py 283): INFO Epoch: [0] [1790/2502] eta: 0:34:57 lr: 0.000020 loss_cls: 2.8818 (2.9106) grad_norm: 1.8333 (2.1545) time: 2.9459 data: 0.0003 max mem: 29202 +[2024-12-10 12:49:42 root] (utils.py 283): INFO Epoch: [0] [1800/2502] eta: 0:34:28 lr: 0.000020 loss_cls: 2.9215 (2.9111) grad_norm: 1.8307 (2.1532) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-10 12:50:12 root] (utils.py 283): INFO Epoch: [0] [1810/2502] eta: 0:33:58 lr: 0.000020 loss_cls: 2.9214 (2.9102) grad_norm: 1.8353 (2.1513) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-10 12:50:41 root] (utils.py 283): INFO Epoch: [0] [1820/2502] eta: 0:33:29 lr: 0.000020 loss_cls: 2.8807 (2.9103) grad_norm: 1.8352 (2.1501) time: 2.9502 data: 0.0003 max mem: 29202 +[2024-12-10 12:51:11 root] (utils.py 283): INFO Epoch: [0] [1830/2502] eta: 0:32:59 lr: 0.000020 loss_cls: 3.0519 (2.9117) grad_norm: 1.8136 (2.1483) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-10 12:51:40 root] (utils.py 283): INFO Epoch: [0] [1840/2502] eta: 0:32:30 lr: 0.000020 loss_cls: 3.0091 (2.9107) grad_norm: 1.8163 (2.1467) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 12:52:10 root] (utils.py 283): INFO Epoch: [0] [1850/2502] eta: 0:32:01 lr: 0.000020 loss_cls: 2.8518 (2.9100) grad_norm: 1.8127 (2.1453) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-10 12:52:39 root] (utils.py 283): INFO Epoch: [0] [1860/2502] eta: 0:31:31 lr: 0.000020 loss_cls: 2.8736 (2.9092) grad_norm: 1.8127 (2.1444) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-10 12:53:09 root] (utils.py 283): INFO Epoch: [0] [1870/2502] eta: 0:31:02 lr: 0.000020 loss_cls: 2.8785 (2.9079) grad_norm: 1.9247 (2.1435) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-10 12:53:38 root] (utils.py 283): INFO Epoch: [0] [1880/2502] eta: 0:30:32 lr: 0.000020 loss_cls: 3.1046 (2.9090) grad_norm: 1.8808 (2.1420) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 12:54:08 root] (utils.py 283): INFO Epoch: [0] [1890/2502] eta: 0:30:03 lr: 0.000020 loss_cls: 3.1637 (2.9079) grad_norm: 1.8978 (2.1406) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-10 12:54:37 root] (utils.py 283): INFO Epoch: [0] [1900/2502] eta: 0:29:33 lr: 0.000020 loss_cls: 2.7361 (2.9068) grad_norm: 1.9297 (2.1404) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 12:55:07 root] (utils.py 283): INFO Epoch: [0] [1910/2502] eta: 0:29:04 lr: 0.000020 loss_cls: 2.8306 (2.9071) grad_norm: 1.8224 (2.1383) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-10 12:55:36 root] (utils.py 283): INFO Epoch: [0] [1920/2502] eta: 0:28:34 lr: 0.000020 loss_cls: 2.7197 (2.9046) grad_norm: 1.6555 (2.1359) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-10 12:56:05 root] (utils.py 283): INFO Epoch: [0] [1930/2502] eta: 0:28:05 lr: 0.000020 loss_cls: 2.6305 (2.9041) grad_norm: 1.7930 (2.1349) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 12:56:35 root] (utils.py 283): INFO Epoch: [0] [1940/2502] eta: 0:27:35 lr: 0.000020 loss_cls: 2.8085 (2.9033) grad_norm: 1.8880 (2.1336) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 12:57:04 root] (utils.py 283): INFO Epoch: [0] [1950/2502] eta: 0:27:06 lr: 0.000020 loss_cls: 2.8085 (2.9023) grad_norm: 1.9384 (2.1332) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 12:57:34 root] (utils.py 283): INFO Epoch: [0] [1960/2502] eta: 0:26:37 lr: 0.000020 loss_cls: 2.8713 (2.9013) grad_norm: 1.9384 (2.1321) time: 2.9574 data: 0.0003 max mem: 29202 +[2024-12-10 12:58:03 root] (utils.py 283): INFO Epoch: [0] [1970/2502] eta: 0:26:07 lr: 0.000020 loss_cls: 2.8683 (2.8992) grad_norm: 1.7832 (2.1301) time: 2.9553 data: 0.0003 max mem: 29202 +[2024-12-10 12:58:34 root] (utils.py 283): INFO Epoch: [0] [1980/2502] eta: 0:25:38 lr: 0.000020 loss_cls: 2.7520 (2.8987) grad_norm: 1.7832 (2.1293) time: 2.9731 data: 0.0003 max mem: 29202 +[2024-12-10 12:59:03 root] (utils.py 283): INFO Epoch: [0] [1990/2502] eta: 0:25:08 lr: 0.000020 loss_cls: 2.9541 (2.8980) grad_norm: 1.8685 (2.1291) time: 2.9724 data: 0.0003 max mem: 29202 +[2024-12-10 12:59:32 root] (utils.py 283): INFO Epoch: [0] [2000/2502] eta: 0:24:39 lr: 0.000020 loss_cls: 2.9362 (2.8973) grad_norm: 1.9026 (2.1280) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 13:00:02 root] (utils.py 283): INFO Epoch: [0] [2010/2502] eta: 0:24:09 lr: 0.000020 loss_cls: 2.9609 (2.8981) grad_norm: 1.8373 (2.1273) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 13:00:31 root] (utils.py 283): INFO Epoch: [0] [2020/2502] eta: 0:23:40 lr: 0.000020 loss_cls: 3.0559 (2.8976) grad_norm: 1.8576 (2.1261) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 13:01:01 root] (utils.py 283): INFO Epoch: [0] [2030/2502] eta: 0:23:10 lr: 0.000020 loss_cls: 3.0538 (2.8980) grad_norm: 1.9688 (2.1286) time: 2.9590 data: 0.0003 max mem: 29202 +[2024-12-10 13:01:30 root] (utils.py 283): INFO Epoch: [0] [2040/2502] eta: 0:22:41 lr: 0.000020 loss_cls: 3.0538 (2.8983) grad_norm: 1.9580 (2.1279) time: 2.9529 data: 0.0003 max mem: 29202 +[2024-12-10 13:02:00 root] (utils.py 283): INFO Epoch: [0] [2050/2502] eta: 0:22:11 lr: 0.000020 loss_cls: 2.8685 (2.8974) grad_norm: 1.8487 (2.1277) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-10 13:02:29 root] (utils.py 283): INFO Epoch: [0] [2060/2502] eta: 0:21:42 lr: 0.000020 loss_cls: 2.6428 (2.8969) grad_norm: 1.9056 (2.1270) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-10 13:02:59 root] (utils.py 283): INFO Epoch: [0] [2070/2502] eta: 0:21:13 lr: 0.000020 loss_cls: 3.0399 (2.8972) grad_norm: 1.8469 (2.1254) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 13:03:28 root] (utils.py 283): INFO Epoch: [0] [2080/2502] eta: 0:20:43 lr: 0.000020 loss_cls: 3.0150 (2.8969) grad_norm: 1.9196 (2.1257) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-10 13:03:57 root] (utils.py 283): INFO Epoch: [0] [2090/2502] eta: 0:20:14 lr: 0.000020 loss_cls: 3.0150 (2.8975) grad_norm: 1.9535 (2.1243) time: 2.9324 data: 0.0002 max mem: 29202 +[2024-12-10 13:04:27 root] (utils.py 283): INFO Epoch: [0] [2100/2502] eta: 0:19:44 lr: 0.000020 loss_cls: 3.0968 (2.8961) grad_norm: 1.8291 (2.1239) time: 2.9285 data: 0.0003 max mem: 29202 +[2024-12-10 13:04:56 root] (utils.py 283): INFO Epoch: [0] [2110/2502] eta: 0:19:15 lr: 0.000020 loss_cls: 3.0218 (2.8965) grad_norm: 1.8380 (2.1225) time: 2.9294 data: 0.0003 max mem: 29202 +[2024-12-10 13:05:25 root] (utils.py 283): INFO Epoch: [0] [2120/2502] eta: 0:18:45 lr: 0.000020 loss_cls: 3.0022 (2.8958) grad_norm: 1.7211 (2.1210) time: 2.9309 data: 0.0003 max mem: 29202 +[2024-12-10 13:05:55 root] (utils.py 283): INFO Epoch: [0] [2130/2502] eta: 0:18:16 lr: 0.000020 loss_cls: 2.9158 (2.8959) grad_norm: 1.6981 (2.1193) time: 2.9295 data: 0.0003 max mem: 29202 +[2024-12-10 13:06:24 root] (utils.py 283): INFO Epoch: [0] [2140/2502] eta: 0:17:46 lr: 0.000020 loss_cls: 3.0246 (2.8968) grad_norm: 1.8569 (2.1187) time: 2.9275 data: 0.0003 max mem: 29202 +[2024-12-10 13:06:53 root] (utils.py 283): INFO Epoch: [0] [2150/2502] eta: 0:17:17 lr: 0.000020 loss_cls: 3.1488 (2.8965) grad_norm: 1.8856 (2.1172) time: 2.9263 data: 0.0003 max mem: 29202 +[2024-12-10 13:07:22 root] (utils.py 283): INFO Epoch: [0] [2160/2502] eta: 0:16:47 lr: 0.000020 loss_cls: 3.0402 (2.8965) grad_norm: 1.8607 (2.1160) time: 2.9276 data: 0.0003 max mem: 29202 +[2024-12-10 13:07:52 root] (utils.py 283): INFO Epoch: [0] [2170/2502] eta: 0:16:18 lr: 0.000020 loss_cls: 3.0658 (2.8973) grad_norm: 1.8098 (2.1151) time: 2.9291 data: 0.0003 max mem: 29202 +[2024-12-10 13:08:21 root] (utils.py 283): INFO Epoch: [0] [2180/2502] eta: 0:15:48 lr: 0.000020 loss_cls: 3.0282 (2.8967) grad_norm: 1.8634 (2.1149) time: 2.9272 data: 0.0003 max mem: 29202 +[2024-12-10 13:08:50 root] (utils.py 283): INFO Epoch: [0] [2190/2502] eta: 0:15:19 lr: 0.000020 loss_cls: 3.1027 (2.8975) grad_norm: 1.9517 (2.1142) time: 2.9259 data: 0.0002 max mem: 29202 +[2024-12-10 13:09:20 root] (utils.py 283): INFO Epoch: [0] [2200/2502] eta: 0:14:49 lr: 0.000020 loss_cls: 2.9577 (2.8960) grad_norm: 1.9004 (2.1131) time: 2.9283 data: 0.0003 max mem: 29202 +[2024-12-10 13:09:49 root] (utils.py 283): INFO Epoch: [0] [2210/2502] eta: 0:14:20 lr: 0.000020 loss_cls: 2.7239 (2.8956) grad_norm: 1.8170 (2.1119) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-10 13:10:18 root] (utils.py 283): INFO Epoch: [0] [2220/2502] eta: 0:13:50 lr: 0.000020 loss_cls: 2.8709 (2.8951) grad_norm: 1.7761 (2.1107) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-10 13:10:48 root] (utils.py 283): INFO Epoch: [0] [2230/2502] eta: 0:13:21 lr: 0.000020 loss_cls: 2.8661 (2.8942) grad_norm: 1.7649 (2.1096) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-10 13:11:17 root] (utils.py 283): INFO Epoch: [0] [2240/2502] eta: 0:12:51 lr: 0.000020 loss_cls: 2.8479 (2.8935) grad_norm: 1.8464 (2.1088) time: 2.9299 data: 0.0003 max mem: 29202 +[2024-12-10 13:11:46 root] (utils.py 283): INFO Epoch: [0] [2250/2502] eta: 0:12:22 lr: 0.000020 loss_cls: 2.6577 (2.8926) grad_norm: 1.8464 (2.1082) time: 2.9309 data: 0.0003 max mem: 29202 +[2024-12-10 13:12:16 root] (utils.py 283): INFO Epoch: [0] [2260/2502] eta: 0:11:52 lr: 0.000020 loss_cls: 2.5697 (2.8919) grad_norm: 1.8442 (2.1073) time: 2.9299 data: 0.0003 max mem: 29202 +[2024-12-10 13:12:45 root] (utils.py 283): INFO Epoch: [0] [2270/2502] eta: 0:11:23 lr: 0.000020 loss_cls: 2.9128 (2.8923) grad_norm: 1.8121 (2.1064) time: 2.9299 data: 0.0003 max mem: 29202 +[2024-12-10 13:13:14 root] (utils.py 283): INFO Epoch: [0] [2280/2502] eta: 0:10:53 lr: 0.000020 loss_cls: 2.9495 (2.8930) grad_norm: 1.8149 (2.1054) time: 2.9285 data: 0.0003 max mem: 29202 +[2024-12-10 13:13:43 root] (utils.py 283): INFO Epoch: [0] [2290/2502] eta: 0:10:24 lr: 0.000020 loss_cls: 2.9007 (2.8929) grad_norm: 1.8399 (2.1045) time: 2.9280 data: 0.0003 max mem: 29202 +[2024-12-10 13:14:13 root] (utils.py 283): INFO Epoch: [0] [2300/2502] eta: 0:09:54 lr: 0.000020 loss_cls: 2.8202 (2.8921) grad_norm: 1.8977 (2.1038) time: 2.9305 data: 0.0003 max mem: 29202 +[2024-12-10 13:14:42 root] (utils.py 283): INFO Epoch: [0] [2310/2502] eta: 0:09:25 lr: 0.000020 loss_cls: 2.7098 (2.8910) grad_norm: 1.8291 (2.1025) time: 2.9327 data: 0.0003 max mem: 29202 +[2024-12-10 13:15:11 root] (utils.py 283): INFO Epoch: [0] [2320/2502] eta: 0:08:55 lr: 0.000020 loss_cls: 2.8372 (2.8913) grad_norm: 1.8023 (2.1019) time: 2.9305 data: 0.0003 max mem: 29202 +[2024-12-10 13:15:41 root] (utils.py 283): INFO Epoch: [0] [2330/2502] eta: 0:08:26 lr: 0.000020 loss_cls: 3.0208 (2.8920) grad_norm: 1.9444 (2.1018) time: 2.9283 data: 0.0003 max mem: 29202 +[2024-12-10 13:16:10 root] (utils.py 283): INFO Epoch: [0] [2340/2502] eta: 0:07:57 lr: 0.000020 loss_cls: 3.0546 (2.8916) grad_norm: 1.9193 (2.1022) time: 2.9303 data: 0.0003 max mem: 29202 +[2024-12-10 13:16:39 root] (utils.py 283): INFO Epoch: [0] [2350/2502] eta: 0:07:27 lr: 0.000020 loss_cls: 2.8482 (2.8915) grad_norm: 1.7918 (2.1006) time: 2.9304 data: 0.0003 max mem: 29202 +[2024-12-10 13:17:09 root] (utils.py 283): INFO Epoch: [0] [2360/2502] eta: 0:06:58 lr: 0.000020 loss_cls: 2.7669 (2.8905) grad_norm: 1.7311 (2.0998) time: 2.9302 data: 0.0003 max mem: 29202 +[2024-12-10 13:17:38 root] (utils.py 283): INFO Epoch: [0] [2370/2502] eta: 0:06:28 lr: 0.000020 loss_cls: 2.7631 (2.8902) grad_norm: 1.7975 (2.0988) time: 2.9298 data: 0.0003 max mem: 29202 +[2024-12-10 13:18:07 root] (utils.py 283): INFO Epoch: [0] [2380/2502] eta: 0:05:59 lr: 0.000020 loss_cls: 2.9930 (2.8906) grad_norm: 1.8265 (2.0980) time: 2.9273 data: 0.0003 max mem: 29202 +[2024-12-10 13:18:36 root] (utils.py 283): INFO Epoch: [0] [2390/2502] eta: 0:05:29 lr: 0.000020 loss_cls: 2.9878 (2.8897) grad_norm: 1.8331 (2.0973) time: 2.9279 data: 0.0003 max mem: 29202 +[2024-12-10 13:19:06 root] (utils.py 283): INFO Epoch: [0] [2400/2502] eta: 0:05:00 lr: 0.000020 loss_cls: 2.8594 (2.8895) grad_norm: 1.7950 (2.0963) time: 2.9285 data: 0.0003 max mem: 29202 +[2024-12-10 13:19:35 root] (utils.py 283): INFO Epoch: [0] [2410/2502] eta: 0:04:30 lr: 0.000020 loss_cls: 3.0295 (2.8901) grad_norm: 1.7723 (2.0960) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-10 13:20:04 root] (utils.py 283): INFO Epoch: [0] [2420/2502] eta: 0:04:01 lr: 0.000020 loss_cls: 2.9447 (2.8892) grad_norm: 1.7723 (2.0952) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-10 13:20:34 root] (utils.py 283): INFO Epoch: [0] [2430/2502] eta: 0:03:31 lr: 0.000020 loss_cls: 2.6132 (2.8889) grad_norm: 1.8162 (2.0944) time: 2.9264 data: 0.0003 max mem: 29202 +[2024-12-10 13:21:03 root] (utils.py 283): INFO Epoch: [0] [2440/2502] eta: 0:03:02 lr: 0.000020 loss_cls: 3.0060 (2.8890) grad_norm: 1.8506 (2.0936) time: 2.9276 data: 0.0003 max mem: 29202 +[2024-12-10 13:21:32 root] (utils.py 283): INFO Epoch: [0] [2450/2502] eta: 0:02:33 lr: 0.000020 loss_cls: 2.9926 (2.8894) grad_norm: 1.9105 (2.0937) time: 2.9354 data: 0.0003 max mem: 29202 +[2024-12-10 13:22:02 root] (utils.py 283): INFO Epoch: [0] [2460/2502] eta: 0:02:03 lr: 0.000020 loss_cls: 2.9906 (2.8893) grad_norm: 1.9124 (2.0932) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 13:22:31 root] (utils.py 283): INFO Epoch: [0] [2470/2502] eta: 0:01:34 lr: 0.000020 loss_cls: 3.0056 (2.8884) grad_norm: 1.8647 (2.0929) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-10 13:23:01 root] (utils.py 283): INFO Epoch: [0] [2480/2502] eta: 0:01:04 lr: 0.000020 loss_cls: 2.6203 (2.8877) grad_norm: 1.8440 (2.0924) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 13:23:31 root] (utils.py 283): INFO Epoch: [0] [2490/2502] eta: 0:00:35 lr: 0.000020 loss_cls: 2.8292 (2.8883) grad_norm: 1.8205 (2.0914) time: 2.9799 data: 0.0238 max mem: 29202 +[2024-12-10 13:24:00 root] (utils.py 283): INFO Epoch: [0] [2500/2502] eta: 0:00:05 lr: 0.000020 loss_cls: 2.8337 (2.8878) grad_norm: 1.7958 (2.0908) time: 2.9731 data: 0.0238 max mem: 29202 +[2024-12-10 13:24:03 root] (utils.py 283): INFO Epoch: [0] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 2.8337 (2.8873) grad_norm: 1.7958 (2.0907) time: 2.9713 data: 0.0238 max mem: 29202 +[2024-12-10 13:24:03 root] (utils.py 297): INFO Epoch: [0] Total time: 2:02:47 (2.9447 s / it) +[2024-12-10 13:24:03 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 2.8337 (2.8864) grad_norm: 1.7958 (2.0907) +[2024-12-10 13:24:06 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:53 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3156 (0.3156) acc1: 92.9688 (92.9688) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5464 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:12 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7558 (0.7079) acc1: 87.5000 (86.5057) acc3: 96.0938 (96.3778) acc5: 98.4375 (97.7273) time: 0.5501 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:17 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7551 (0.7578) acc1: 87.5000 (85.4539) acc3: 96.0938 (95.8333) acc5: 97.6562 (97.5446) time: 0.5507 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:23 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7551 (0.7926) acc1: 84.3750 (84.6270) acc3: 94.5312 (95.4889) acc5: 96.8750 (97.2782) time: 0.5512 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:28 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8399 (0.8024) acc1: 84.3750 (84.8133) acc3: 95.3125 (95.3316) acc5: 96.8750 (97.1989) time: 0.5517 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:34 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0001 (0.8854) acc1: 79.6875 (82.9350) acc3: 91.4062 (94.0717) acc5: 93.7500 (96.2316) time: 0.5519 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:39 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0594 (0.9197) acc1: 77.3438 (82.4027) acc3: 89.8438 (93.4426) acc5: 92.9688 (95.6839) time: 0.5520 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:45 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0938 (0.9533) acc1: 78.1250 (81.5361) acc3: 89.8438 (93.0788) acc5: 93.7500 (95.4335) time: 0.5520 data: 0.0004 max mem: 29202 +[2024-12-10 13:24:50 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.1539 (0.9790) acc1: 77.3438 (80.9317) acc3: 89.8438 (92.5733) acc5: 93.7500 (95.1003) time: 0.5522 data: 0.0006 max mem: 29202 +[2024-12-10 13:24:56 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.1539 (1.0084) acc1: 75.7812 (80.1253) acc3: 89.0625 (92.2734) acc5: 92.1875 (94.8918) time: 0.5528 data: 0.0006 max mem: 29202 +[2024-12-10 13:24:59 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.1203 (1.0055) acc1: 75.7812 (80.0640) acc3: 89.8438 (92.3760) acc5: 94.5312 (94.9760) time: 0.5438 data: 0.0005 max mem: 29202 +[2024-12-10 13:24:59 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5501 s / it) +[2024-12-10 13:24:59 root] (engine.py 264): INFO * Acc@1 80.276 Acc@3 92.294 Acc@5 94.784 loss 0.994 flops 13.207 layer_flops 13.109 +[2024-12-10 13:24:59 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 80.3% +[2024-12-10 13:25:01 root] (main.py 576): INFO Max accuracy: 80.28% +[2024-12-10 13:25:04 root] (utils.py 283): INFO Epoch: [1] [ 0/2502] eta: 2:00:25 lr: 0.000020 loss_cls: 2.5145 (2.5145) grad_norm: 2.0576 (2.0576) time: 2.8879 data: 0.0003 max mem: 29202 +[2024-12-10 13:25:33 root] (utils.py 283): INFO Epoch: [1] [ 10/2502] eta: 2:01:14 lr: 0.000020 loss_cls: 2.5762 (2.7699) grad_norm: 1.8194 (1.8431) time: 2.9190 data: 0.0003 max mem: 29202 +[2024-12-10 13:26:02 root] (utils.py 283): INFO Epoch: [1] [ 20/2502] eta: 2:00:50 lr: 0.000020 loss_cls: 3.0509 (2.8677) grad_norm: 1.7902 (1.8512) time: 2.9228 data: 0.0003 max mem: 29202 +[2024-12-10 13:26:32 root] (utils.py 283): INFO Epoch: [1] [ 30/2502] eta: 2:00:25 lr: 0.000020 loss_cls: 3.0942 (2.9315) grad_norm: 1.8178 (1.9360) time: 2.9251 data: 0.0003 max mem: 29202 +[2024-12-10 13:27:01 root] (utils.py 283): INFO Epoch: [1] [ 40/2502] eta: 2:00:05 lr: 0.000020 loss_cls: 3.0078 (2.9072) grad_norm: 1.7308 (1.9181) time: 2.9328 data: 0.0003 max mem: 29202 +[2024-12-10 13:27:30 root] (utils.py 283): INFO Epoch: [1] [ 50/2502] eta: 1:59:36 lr: 0.000020 loss_cls: 2.9625 (2.9171) grad_norm: 1.7404 (1.9696) time: 2.9328 data: 0.0003 max mem: 29202 +[2024-12-10 13:27:59 root] (utils.py 283): INFO Epoch: [1] [ 60/2502] eta: 1:59:05 lr: 0.000020 loss_cls: 3.0067 (2.9454) grad_norm: 1.8837 (1.9633) time: 2.9246 data: 0.0003 max mem: 29202 +[2024-12-10 13:28:29 root] (utils.py 283): INFO Epoch: [1] [ 70/2502] eta: 1:58:35 lr: 0.000020 loss_cls: 3.0067 (2.9571) grad_norm: 1.8837 (1.9521) time: 2.9226 data: 0.0003 max mem: 29202 +[2024-12-10 13:28:58 root] (utils.py 283): INFO Epoch: [1] [ 80/2502] eta: 1:58:04 lr: 0.000020 loss_cls: 2.8818 (2.9437) grad_norm: 1.7753 (1.9336) time: 2.9223 data: 0.0003 max mem: 29202 +[2024-12-10 13:29:27 root] (utils.py 283): INFO Epoch: [1] [ 90/2502] eta: 1:57:35 lr: 0.000020 loss_cls: 2.8818 (2.9176) grad_norm: 1.7607 (1.9380) time: 2.9228 data: 0.0003 max mem: 29202 +[2024-12-10 13:29:56 root] (utils.py 283): INFO Epoch: [1] [ 100/2502] eta: 1:57:04 lr: 0.000020 loss_cls: 2.9429 (2.9145) grad_norm: 1.8976 (1.9539) time: 2.9214 data: 0.0003 max mem: 29202 +[2024-12-10 13:30:26 root] (utils.py 283): INFO Epoch: [1] [ 110/2502] eta: 1:56:34 lr: 0.000020 loss_cls: 2.8854 (2.8844) grad_norm: 1.9294 (1.9778) time: 2.9212 data: 0.0003 max mem: 29202 +[2024-12-10 13:30:55 root] (utils.py 283): INFO Epoch: [1] [ 120/2502] eta: 1:56:12 lr: 0.000020 loss_cls: 2.7194 (2.8894) grad_norm: 1.7103 (1.9609) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 13:31:25 root] (utils.py 283): INFO Epoch: [1] [ 130/2502] eta: 1:55:46 lr: 0.000020 loss_cls: 2.9252 (2.8850) grad_norm: 1.8099 (1.9682) time: 2.9508 data: 0.0004 max mem: 29202 +[2024-12-10 13:31:54 root] (utils.py 283): INFO Epoch: [1] [ 140/2502] eta: 1:55:18 lr: 0.000020 loss_cls: 2.8477 (2.8909) grad_norm: 1.9804 (1.9764) time: 2.9388 data: 0.0004 max mem: 29202 +[2024-12-10 13:32:23 root] (utils.py 283): INFO Epoch: [1] [ 150/2502] eta: 1:54:51 lr: 0.000020 loss_cls: 2.8363 (2.8748) grad_norm: 1.9804 (1.9739) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-10 13:32:53 root] (utils.py 283): INFO Epoch: [1] [ 160/2502] eta: 1:54:22 lr: 0.000020 loss_cls: 3.0569 (2.8811) grad_norm: 1.8546 (1.9695) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-10 13:33:22 root] (utils.py 283): INFO Epoch: [1] [ 170/2502] eta: 1:53:52 lr: 0.000020 loss_cls: 3.0582 (2.8825) grad_norm: 1.8076 (1.9589) time: 2.9295 data: 0.0003 max mem: 29202 +[2024-12-10 13:33:51 root] (utils.py 283): INFO Epoch: [1] [ 180/2502] eta: 1:53:23 lr: 0.000020 loss_cls: 3.0916 (2.8878) grad_norm: 1.7146 (1.9491) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-10 13:34:21 root] (utils.py 283): INFO Epoch: [1] [ 190/2502] eta: 1:52:55 lr: 0.000020 loss_cls: 3.0401 (2.8867) grad_norm: 1.7656 (1.9468) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-10 13:34:50 root] (utils.py 283): INFO Epoch: [1] [ 200/2502] eta: 1:52:27 lr: 0.000020 loss_cls: 2.9820 (2.8938) grad_norm: 1.7337 (1.9341) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-10 13:35:20 root] (utils.py 283): INFO Epoch: [1] [ 210/2502] eta: 1:51:59 lr: 0.000020 loss_cls: 2.9820 (2.9015) grad_norm: 1.7306 (1.9268) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 13:35:49 root] (utils.py 283): INFO Epoch: [1] [ 220/2502] eta: 1:51:30 lr: 0.000020 loss_cls: 2.9134 (2.8949) grad_norm: 1.7306 (1.9288) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-10 13:36:18 root] (utils.py 283): INFO Epoch: [1] [ 230/2502] eta: 1:51:01 lr: 0.000020 loss_cls: 3.1097 (2.9065) grad_norm: 1.8134 (1.9288) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-10 13:36:48 root] (utils.py 283): INFO Epoch: [1] [ 240/2502] eta: 1:50:32 lr: 0.000020 loss_cls: 2.8815 (2.8893) grad_norm: 1.7575 (1.9180) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-10 13:37:17 root] (utils.py 283): INFO Epoch: [1] [ 250/2502] eta: 1:50:04 lr: 0.000020 loss_cls: 2.7177 (2.8807) grad_norm: 1.6678 (1.9125) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-10 13:37:47 root] (utils.py 283): INFO Epoch: [1] [ 260/2502] eta: 1:49:36 lr: 0.000020 loss_cls: 2.8342 (2.8756) grad_norm: 1.7695 (1.9176) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 13:38:16 root] (utils.py 283): INFO Epoch: [1] [ 270/2502] eta: 1:49:08 lr: 0.000020 loss_cls: 3.0342 (2.8818) grad_norm: 1.7326 (1.9098) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 13:38:45 root] (utils.py 283): INFO Epoch: [1] [ 280/2502] eta: 1:48:39 lr: 0.000020 loss_cls: 2.9641 (2.8780) grad_norm: 1.6718 (1.9041) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-10 13:39:15 root] (utils.py 283): INFO Epoch: [1] [ 290/2502] eta: 1:48:10 lr: 0.000020 loss_cls: 2.6231 (2.8627) grad_norm: 1.6877 (1.8966) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 13:39:44 root] (utils.py 283): INFO Epoch: [1] [ 300/2502] eta: 1:47:41 lr: 0.000020 loss_cls: 2.5995 (2.8600) grad_norm: 1.6516 (1.9015) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-10 13:40:14 root] (utils.py 283): INFO Epoch: [1] [ 310/2502] eta: 1:47:13 lr: 0.000020 loss_cls: 2.9192 (2.8586) grad_norm: 1.7885 (1.8980) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 13:40:43 root] (utils.py 283): INFO Epoch: [1] [ 320/2502] eta: 1:46:45 lr: 0.000020 loss_cls: 2.9178 (2.8568) grad_norm: 1.7510 (1.8970) time: 2.9542 data: 0.0003 max mem: 29202 +[2024-12-10 13:41:13 root] (utils.py 283): INFO Epoch: [1] [ 330/2502] eta: 1:46:16 lr: 0.000020 loss_cls: 2.9178 (2.8541) grad_norm: 1.7633 (1.8984) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 13:41:42 root] (utils.py 283): INFO Epoch: [1] [ 340/2502] eta: 1:45:47 lr: 0.000020 loss_cls: 2.7049 (2.8501) grad_norm: 1.8581 (1.8985) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 13:42:12 root] (utils.py 283): INFO Epoch: [1] [ 350/2502] eta: 1:45:18 lr: 0.000020 loss_cls: 2.7049 (2.8474) grad_norm: 1.8324 (1.8976) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 13:42:41 root] (utils.py 283): INFO Epoch: [1] [ 360/2502] eta: 1:44:49 lr: 0.000020 loss_cls: 2.4883 (2.8325) grad_norm: 1.6859 (1.8909) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 13:43:10 root] (utils.py 283): INFO Epoch: [1] [ 370/2502] eta: 1:44:20 lr: 0.000020 loss_cls: 2.4213 (2.8284) grad_norm: 1.6859 (1.8867) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 13:43:40 root] (utils.py 283): INFO Epoch: [1] [ 380/2502] eta: 1:43:51 lr: 0.000020 loss_cls: 2.8355 (2.8277) grad_norm: 1.7249 (1.8859) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 13:44:09 root] (utils.py 283): INFO Epoch: [1] [ 390/2502] eta: 1:43:22 lr: 0.000020 loss_cls: 2.8355 (2.8251) grad_norm: 1.7606 (1.8931) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 13:44:39 root] (utils.py 283): INFO Epoch: [1] [ 400/2502] eta: 1:42:53 lr: 0.000020 loss_cls: 2.9620 (2.8259) grad_norm: 1.8646 (1.8937) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 13:45:08 root] (utils.py 283): INFO Epoch: [1] [ 410/2502] eta: 1:42:24 lr: 0.000020 loss_cls: 2.9620 (2.8235) grad_norm: 1.7739 (1.8910) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-10 13:45:38 root] (utils.py 283): INFO Epoch: [1] [ 420/2502] eta: 1:41:55 lr: 0.000020 loss_cls: 2.7967 (2.8245) grad_norm: 1.7132 (1.8897) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-10 13:46:07 root] (utils.py 283): INFO Epoch: [1] [ 430/2502] eta: 1:41:26 lr: 0.000020 loss_cls: 2.9844 (2.8280) grad_norm: 1.7601 (1.8869) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-10 13:46:37 root] (utils.py 283): INFO Epoch: [1] [ 440/2502] eta: 1:40:57 lr: 0.000020 loss_cls: 3.0644 (2.8318) grad_norm: 1.8253 (1.8854) time: 2.9513 data: 0.0003 max mem: 29202 +[2024-12-10 13:47:06 root] (utils.py 283): INFO Epoch: [1] [ 450/2502] eta: 1:40:28 lr: 0.000020 loss_cls: 2.8302 (2.8258) grad_norm: 1.6613 (1.8819) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-10 13:47:36 root] (utils.py 283): INFO Epoch: [1] [ 460/2502] eta: 1:40:00 lr: 0.000020 loss_cls: 2.6772 (2.8241) grad_norm: 1.6412 (1.8844) time: 2.9518 data: 0.0003 max mem: 29202 +[2024-12-10 13:48:05 root] (utils.py 283): INFO Epoch: [1] [ 470/2502] eta: 1:39:31 lr: 0.000020 loss_cls: 2.8691 (2.8260) grad_norm: 1.7232 (1.8811) time: 2.9536 data: 0.0003 max mem: 29202 +[2024-12-10 13:48:35 root] (utils.py 283): INFO Epoch: [1] [ 480/2502] eta: 1:39:02 lr: 0.000020 loss_cls: 2.9072 (2.8222) grad_norm: 1.7208 (1.8800) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-10 13:49:04 root] (utils.py 283): INFO Epoch: [1] [ 490/2502] eta: 1:38:33 lr: 0.000020 loss_cls: 2.9385 (2.8252) grad_norm: 1.8519 (1.8786) time: 2.9516 data: 0.0003 max mem: 29202 +[2024-12-10 13:49:34 root] (utils.py 283): INFO Epoch: [1] [ 500/2502] eta: 1:38:04 lr: 0.000020 loss_cls: 2.9385 (2.8221) grad_norm: 1.8519 (1.8792) time: 2.9527 data: 0.0003 max mem: 29202 +[2024-12-10 13:50:03 root] (utils.py 283): INFO Epoch: [1] [ 510/2502] eta: 1:37:35 lr: 0.000020 loss_cls: 2.7735 (2.8212) grad_norm: 1.7190 (1.8761) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 13:50:33 root] (utils.py 283): INFO Epoch: [1] [ 520/2502] eta: 1:37:06 lr: 0.000020 loss_cls: 2.7735 (2.8209) grad_norm: 1.7572 (1.8761) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 13:51:02 root] (utils.py 283): INFO Epoch: [1] [ 530/2502] eta: 1:36:38 lr: 0.000020 loss_cls: 2.8048 (2.8196) grad_norm: 1.7855 (1.8749) time: 2.9597 data: 0.0003 max mem: 29202 +[2024-12-10 13:51:32 root] (utils.py 283): INFO Epoch: [1] [ 540/2502] eta: 1:36:08 lr: 0.000020 loss_cls: 2.8889 (2.8184) grad_norm: 1.7623 (1.8737) time: 2.9599 data: 0.0003 max mem: 29202 +[2024-12-10 13:52:01 root] (utils.py 283): INFO Epoch: [1] [ 550/2502] eta: 1:35:39 lr: 0.000020 loss_cls: 2.9778 (2.8206) grad_norm: 1.8043 (1.8744) time: 2.9501 data: 0.0003 max mem: 29202 +[2024-12-10 13:52:31 root] (utils.py 283): INFO Epoch: [1] [ 560/2502] eta: 1:35:11 lr: 0.000020 loss_cls: 2.9086 (2.8190) grad_norm: 1.8765 (1.8762) time: 2.9549 data: 0.0003 max mem: 29202 +[2024-12-10 13:53:01 root] (utils.py 283): INFO Epoch: [1] [ 570/2502] eta: 1:34:42 lr: 0.000020 loss_cls: 2.7333 (2.8188) grad_norm: 1.8456 (1.8765) time: 2.9631 data: 0.0003 max mem: 29202 +[2024-12-10 13:53:30 root] (utils.py 283): INFO Epoch: [1] [ 580/2502] eta: 1:34:13 lr: 0.000020 loss_cls: 2.6493 (2.8147) grad_norm: 1.7516 (1.8751) time: 2.9647 data: 0.0003 max mem: 29202 +[2024-12-10 13:54:00 root] (utils.py 283): INFO Epoch: [1] [ 590/2502] eta: 1:33:44 lr: 0.000020 loss_cls: 2.9305 (2.8152) grad_norm: 1.7143 (1.8733) time: 2.9582 data: 0.0003 max mem: 29202 +[2024-12-10 13:54:29 root] (utils.py 283): INFO Epoch: [1] [ 600/2502] eta: 1:33:15 lr: 0.000020 loss_cls: 3.0408 (2.8156) grad_norm: 1.7159 (1.8726) time: 2.9558 data: 0.0003 max mem: 29202 +[2024-12-10 13:54:59 root] (utils.py 283): INFO Epoch: [1] [ 610/2502] eta: 1:32:46 lr: 0.000020 loss_cls: 2.9612 (2.8147) grad_norm: 1.8488 (1.8759) time: 2.9516 data: 0.0003 max mem: 29202 +[2024-12-10 13:55:28 root] (utils.py 283): INFO Epoch: [1] [ 620/2502] eta: 1:32:18 lr: 0.000020 loss_cls: 2.8555 (2.8161) grad_norm: 1.8304 (1.8765) time: 2.9583 data: 0.0003 max mem: 29202 +[2024-12-10 13:55:58 root] (utils.py 283): INFO Epoch: [1] [ 630/2502] eta: 1:31:49 lr: 0.000020 loss_cls: 2.9131 (2.8174) grad_norm: 1.8289 (1.8773) time: 2.9745 data: 0.0003 max mem: 29202 +[2024-12-10 13:56:28 root] (utils.py 283): INFO Epoch: [1] [ 640/2502] eta: 1:31:21 lr: 0.000020 loss_cls: 2.8140 (2.8129) grad_norm: 1.8379 (1.8759) time: 2.9779 data: 0.0003 max mem: 29202 +[2024-12-10 13:56:58 root] (utils.py 283): INFO Epoch: [1] [ 650/2502] eta: 1:30:52 lr: 0.000020 loss_cls: 2.8492 (2.8135) grad_norm: 1.8124 (1.8775) time: 2.9661 data: 0.0003 max mem: 29202 +[2024-12-10 13:57:27 root] (utils.py 283): INFO Epoch: [1] [ 660/2502] eta: 1:30:22 lr: 0.000020 loss_cls: 2.9755 (2.8112) grad_norm: 1.8161 (1.8768) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 13:57:57 root] (utils.py 283): INFO Epoch: [1] [ 670/2502] eta: 1:29:53 lr: 0.000020 loss_cls: 2.9550 (2.8115) grad_norm: 1.8547 (1.8791) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-10 13:58:26 root] (utils.py 283): INFO Epoch: [1] [ 680/2502] eta: 1:29:24 lr: 0.000020 loss_cls: 2.9646 (2.8116) grad_norm: 1.8765 (1.8815) time: 2.9531 data: 0.0003 max mem: 29202 +[2024-12-10 13:58:56 root] (utils.py 283): INFO Epoch: [1] [ 690/2502] eta: 1:28:55 lr: 0.000020 loss_cls: 2.7556 (2.8089) grad_norm: 1.8911 (1.8819) time: 2.9508 data: 0.0003 max mem: 29202 +[2024-12-10 13:59:25 root] (utils.py 283): INFO Epoch: [1] [ 700/2502] eta: 1:28:25 lr: 0.000020 loss_cls: 2.8500 (2.8098) grad_norm: 1.7786 (1.8808) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-10 13:59:55 root] (utils.py 283): INFO Epoch: [1] [ 710/2502] eta: 1:27:56 lr: 0.000020 loss_cls: 3.0693 (2.8113) grad_norm: 1.7721 (1.8799) time: 2.9550 data: 0.0003 max mem: 29202 +[2024-12-10 14:00:24 root] (utils.py 283): INFO Epoch: [1] [ 720/2502] eta: 1:27:27 lr: 0.000020 loss_cls: 3.0406 (2.8103) grad_norm: 1.7439 (1.8797) time: 2.9539 data: 0.0003 max mem: 29202 +[2024-12-10 14:00:54 root] (utils.py 283): INFO Epoch: [1] [ 730/2502] eta: 1:26:58 lr: 0.000020 loss_cls: 2.9204 (2.8124) grad_norm: 1.7315 (1.8780) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 14:01:23 root] (utils.py 283): INFO Epoch: [1] [ 740/2502] eta: 1:26:29 lr: 0.000020 loss_cls: 3.0390 (2.8160) grad_norm: 1.6896 (1.8774) time: 2.9596 data: 0.0003 max mem: 29202 +[2024-12-10 14:01:53 root] (utils.py 283): INFO Epoch: [1] [ 750/2502] eta: 1:25:59 lr: 0.000020 loss_cls: 3.0058 (2.8157) grad_norm: 1.7428 (1.8767) time: 2.9606 data: 0.0003 max mem: 29202 +[2024-12-10 14:02:22 root] (utils.py 283): INFO Epoch: [1] [ 760/2502] eta: 1:25:30 lr: 0.000020 loss_cls: 2.8584 (2.8169) grad_norm: 1.7929 (1.8760) time: 2.9533 data: 0.0003 max mem: 29202 +[2024-12-10 14:02:52 root] (utils.py 283): INFO Epoch: [1] [ 770/2502] eta: 1:25:01 lr: 0.000020 loss_cls: 2.7533 (2.8145) grad_norm: 1.7822 (1.8745) time: 2.9606 data: 0.0003 max mem: 29202 +[2024-12-10 14:03:22 root] (utils.py 283): INFO Epoch: [1] [ 780/2502] eta: 1:24:32 lr: 0.000020 loss_cls: 2.7465 (2.8135) grad_norm: 1.7056 (1.8745) time: 2.9654 data: 0.0003 max mem: 29202 +[2024-12-10 14:03:51 root] (utils.py 283): INFO Epoch: [1] [ 790/2502] eta: 1:24:03 lr: 0.000020 loss_cls: 2.8862 (2.8136) grad_norm: 1.8000 (1.8763) time: 2.9592 data: 0.0003 max mem: 29202 +[2024-12-10 14:04:21 root] (utils.py 283): INFO Epoch: [1] [ 800/2502] eta: 1:23:33 lr: 0.000020 loss_cls: 2.8925 (2.8148) grad_norm: 1.8000 (1.8760) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-10 14:04:50 root] (utils.py 283): INFO Epoch: [1] [ 810/2502] eta: 1:23:04 lr: 0.000020 loss_cls: 2.9055 (2.8155) grad_norm: 1.7396 (1.8749) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-10 14:05:20 root] (utils.py 283): INFO Epoch: [1] [ 820/2502] eta: 1:22:35 lr: 0.000020 loss_cls: 2.8532 (2.8147) grad_norm: 1.6708 (1.8763) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-10 14:05:49 root] (utils.py 283): INFO Epoch: [1] [ 830/2502] eta: 1:22:05 lr: 0.000020 loss_cls: 2.8532 (2.8128) grad_norm: 1.8386 (1.8822) time: 2.9518 data: 0.0003 max mem: 29202 +[2024-12-10 14:06:19 root] (utils.py 283): INFO Epoch: [1] [ 840/2502] eta: 1:21:36 lr: 0.000020 loss_cls: 2.6772 (2.8097) grad_norm: 1.8386 (1.8834) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-10 14:06:48 root] (utils.py 283): INFO Epoch: [1] [ 850/2502] eta: 1:21:06 lr: 0.000020 loss_cls: 2.8075 (2.8103) grad_norm: 1.7346 (1.8815) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 14:07:18 root] (utils.py 283): INFO Epoch: [1] [ 860/2502] eta: 1:20:37 lr: 0.000020 loss_cls: 2.9185 (2.8096) grad_norm: 1.7346 (1.8798) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 14:07:47 root] (utils.py 283): INFO Epoch: [1] [ 870/2502] eta: 1:20:07 lr: 0.000020 loss_cls: 2.8459 (2.8075) grad_norm: 1.7686 (1.8786) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-10 14:08:17 root] (utils.py 283): INFO Epoch: [1] [ 880/2502] eta: 1:19:38 lr: 0.000020 loss_cls: 2.8710 (2.8091) grad_norm: 1.7781 (1.8779) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-10 14:08:46 root] (utils.py 283): INFO Epoch: [1] [ 890/2502] eta: 1:19:09 lr: 0.000020 loss_cls: 2.9950 (2.8108) grad_norm: 1.7781 (1.8776) time: 2.9612 data: 0.0003 max mem: 29202 +[2024-12-10 14:09:16 root] (utils.py 283): INFO Epoch: [1] [ 900/2502] eta: 1:18:40 lr: 0.000020 loss_cls: 2.7994 (2.8091) grad_norm: 1.6801 (1.8762) time: 2.9611 data: 0.0003 max mem: 29202 +[2024-12-10 14:09:45 root] (utils.py 283): INFO Epoch: [1] [ 910/2502] eta: 1:18:10 lr: 0.000020 loss_cls: 2.8073 (2.8095) grad_norm: 1.6919 (1.8812) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 14:10:15 root] (utils.py 283): INFO Epoch: [1] [ 920/2502] eta: 1:17:41 lr: 0.000020 loss_cls: 3.0286 (2.8109) grad_norm: 1.6919 (1.8789) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-10 14:10:44 root] (utils.py 283): INFO Epoch: [1] [ 930/2502] eta: 1:17:11 lr: 0.000020 loss_cls: 2.9035 (2.8098) grad_norm: 1.6590 (1.8767) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 14:11:14 root] (utils.py 283): INFO Epoch: [1] [ 940/2502] eta: 1:16:42 lr: 0.000020 loss_cls: 2.5996 (2.8086) grad_norm: 1.6585 (1.8755) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-10 14:11:43 root] (utils.py 283): INFO Epoch: [1] [ 950/2502] eta: 1:16:12 lr: 0.000020 loss_cls: 2.6085 (2.8079) grad_norm: 1.7582 (1.8764) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-10 14:12:12 root] (utils.py 283): INFO Epoch: [1] [ 960/2502] eta: 1:15:43 lr: 0.000020 loss_cls: 2.7713 (2.8082) grad_norm: 1.7172 (1.8744) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 14:12:42 root] (utils.py 283): INFO Epoch: [1] [ 970/2502] eta: 1:15:13 lr: 0.000020 loss_cls: 2.6755 (2.8049) grad_norm: 1.7508 (1.8738) time: 2.9537 data: 0.0003 max mem: 29202 +[2024-12-10 14:13:11 root] (utils.py 283): INFO Epoch: [1] [ 980/2502] eta: 1:14:44 lr: 0.000020 loss_cls: 2.6772 (2.8041) grad_norm: 1.7805 (1.8748) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 14:13:41 root] (utils.py 283): INFO Epoch: [1] [ 990/2502] eta: 1:14:14 lr: 0.000020 loss_cls: 2.7556 (2.8049) grad_norm: 1.7490 (1.8782) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 14:14:10 root] (utils.py 283): INFO Epoch: [1] [1000/2502] eta: 1:13:45 lr: 0.000020 loss_cls: 2.7904 (2.8038) grad_norm: 1.8203 (1.8786) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 14:14:40 root] (utils.py 283): INFO Epoch: [1] [1010/2502] eta: 1:13:15 lr: 0.000020 loss_cls: 2.8825 (2.8041) grad_norm: 1.8183 (1.8789) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 14:15:09 root] (utils.py 283): INFO Epoch: [1] [1020/2502] eta: 1:12:46 lr: 0.000020 loss_cls: 2.9105 (2.8032) grad_norm: 1.6751 (1.8775) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-10 14:15:39 root] (utils.py 283): INFO Epoch: [1] [1030/2502] eta: 1:12:16 lr: 0.000020 loss_cls: 2.9125 (2.8033) grad_norm: 1.7845 (1.8796) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 14:16:08 root] (utils.py 283): INFO Epoch: [1] [1040/2502] eta: 1:11:47 lr: 0.000020 loss_cls: 2.6854 (2.8010) grad_norm: 1.8089 (1.8788) time: 2.9570 data: 0.0003 max mem: 29202 +[2024-12-10 14:16:38 root] (utils.py 283): INFO Epoch: [1] [1050/2502] eta: 1:11:17 lr: 0.000020 loss_cls: 2.7139 (2.8008) grad_norm: 1.7819 (1.8786) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 14:17:07 root] (utils.py 283): INFO Epoch: [1] [1060/2502] eta: 1:10:48 lr: 0.000020 loss_cls: 2.8791 (2.8028) grad_norm: 1.7819 (1.8779) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 14:17:36 root] (utils.py 283): INFO Epoch: [1] [1070/2502] eta: 1:10:18 lr: 0.000020 loss_cls: 3.0506 (2.8032) grad_norm: 1.7790 (1.8800) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-10 14:18:06 root] (utils.py 283): INFO Epoch: [1] [1080/2502] eta: 1:09:49 lr: 0.000020 loss_cls: 2.9906 (2.8045) grad_norm: 1.8111 (1.8855) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 14:18:35 root] (utils.py 283): INFO Epoch: [1] [1090/2502] eta: 1:09:19 lr: 0.000020 loss_cls: 2.9739 (2.8051) grad_norm: 1.8111 (1.8873) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-10 14:19:05 root] (utils.py 283): INFO Epoch: [1] [1100/2502] eta: 1:08:50 lr: 0.000020 loss_cls: 2.9739 (2.8037) grad_norm: 1.7691 (1.8871) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 14:19:34 root] (utils.py 283): INFO Epoch: [1] [1110/2502] eta: 1:08:20 lr: 0.000020 loss_cls: 3.0299 (2.8054) grad_norm: 1.7691 (1.8863) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-10 14:20:03 root] (utils.py 283): INFO Epoch: [1] [1120/2502] eta: 1:07:51 lr: 0.000020 loss_cls: 3.0169 (2.8066) grad_norm: 1.7017 (1.8851) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 14:20:33 root] (utils.py 283): INFO Epoch: [1] [1130/2502] eta: 1:07:21 lr: 0.000020 loss_cls: 3.0169 (2.8084) grad_norm: 1.7402 (1.8849) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-10 14:21:02 root] (utils.py 283): INFO Epoch: [1] [1140/2502] eta: 1:06:52 lr: 0.000020 loss_cls: 3.0186 (2.8087) grad_norm: 1.8537 (1.8848) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-10 14:21:32 root] (utils.py 283): INFO Epoch: [1] [1150/2502] eta: 1:06:22 lr: 0.000020 loss_cls: 3.0023 (2.8104) grad_norm: 1.7531 (1.8835) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-10 14:22:01 root] (utils.py 283): INFO Epoch: [1] [1160/2502] eta: 1:05:53 lr: 0.000020 loss_cls: 3.0023 (2.8101) grad_norm: 1.7074 (1.8824) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 14:22:31 root] (utils.py 283): INFO Epoch: [1] [1170/2502] eta: 1:05:23 lr: 0.000020 loss_cls: 2.9405 (2.8106) grad_norm: 1.6595 (1.8805) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-10 14:23:00 root] (utils.py 283): INFO Epoch: [1] [1180/2502] eta: 1:04:54 lr: 0.000020 loss_cls: 2.9405 (2.8111) grad_norm: 1.6281 (1.8795) time: 2.9629 data: 0.0003 max mem: 29202 +[2024-12-10 14:23:30 root] (utils.py 283): INFO Epoch: [1] [1190/2502] eta: 1:04:25 lr: 0.000020 loss_cls: 2.9007 (2.8107) grad_norm: 1.6334 (1.8782) time: 2.9617 data: 0.0003 max mem: 29202 +[2024-12-10 14:23:59 root] (utils.py 283): INFO Epoch: [1] [1200/2502] eta: 1:03:55 lr: 0.000020 loss_cls: 2.8465 (2.8099) grad_norm: 1.6752 (1.8775) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 14:24:29 root] (utils.py 283): INFO Epoch: [1] [1210/2502] eta: 1:03:26 lr: 0.000020 loss_cls: 2.7908 (2.8090) grad_norm: 1.7775 (1.8773) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-10 14:24:58 root] (utils.py 283): INFO Epoch: [1] [1220/2502] eta: 1:02:56 lr: 0.000020 loss_cls: 2.9218 (2.8101) grad_norm: 1.7294 (1.8765) time: 2.9564 data: 0.0003 max mem: 29202 +[2024-12-10 14:25:28 root] (utils.py 283): INFO Epoch: [1] [1230/2502] eta: 1:02:27 lr: 0.000020 loss_cls: 2.9218 (2.8102) grad_norm: 1.6369 (1.8758) time: 2.9498 data: 0.0003 max mem: 29202 +[2024-12-10 14:25:57 root] (utils.py 283): INFO Epoch: [1] [1240/2502] eta: 1:01:57 lr: 0.000020 loss_cls: 2.9579 (2.8113) grad_norm: 1.7554 (1.8752) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 14:26:27 root] (utils.py 283): INFO Epoch: [1] [1250/2502] eta: 1:01:28 lr: 0.000020 loss_cls: 2.9579 (2.8101) grad_norm: 1.7554 (1.8759) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 14:26:56 root] (utils.py 283): INFO Epoch: [1] [1260/2502] eta: 1:00:58 lr: 0.000020 loss_cls: 2.8967 (2.8111) grad_norm: 1.7661 (1.8748) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 14:27:25 root] (utils.py 283): INFO Epoch: [1] [1270/2502] eta: 1:00:29 lr: 0.000020 loss_cls: 2.9533 (2.8103) grad_norm: 1.6815 (1.8737) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 14:27:55 root] (utils.py 283): INFO Epoch: [1] [1280/2502] eta: 1:00:00 lr: 0.000020 loss_cls: 2.8900 (2.8099) grad_norm: 1.7050 (1.8730) time: 2.9597 data: 0.0003 max mem: 29202 +[2024-12-10 14:28:25 root] (utils.py 283): INFO Epoch: [1] [1290/2502] eta: 0:59:30 lr: 0.000020 loss_cls: 2.9523 (2.8107) grad_norm: 1.7597 (1.8727) time: 2.9541 data: 0.0003 max mem: 29202 +[2024-12-10 14:28:54 root] (utils.py 283): INFO Epoch: [1] [1300/2502] eta: 0:59:01 lr: 0.000020 loss_cls: 2.9576 (2.8112) grad_norm: 1.8794 (1.8729) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 14:29:23 root] (utils.py 283): INFO Epoch: [1] [1310/2502] eta: 0:58:31 lr: 0.000020 loss_cls: 2.7412 (2.8096) grad_norm: 1.7833 (1.8725) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 14:29:53 root] (utils.py 283): INFO Epoch: [1] [1320/2502] eta: 0:58:02 lr: 0.000020 loss_cls: 2.7607 (2.8098) grad_norm: 1.6582 (1.8715) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 14:30:22 root] (utils.py 283): INFO Epoch: [1] [1330/2502] eta: 0:57:32 lr: 0.000020 loss_cls: 2.9536 (2.8101) grad_norm: 1.6823 (1.8705) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-10 14:30:52 root] (utils.py 283): INFO Epoch: [1] [1340/2502] eta: 0:57:03 lr: 0.000020 loss_cls: 2.9536 (2.8085) grad_norm: 1.6823 (1.8697) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 14:31:21 root] (utils.py 283): INFO Epoch: [1] [1350/2502] eta: 0:56:33 lr: 0.000020 loss_cls: 2.9856 (2.8099) grad_norm: 1.8268 (1.8701) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 14:31:51 root] (utils.py 283): INFO Epoch: [1] [1360/2502] eta: 0:56:04 lr: 0.000020 loss_cls: 2.9539 (2.8084) grad_norm: 1.6709 (1.8691) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 14:32:20 root] (utils.py 283): INFO Epoch: [1] [1370/2502] eta: 0:55:34 lr: 0.000020 loss_cls: 2.7722 (2.8076) grad_norm: 1.6481 (1.8678) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-10 14:32:49 root] (utils.py 283): INFO Epoch: [1] [1380/2502] eta: 0:55:05 lr: 0.000020 loss_cls: 2.7777 (2.8058) grad_norm: 1.6894 (1.8675) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 14:33:19 root] (utils.py 283): INFO Epoch: [1] [1390/2502] eta: 0:54:35 lr: 0.000020 loss_cls: 2.7777 (2.8055) grad_norm: 1.6930 (1.8668) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-10 14:33:48 root] (utils.py 283): INFO Epoch: [1] [1400/2502] eta: 0:54:06 lr: 0.000020 loss_cls: 2.9390 (2.8048) grad_norm: 1.7390 (1.8658) time: 2.9532 data: 0.0003 max mem: 29202 +[2024-12-10 14:34:18 root] (utils.py 283): INFO Epoch: [1] [1410/2502] eta: 0:53:36 lr: 0.000020 loss_cls: 2.8386 (2.8048) grad_norm: 1.7385 (1.8647) time: 2.9500 data: 0.0003 max mem: 29202 +[2024-12-10 14:34:47 root] (utils.py 283): INFO Epoch: [1] [1420/2502] eta: 0:53:07 lr: 0.000020 loss_cls: 2.8386 (2.8050) grad_norm: 1.7385 (1.8644) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 14:35:17 root] (utils.py 283): INFO Epoch: [1] [1430/2502] eta: 0:52:38 lr: 0.000020 loss_cls: 2.9711 (2.8047) grad_norm: 1.7071 (1.8636) time: 2.9562 data: 0.0003 max mem: 29202 +[2024-12-10 14:35:46 root] (utils.py 283): INFO Epoch: [1] [1440/2502] eta: 0:52:08 lr: 0.000020 loss_cls: 2.6329 (2.8035) grad_norm: 1.7008 (1.8626) time: 2.9557 data: 0.0003 max mem: 29202 +[2024-12-10 14:36:16 root] (utils.py 283): INFO Epoch: [1] [1450/2502] eta: 0:51:39 lr: 0.000020 loss_cls: 2.6329 (2.8020) grad_norm: 1.6596 (1.8611) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 14:36:45 root] (utils.py 283): INFO Epoch: [1] [1460/2502] eta: 0:51:09 lr: 0.000020 loss_cls: 2.8312 (2.8025) grad_norm: 1.7416 (1.8615) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 14:37:15 root] (utils.py 283): INFO Epoch: [1] [1470/2502] eta: 0:50:40 lr: 0.000020 loss_cls: 2.9742 (2.8041) grad_norm: 1.8796 (1.8625) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-10 14:37:44 root] (utils.py 283): INFO Epoch: [1] [1480/2502] eta: 0:50:10 lr: 0.000020 loss_cls: 2.9169 (2.8038) grad_norm: 1.7886 (1.8623) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-10 14:38:14 root] (utils.py 283): INFO Epoch: [1] [1490/2502] eta: 0:49:41 lr: 0.000020 loss_cls: 2.8715 (2.8036) grad_norm: 1.7495 (1.8617) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 14:38:43 root] (utils.py 283): INFO Epoch: [1] [1500/2502] eta: 0:49:11 lr: 0.000020 loss_cls: 2.8778 (2.8023) grad_norm: 1.7466 (1.8616) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 14:39:12 root] (utils.py 283): INFO Epoch: [1] [1510/2502] eta: 0:48:42 lr: 0.000020 loss_cls: 2.6897 (2.8012) grad_norm: 1.7702 (1.8615) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-10 14:39:42 root] (utils.py 283): INFO Epoch: [1] [1520/2502] eta: 0:48:12 lr: 0.000020 loss_cls: 2.8775 (2.8018) grad_norm: 1.7819 (1.8617) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 14:40:11 root] (utils.py 283): INFO Epoch: [1] [1530/2502] eta: 0:47:43 lr: 0.000020 loss_cls: 2.8487 (2.8013) grad_norm: 1.7845 (1.8623) time: 2.9485 data: 0.0003 max mem: 29202 +[2024-12-10 14:40:41 root] (utils.py 283): INFO Epoch: [1] [1540/2502] eta: 0:47:13 lr: 0.000020 loss_cls: 2.8164 (2.8023) grad_norm: 1.6757 (1.8609) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 14:41:10 root] (utils.py 283): INFO Epoch: [1] [1550/2502] eta: 0:46:44 lr: 0.000020 loss_cls: 3.0369 (2.8040) grad_norm: 1.6767 (1.8610) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-10 14:41:40 root] (utils.py 283): INFO Epoch: [1] [1560/2502] eta: 0:46:14 lr: 0.000020 loss_cls: 3.0627 (2.8047) grad_norm: 1.8301 (1.8614) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 14:42:09 root] (utils.py 283): INFO Epoch: [1] [1570/2502] eta: 0:45:45 lr: 0.000020 loss_cls: 2.6608 (2.8024) grad_norm: 1.7666 (1.8601) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-10 14:42:38 root] (utils.py 283): INFO Epoch: [1] [1580/2502] eta: 0:45:15 lr: 0.000020 loss_cls: 2.4159 (2.8007) grad_norm: 1.5659 (1.8587) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 14:43:08 root] (utils.py 283): INFO Epoch: [1] [1590/2502] eta: 0:44:46 lr: 0.000020 loss_cls: 2.5178 (2.7997) grad_norm: 1.7359 (1.8585) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 14:43:37 root] (utils.py 283): INFO Epoch: [1] [1600/2502] eta: 0:44:17 lr: 0.000020 loss_cls: 2.6710 (2.7980) grad_norm: 1.7478 (1.8577) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 14:44:07 root] (utils.py 283): INFO Epoch: [1] [1610/2502] eta: 0:43:47 lr: 0.000020 loss_cls: 2.7730 (2.7979) grad_norm: 1.6226 (1.8569) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 14:44:36 root] (utils.py 283): INFO Epoch: [1] [1620/2502] eta: 0:43:18 lr: 0.000020 loss_cls: 2.8240 (2.7974) grad_norm: 1.7123 (1.8565) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 14:45:06 root] (utils.py 283): INFO Epoch: [1] [1630/2502] eta: 0:42:48 lr: 0.000020 loss_cls: 2.8370 (2.7977) grad_norm: 1.8329 (1.8570) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 14:45:35 root] (utils.py 283): INFO Epoch: [1] [1640/2502] eta: 0:42:19 lr: 0.000020 loss_cls: 2.9458 (2.7979) grad_norm: 1.8403 (1.8566) time: 2.9546 data: 0.0003 max mem: 29202 +[2024-12-10 14:46:05 root] (utils.py 283): INFO Epoch: [1] [1650/2502] eta: 0:41:49 lr: 0.000020 loss_cls: 2.8222 (2.7974) grad_norm: 1.8996 (1.8571) time: 2.9615 data: 0.0003 max mem: 29202 +[2024-12-10 14:46:34 root] (utils.py 283): INFO Epoch: [1] [1660/2502] eta: 0:41:20 lr: 0.000020 loss_cls: 2.7097 (2.7964) grad_norm: 2.0317 (1.8583) time: 2.9607 data: 0.0003 max mem: 29202 +[2024-12-10 14:47:04 root] (utils.py 283): INFO Epoch: [1] [1670/2502] eta: 0:40:51 lr: 0.000020 loss_cls: 2.8495 (2.7976) grad_norm: 1.7734 (1.8588) time: 2.9574 data: 0.0003 max mem: 29202 +[2024-12-10 14:47:33 root] (utils.py 283): INFO Epoch: [1] [1680/2502] eta: 0:40:21 lr: 0.000020 loss_cls: 2.8242 (2.7983) grad_norm: 1.7734 (1.8586) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-10 14:48:03 root] (utils.py 283): INFO Epoch: [1] [1690/2502] eta: 0:39:52 lr: 0.000020 loss_cls: 2.7268 (2.7968) grad_norm: 1.6714 (1.8579) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 14:48:32 root] (utils.py 283): INFO Epoch: [1] [1700/2502] eta: 0:39:22 lr: 0.000020 loss_cls: 2.8403 (2.7974) grad_norm: 1.7503 (1.8580) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-10 14:49:02 root] (utils.py 283): INFO Epoch: [1] [1710/2502] eta: 0:38:53 lr: 0.000020 loss_cls: 2.8782 (2.7974) grad_norm: 1.7155 (1.8572) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-10 14:49:31 root] (utils.py 283): INFO Epoch: [1] [1720/2502] eta: 0:38:23 lr: 0.000020 loss_cls: 3.0000 (2.7981) grad_norm: 1.7488 (1.8586) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 14:50:01 root] (utils.py 283): INFO Epoch: [1] [1730/2502] eta: 0:37:54 lr: 0.000020 loss_cls: 3.0140 (2.7964) grad_norm: 1.7488 (1.8576) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 14:50:30 root] (utils.py 283): INFO Epoch: [1] [1740/2502] eta: 0:37:24 lr: 0.000020 loss_cls: 2.7609 (2.7965) grad_norm: 1.7045 (1.8573) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-10 14:51:00 root] (utils.py 283): INFO Epoch: [1] [1750/2502] eta: 0:36:55 lr: 0.000020 loss_cls: 2.8133 (2.7966) grad_norm: 1.7845 (1.8568) time: 2.9511 data: 0.0003 max mem: 29202 +[2024-12-10 14:51:29 root] (utils.py 283): INFO Epoch: [1] [1760/2502] eta: 0:36:25 lr: 0.000020 loss_cls: 2.7868 (2.7966) grad_norm: 1.7730 (1.8565) time: 2.9624 data: 0.0003 max mem: 29202 +[2024-12-10 14:51:59 root] (utils.py 283): INFO Epoch: [1] [1770/2502] eta: 0:35:56 lr: 0.000020 loss_cls: 2.7135 (2.7958) grad_norm: 1.6669 (1.8556) time: 2.9515 data: 0.0003 max mem: 29202 +[2024-12-10 14:52:28 root] (utils.py 283): INFO Epoch: [1] [1780/2502] eta: 0:35:27 lr: 0.000020 loss_cls: 2.7195 (2.7951) grad_norm: 1.7204 (1.8563) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 14:52:57 root] (utils.py 283): INFO Epoch: [1] [1790/2502] eta: 0:34:57 lr: 0.000020 loss_cls: 2.7649 (2.7948) grad_norm: 1.8541 (1.8564) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-10 14:53:27 root] (utils.py 283): INFO Epoch: [1] [1800/2502] eta: 0:34:28 lr: 0.000020 loss_cls: 2.7649 (2.7940) grad_norm: 1.8115 (1.8570) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 14:53:56 root] (utils.py 283): INFO Epoch: [1] [1810/2502] eta: 0:33:58 lr: 0.000020 loss_cls: 2.8337 (2.7950) grad_norm: 1.7891 (1.8564) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 14:54:26 root] (utils.py 283): INFO Epoch: [1] [1820/2502] eta: 0:33:29 lr: 0.000020 loss_cls: 3.0126 (2.7952) grad_norm: 1.7568 (1.8561) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-10 14:54:55 root] (utils.py 283): INFO Epoch: [1] [1830/2502] eta: 0:32:59 lr: 0.000020 loss_cls: 2.8268 (2.7951) grad_norm: 1.7568 (1.8562) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-10 14:55:25 root] (utils.py 283): INFO Epoch: [1] [1840/2502] eta: 0:32:30 lr: 0.000020 loss_cls: 2.8865 (2.7937) grad_norm: 1.6714 (1.8557) time: 2.9651 data: 0.0003 max mem: 29202 +[2024-12-10 14:55:55 root] (utils.py 283): INFO Epoch: [1] [1850/2502] eta: 0:32:00 lr: 0.000020 loss_cls: 2.9020 (2.7934) grad_norm: 1.7191 (1.8559) time: 2.9702 data: 0.0003 max mem: 29202 +[2024-12-10 14:56:24 root] (utils.py 283): INFO Epoch: [1] [1860/2502] eta: 0:31:31 lr: 0.000020 loss_cls: 2.9498 (2.7935) grad_norm: 1.7303 (1.8560) time: 2.9727 data: 0.0003 max mem: 29202 +[2024-12-10 14:56:54 root] (utils.py 283): INFO Epoch: [1] [1870/2502] eta: 0:31:02 lr: 0.000020 loss_cls: 2.9767 (2.7941) grad_norm: 1.7882 (1.8557) time: 2.9598 data: 0.0003 max mem: 29202 +[2024-12-10 14:57:23 root] (utils.py 283): INFO Epoch: [1] [1880/2502] eta: 0:30:32 lr: 0.000020 loss_cls: 3.0409 (2.7938) grad_norm: 1.7882 (1.8555) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-10 14:57:53 root] (utils.py 283): INFO Epoch: [1] [1890/2502] eta: 0:30:03 lr: 0.000020 loss_cls: 2.9627 (2.7938) grad_norm: 1.8066 (1.8559) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-10 14:58:22 root] (utils.py 283): INFO Epoch: [1] [1900/2502] eta: 0:29:33 lr: 0.000020 loss_cls: 2.9976 (2.7948) grad_norm: 1.7064 (1.8547) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 14:58:51 root] (utils.py 283): INFO Epoch: [1] [1910/2502] eta: 0:29:04 lr: 0.000020 loss_cls: 3.0691 (2.7952) grad_norm: 1.6346 (1.8539) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-10 14:59:21 root] (utils.py 283): INFO Epoch: [1] [1920/2502] eta: 0:28:34 lr: 0.000020 loss_cls: 2.9376 (2.7954) grad_norm: 1.7563 (1.8537) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 14:59:50 root] (utils.py 283): INFO Epoch: [1] [1930/2502] eta: 0:28:05 lr: 0.000020 loss_cls: 2.9082 (2.7941) grad_norm: 1.7839 (1.8535) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 15:00:20 root] (utils.py 283): INFO Epoch: [1] [1940/2502] eta: 0:27:35 lr: 0.000020 loss_cls: 2.9975 (2.7944) grad_norm: 1.8032 (1.8537) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 15:00:49 root] (utils.py 283): INFO Epoch: [1] [1950/2502] eta: 0:27:06 lr: 0.000020 loss_cls: 3.0249 (2.7946) grad_norm: 1.7463 (1.8545) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-10 15:01:18 root] (utils.py 283): INFO Epoch: [1] [1960/2502] eta: 0:26:36 lr: 0.000020 loss_cls: 2.9696 (2.7938) grad_norm: 1.7417 (1.8541) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 15:01:48 root] (utils.py 283): INFO Epoch: [1] [1970/2502] eta: 0:26:07 lr: 0.000020 loss_cls: 3.0281 (2.7954) grad_norm: 1.7588 (1.8551) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-10 15:02:18 root] (utils.py 283): INFO Epoch: [1] [1980/2502] eta: 0:25:37 lr: 0.000020 loss_cls: 3.0738 (2.7957) grad_norm: 1.7225 (1.8551) time: 2.9535 data: 0.0003 max mem: 29202 +[2024-12-10 15:02:47 root] (utils.py 283): INFO Epoch: [1] [1990/2502] eta: 0:25:08 lr: 0.000020 loss_cls: 2.6609 (2.7939) grad_norm: 1.7047 (1.8563) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-10 15:03:16 root] (utils.py 283): INFO Epoch: [1] [2000/2502] eta: 0:24:38 lr: 0.000020 loss_cls: 2.7515 (2.7944) grad_norm: 1.7990 (1.8570) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 15:03:46 root] (utils.py 283): INFO Epoch: [1] [2010/2502] eta: 0:24:09 lr: 0.000020 loss_cls: 2.9998 (2.7949) grad_norm: 1.7796 (1.8563) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 15:04:15 root] (utils.py 283): INFO Epoch: [1] [2020/2502] eta: 0:23:40 lr: 0.000020 loss_cls: 2.9752 (2.7950) grad_norm: 1.6384 (1.8557) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 15:04:45 root] (utils.py 283): INFO Epoch: [1] [2030/2502] eta: 0:23:10 lr: 0.000020 loss_cls: 2.9600 (2.7950) grad_norm: 1.7327 (1.8557) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-10 15:05:14 root] (utils.py 283): INFO Epoch: [1] [2040/2502] eta: 0:22:41 lr: 0.000020 loss_cls: 2.9600 (2.7944) grad_norm: 1.7954 (1.8559) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 15:05:44 root] (utils.py 283): INFO Epoch: [1] [2050/2502] eta: 0:22:11 lr: 0.000020 loss_cls: 2.8779 (2.7947) grad_norm: 1.7370 (1.8554) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 15:06:13 root] (utils.py 283): INFO Epoch: [1] [2060/2502] eta: 0:21:42 lr: 0.000020 loss_cls: 2.9906 (2.7955) grad_norm: 1.7041 (1.8550) time: 2.9506 data: 0.0003 max mem: 29202 +[2024-12-10 15:06:43 root] (utils.py 283): INFO Epoch: [1] [2070/2502] eta: 0:21:12 lr: 0.000020 loss_cls: 3.0873 (2.7956) grad_norm: 1.6646 (1.8543) time: 2.9530 data: 0.0003 max mem: 29202 +[2024-12-10 15:07:12 root] (utils.py 283): INFO Epoch: [1] [2080/2502] eta: 0:20:43 lr: 0.000020 loss_cls: 2.9945 (2.7962) grad_norm: 1.7173 (1.8543) time: 2.9502 data: 0.0003 max mem: 29202 +[2024-12-10 15:07:42 root] (utils.py 283): INFO Epoch: [1] [2090/2502] eta: 0:20:13 lr: 0.000020 loss_cls: 2.9945 (2.7967) grad_norm: 1.8660 (1.8548) time: 2.9592 data: 0.0003 max mem: 29202 +[2024-12-10 15:08:11 root] (utils.py 283): INFO Epoch: [1] [2100/2502] eta: 0:19:44 lr: 0.000020 loss_cls: 2.8595 (2.7967) grad_norm: 1.7881 (1.8538) time: 2.9528 data: 0.0003 max mem: 29202 +[2024-12-10 15:08:41 root] (utils.py 283): INFO Epoch: [1] [2110/2502] eta: 0:19:14 lr: 0.000020 loss_cls: 2.7113 (2.7958) grad_norm: 1.6955 (1.8553) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 15:09:10 root] (utils.py 283): INFO Epoch: [1] [2120/2502] eta: 0:18:45 lr: 0.000020 loss_cls: 2.8706 (2.7963) grad_norm: 1.7569 (1.8554) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-10 15:09:40 root] (utils.py 283): INFO Epoch: [1] [2130/2502] eta: 0:18:15 lr: 0.000020 loss_cls: 2.9897 (2.7963) grad_norm: 1.7431 (1.8550) time: 2.9485 data: 0.0003 max mem: 29202 +[2024-12-10 15:10:09 root] (utils.py 283): INFO Epoch: [1] [2140/2502] eta: 0:17:46 lr: 0.000020 loss_cls: 2.8600 (2.7971) grad_norm: 1.7431 (1.8549) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-10 15:10:38 root] (utils.py 283): INFO Epoch: [1] [2150/2502] eta: 0:17:17 lr: 0.000020 loss_cls: 2.8694 (2.7972) grad_norm: 1.7328 (1.8545) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-10 15:11:08 root] (utils.py 283): INFO Epoch: [1] [2160/2502] eta: 0:16:47 lr: 0.000020 loss_cls: 2.8802 (2.7982) grad_norm: 1.7274 (1.8543) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 15:11:37 root] (utils.py 283): INFO Epoch: [1] [2170/2502] eta: 0:16:18 lr: 0.000020 loss_cls: 2.8819 (2.7979) grad_norm: 1.7274 (1.8542) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 15:12:07 root] (utils.py 283): INFO Epoch: [1] [2180/2502] eta: 0:15:48 lr: 0.000020 loss_cls: 2.9321 (2.7981) grad_norm: 1.7675 (1.8548) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 15:12:36 root] (utils.py 283): INFO Epoch: [1] [2190/2502] eta: 0:15:19 lr: 0.000020 loss_cls: 2.9980 (2.7992) grad_norm: 1.7675 (1.8542) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-10 15:13:06 root] (utils.py 283): INFO Epoch: [1] [2200/2502] eta: 0:14:49 lr: 0.000020 loss_cls: 2.9566 (2.7998) grad_norm: 1.7523 (1.8553) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 15:13:35 root] (utils.py 283): INFO Epoch: [1] [2210/2502] eta: 0:14:20 lr: 0.000020 loss_cls: 2.9347 (2.7997) grad_norm: 1.6678 (1.8545) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 15:14:05 root] (utils.py 283): INFO Epoch: [1] [2220/2502] eta: 0:13:50 lr: 0.000020 loss_cls: 2.9347 (2.8005) grad_norm: 1.6791 (1.8548) time: 2.9544 data: 0.0003 max mem: 29202 +[2024-12-10 15:14:34 root] (utils.py 283): INFO Epoch: [1] [2230/2502] eta: 0:13:21 lr: 0.000020 loss_cls: 2.8346 (2.7997) grad_norm: 1.7324 (1.8543) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-10 15:15:03 root] (utils.py 283): INFO Epoch: [1] [2240/2502] eta: 0:12:51 lr: 0.000020 loss_cls: 2.9686 (2.8006) grad_norm: 1.6840 (1.8537) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 15:15:33 root] (utils.py 283): INFO Epoch: [1] [2250/2502] eta: 0:12:22 lr: 0.000020 loss_cls: 2.9178 (2.7998) grad_norm: 1.7362 (1.8533) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 15:16:02 root] (utils.py 283): INFO Epoch: [1] [2260/2502] eta: 0:11:52 lr: 0.000020 loss_cls: 2.7710 (2.7998) grad_norm: 1.6693 (1.8530) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 15:16:32 root] (utils.py 283): INFO Epoch: [1] [2270/2502] eta: 0:11:23 lr: 0.000020 loss_cls: 2.7710 (2.7990) grad_norm: 1.6356 (1.8521) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 15:17:01 root] (utils.py 283): INFO Epoch: [1] [2280/2502] eta: 0:10:54 lr: 0.000020 loss_cls: 2.7830 (2.7991) grad_norm: 1.6356 (1.8516) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-10 15:17:31 root] (utils.py 283): INFO Epoch: [1] [2290/2502] eta: 0:10:24 lr: 0.000020 loss_cls: 2.9207 (2.7993) grad_norm: 1.6851 (1.8508) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-10 15:18:00 root] (utils.py 283): INFO Epoch: [1] [2300/2502] eta: 0:09:55 lr: 0.000020 loss_cls: 2.9662 (2.7996) grad_norm: 1.5836 (1.8500) time: 2.9476 data: 0.0003 max mem: 29202 +[2024-12-10 15:18:30 root] (utils.py 283): INFO Epoch: [1] [2310/2502] eta: 0:09:25 lr: 0.000020 loss_cls: 2.9420 (2.7998) grad_norm: 1.6622 (1.8493) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 15:18:59 root] (utils.py 283): INFO Epoch: [1] [2320/2502] eta: 0:08:56 lr: 0.000020 loss_cls: 2.7341 (2.7988) grad_norm: 1.6734 (1.8489) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 15:19:29 root] (utils.py 283): INFO Epoch: [1] [2330/2502] eta: 0:08:26 lr: 0.000020 loss_cls: 2.8222 (2.7995) grad_norm: 1.6752 (1.8483) time: 2.9567 data: 0.0003 max mem: 29202 +[2024-12-10 15:19:59 root] (utils.py 283): INFO Epoch: [1] [2340/2502] eta: 0:07:57 lr: 0.000020 loss_cls: 2.9400 (2.7997) grad_norm: 1.6364 (1.8475) time: 2.9652 data: 0.0003 max mem: 29202 +[2024-12-10 15:20:28 root] (utils.py 283): INFO Epoch: [1] [2350/2502] eta: 0:07:27 lr: 0.000020 loss_cls: 2.9400 (2.7991) grad_norm: 1.6241 (1.8470) time: 2.9530 data: 0.0003 max mem: 29202 +[2024-12-10 15:20:57 root] (utils.py 283): INFO Epoch: [1] [2360/2502] eta: 0:06:58 lr: 0.000020 loss_cls: 2.7690 (2.7981) grad_norm: 1.7306 (1.8467) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 15:21:27 root] (utils.py 283): INFO Epoch: [1] [2370/2502] eta: 0:06:28 lr: 0.000020 loss_cls: 2.6815 (2.7970) grad_norm: 1.7404 (1.8462) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 15:21:56 root] (utils.py 283): INFO Epoch: [1] [2380/2502] eta: 0:05:59 lr: 0.000020 loss_cls: 2.7283 (2.7971) grad_norm: 1.6799 (1.8457) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 15:22:26 root] (utils.py 283): INFO Epoch: [1] [2390/2502] eta: 0:05:29 lr: 0.000020 loss_cls: 2.8240 (2.7967) grad_norm: 1.5877 (1.8452) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-10 15:22:55 root] (utils.py 283): INFO Epoch: [1] [2400/2502] eta: 0:05:00 lr: 0.000020 loss_cls: 2.7096 (2.7960) grad_norm: 1.6525 (1.8446) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-10 15:23:24 root] (utils.py 283): INFO Epoch: [1] [2410/2502] eta: 0:04:31 lr: 0.000020 loss_cls: 2.7955 (2.7959) grad_norm: 1.6298 (1.8439) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 15:23:54 root] (utils.py 283): INFO Epoch: [1] [2420/2502] eta: 0:04:01 lr: 0.000020 loss_cls: 2.8676 (2.7964) grad_norm: 1.6298 (1.8435) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-10 15:24:23 root] (utils.py 283): INFO Epoch: [1] [2430/2502] eta: 0:03:32 lr: 0.000020 loss_cls: 3.0152 (2.7968) grad_norm: 1.8600 (1.8443) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-10 15:24:53 root] (utils.py 283): INFO Epoch: [1] [2440/2502] eta: 0:03:02 lr: 0.000020 loss_cls: 3.0731 (2.7976) grad_norm: 1.8338 (1.8442) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-10 15:25:22 root] (utils.py 283): INFO Epoch: [1] [2450/2502] eta: 0:02:33 lr: 0.000020 loss_cls: 3.0508 (2.7976) grad_norm: 1.8124 (1.8443) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-10 15:25:52 root] (utils.py 283): INFO Epoch: [1] [2460/2502] eta: 0:02:03 lr: 0.000020 loss_cls: 2.7847 (2.7967) grad_norm: 1.7945 (1.8443) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-10 15:26:21 root] (utils.py 283): INFO Epoch: [1] [2470/2502] eta: 0:01:34 lr: 0.000020 loss_cls: 2.9006 (2.7971) grad_norm: 1.7945 (1.8446) time: 2.9511 data: 0.0003 max mem: 29202 +[2024-12-10 15:26:51 root] (utils.py 283): INFO Epoch: [1] [2480/2502] eta: 0:01:04 lr: 0.000020 loss_cls: 2.9769 (2.7984) grad_norm: 1.7587 (1.8439) time: 2.9557 data: 0.0003 max mem: 29202 +[2024-12-10 15:27:21 root] (utils.py 283): INFO Epoch: [1] [2490/2502] eta: 0:00:35 lr: 0.000020 loss_cls: 2.9643 (2.7984) grad_norm: 1.6005 (1.8434) time: 2.9666 data: 0.0232 max mem: 29202 +[2024-12-10 15:27:50 root] (utils.py 283): INFO Epoch: [1] [2500/2502] eta: 0:00:05 lr: 0.000020 loss_cls: 2.9622 (2.7990) grad_norm: 1.6087 (1.8434) time: 2.9633 data: 0.0232 max mem: 29202 +[2024-12-10 15:27:53 root] (utils.py 283): INFO Epoch: [1] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 2.9622 (2.7993) grad_norm: 1.6121 (1.8435) time: 2.9625 data: 0.0232 max mem: 29202 +[2024-12-10 15:27:53 root] (utils.py 297): INFO Epoch: [1] Total time: 2:02:52 (2.9464 s / it) +[2024-12-10 15:27:53 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 2.9622 (2.7958) grad_norm: 1.6121 (1.8435) +[2024-12-10 15:27:57 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3452 (0.3452) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5649 data: 0.0003 max mem: 29202 +[2024-12-10 15:28:03 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7812 (0.7153) acc1: 86.7188 (86.2216) acc3: 96.8750 (96.2358) acc5: 98.4375 (97.7273) time: 0.5512 data: 0.0003 max mem: 29202 +[2024-12-10 15:28:08 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7197 (0.7484) acc1: 85.9375 (85.2679) acc3: 96.0938 (95.7589) acc5: 97.6562 (97.3586) time: 0.5503 data: 0.0004 max mem: 29202 +[2024-12-10 15:28:14 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7197 (0.7706) acc1: 85.1562 (84.5010) acc3: 94.5312 (95.4637) acc5: 96.8750 (97.2026) time: 0.5512 data: 0.0004 max mem: 29202 +[2024-12-10 15:28:19 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8066 (0.7805) acc1: 83.5938 (84.5274) acc3: 94.5312 (95.2934) acc5: 97.6562 (97.1989) time: 0.5519 data: 0.0004 max mem: 29202 +[2024-12-10 15:28:25 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9382 (0.8617) acc1: 78.1250 (82.7665) acc3: 92.9688 (94.1176) acc5: 95.3125 (96.3695) time: 0.5523 data: 0.0004 max mem: 29202 +[2024-12-10 15:28:30 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0537 (0.8937) acc1: 77.3438 (82.1977) acc3: 89.0625 (93.5579) acc5: 92.1875 (95.8504) time: 0.5526 data: 0.0004 max mem: 29202 +[2024-12-10 15:28:36 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0399 (0.9254) acc1: 78.9062 (81.3710) acc3: 91.4062 (93.2218) acc5: 93.7500 (95.6536) time: 0.5527 data: 0.0004 max mem: 29202 +[2024-12-10 15:28:41 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0888 (0.9504) acc1: 75.7812 (80.7870) acc3: 90.6250 (92.7758) acc5: 93.7500 (95.2836) time: 0.5529 data: 0.0006 max mem: 29202 +[2024-12-10 15:28:47 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.1093 (0.9816) acc1: 75.7812 (80.0395) acc3: 89.8438 (92.4451) acc5: 92.9688 (95.0464) time: 0.5530 data: 0.0006 max mem: 29202 +[2024-12-10 15:28:50 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0908 (0.9807) acc1: 75.7812 (80.0000) acc3: 90.6250 (92.5040) acc5: 93.7500 (95.1040) time: 0.5436 data: 0.0006 max mem: 29202 +[2024-12-10 15:28:50 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5505 s / it) +[2024-12-10 15:28:50 root] (engine.py 264): INFO * Acc@1 80.364 Acc@3 92.378 Acc@5 94.906 loss 0.972 flops 13.207 layer_flops 13.109 +[2024-12-10 15:28:50 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 80.4% +[2024-12-10 15:28:53 root] (main.py 576): INFO Max accuracy: 80.36% +[2024-12-10 15:28:56 root] (utils.py 283): INFO Epoch: [2] [ 0/2502] eta: 2:01:01 lr: 0.000020 loss_cls: 3.1328 (3.1328) grad_norm: 1.5764 (1.5764) time: 2.9022 data: 0.0004 max mem: 29202 +[2024-12-10 15:29:25 root] (utils.py 283): INFO Epoch: [2] [ 10/2502] eta: 2:01:41 lr: 0.000020 loss_cls: 2.8939 (2.8336) grad_norm: 1.7489 (1.7598) time: 2.9300 data: 0.0003 max mem: 29202 +[2024-12-10 15:29:55 root] (utils.py 283): INFO Epoch: [2] [ 20/2502] eta: 2:01:25 lr: 0.000020 loss_cls: 2.8939 (2.7562) grad_norm: 1.7051 (1.7473) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-10 15:30:24 root] (utils.py 283): INFO Epoch: [2] [ 30/2502] eta: 2:01:02 lr: 0.000020 loss_cls: 2.6909 (2.6982) grad_norm: 1.7089 (1.8139) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 15:30:54 root] (utils.py 283): INFO Epoch: [2] [ 40/2502] eta: 2:00:35 lr: 0.000020 loss_cls: 2.8393 (2.7343) grad_norm: 1.7952 (1.7835) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 15:31:23 root] (utils.py 283): INFO Epoch: [2] [ 50/2502] eta: 2:00:08 lr: 0.000020 loss_cls: 2.8487 (2.7338) grad_norm: 1.7591 (1.8000) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-10 15:31:53 root] (utils.py 283): INFO Epoch: [2] [ 60/2502] eta: 1:59:41 lr: 0.000020 loss_cls: 2.9008 (2.7597) grad_norm: 1.7445 (1.7986) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-10 15:32:22 root] (utils.py 283): INFO Epoch: [2] [ 70/2502] eta: 1:59:16 lr: 0.000020 loss_cls: 2.9140 (2.7442) grad_norm: 1.7069 (1.8252) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 15:32:52 root] (utils.py 283): INFO Epoch: [2] [ 80/2502] eta: 1:58:51 lr: 0.000020 loss_cls: 2.6957 (2.7441) grad_norm: 1.7413 (1.8611) time: 2.9555 data: 0.0003 max mem: 29202 +[2024-12-10 15:33:21 root] (utils.py 283): INFO Epoch: [2] [ 90/2502] eta: 1:58:21 lr: 0.000020 loss_cls: 2.7626 (2.7352) grad_norm: 1.7023 (1.8626) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-10 15:33:51 root] (utils.py 283): INFO Epoch: [2] [ 100/2502] eta: 1:57:49 lr: 0.000020 loss_cls: 2.8639 (2.7647) grad_norm: 1.6911 (1.8747) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-10 15:34:20 root] (utils.py 283): INFO Epoch: [2] [ 110/2502] eta: 1:57:23 lr: 0.000020 loss_cls: 2.9901 (2.7625) grad_norm: 1.8870 (1.8702) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-10 15:34:50 root] (utils.py 283): INFO Epoch: [2] [ 120/2502] eta: 1:56:58 lr: 0.000020 loss_cls: 2.6749 (2.7543) grad_norm: 1.7556 (1.8586) time: 2.9637 data: 0.0003 max mem: 29202 +[2024-12-10 15:35:19 root] (utils.py 283): INFO Epoch: [2] [ 130/2502] eta: 1:56:27 lr: 0.000020 loss_cls: 2.7628 (2.7627) grad_norm: 1.6621 (1.8491) time: 2.9545 data: 0.0003 max mem: 29202 +[2024-12-10 15:35:49 root] (utils.py 283): INFO Epoch: [2] [ 140/2502] eta: 1:55:58 lr: 0.000020 loss_cls: 2.9689 (2.7758) grad_norm: 1.6621 (1.8404) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 15:36:18 root] (utils.py 283): INFO Epoch: [2] [ 150/2502] eta: 1:55:28 lr: 0.000020 loss_cls: 2.8656 (2.7652) grad_norm: 1.6368 (1.8322) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 15:36:48 root] (utils.py 283): INFO Epoch: [2] [ 160/2502] eta: 1:54:58 lr: 0.000020 loss_cls: 2.8743 (2.7783) grad_norm: 1.6827 (1.8306) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 15:37:17 root] (utils.py 283): INFO Epoch: [2] [ 170/2502] eta: 1:54:28 lr: 0.000020 loss_cls: 2.9617 (2.7711) grad_norm: 1.7964 (1.8297) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 15:37:46 root] (utils.py 283): INFO Epoch: [2] [ 180/2502] eta: 1:53:59 lr: 0.000020 loss_cls: 2.9275 (2.7807) grad_norm: 1.8461 (1.8326) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 15:38:16 root] (utils.py 283): INFO Epoch: [2] [ 190/2502] eta: 1:53:29 lr: 0.000020 loss_cls: 2.8621 (2.7764) grad_norm: 1.7747 (1.8281) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 15:38:45 root] (utils.py 283): INFO Epoch: [2] [ 200/2502] eta: 1:52:59 lr: 0.000020 loss_cls: 2.9264 (2.7812) grad_norm: 1.6946 (1.8221) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 15:39:15 root] (utils.py 283): INFO Epoch: [2] [ 210/2502] eta: 1:52:29 lr: 0.000020 loss_cls: 3.0623 (2.7896) grad_norm: 1.6686 (1.8194) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-10 15:39:44 root] (utils.py 283): INFO Epoch: [2] [ 220/2502] eta: 1:52:02 lr: 0.000020 loss_cls: 3.0142 (2.7956) grad_norm: 1.7131 (1.8183) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-10 15:40:14 root] (utils.py 283): INFO Epoch: [2] [ 230/2502] eta: 1:51:32 lr: 0.000020 loss_cls: 2.9666 (2.7907) grad_norm: 1.6969 (1.8112) time: 2.9562 data: 0.0003 max mem: 29202 +[2024-12-10 15:40:43 root] (utils.py 283): INFO Epoch: [2] [ 240/2502] eta: 1:51:02 lr: 0.000020 loss_cls: 3.0378 (2.8008) grad_norm: 1.6969 (1.8078) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 15:41:13 root] (utils.py 283): INFO Epoch: [2] [ 250/2502] eta: 1:50:32 lr: 0.000020 loss_cls: 3.0228 (2.8004) grad_norm: 1.7463 (1.8084) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 15:41:42 root] (utils.py 283): INFO Epoch: [2] [ 260/2502] eta: 1:50:02 lr: 0.000020 loss_cls: 2.8792 (2.8018) grad_norm: 1.8453 (1.8109) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-10 15:42:11 root] (utils.py 283): INFO Epoch: [2] [ 270/2502] eta: 1:49:32 lr: 0.000020 loss_cls: 2.8468 (2.8008) grad_norm: 1.8453 (1.8168) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-10 15:42:41 root] (utils.py 283): INFO Epoch: [2] [ 280/2502] eta: 1:49:02 lr: 0.000020 loss_cls: 2.8828 (2.8027) grad_norm: 1.8037 (1.8185) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-10 15:43:10 root] (utils.py 283): INFO Epoch: [2] [ 290/2502] eta: 1:48:32 lr: 0.000020 loss_cls: 3.0105 (2.8031) grad_norm: 1.8037 (1.8172) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-10 15:43:39 root] (utils.py 283): INFO Epoch: [2] [ 300/2502] eta: 1:48:02 lr: 0.000020 loss_cls: 3.0105 (2.8109) grad_norm: 1.7585 (1.8172) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-10 15:44:09 root] (utils.py 283): INFO Epoch: [2] [ 310/2502] eta: 1:47:33 lr: 0.000020 loss_cls: 2.9032 (2.8049) grad_norm: 1.7166 (1.8127) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 15:44:39 root] (utils.py 283): INFO Epoch: [2] [ 320/2502] eta: 1:47:05 lr: 0.000020 loss_cls: 2.7024 (2.8031) grad_norm: 1.7016 (1.8098) time: 2.9546 data: 0.0003 max mem: 29202 +[2024-12-10 15:45:08 root] (utils.py 283): INFO Epoch: [2] [ 330/2502] eta: 1:46:36 lr: 0.000020 loss_cls: 2.7012 (2.7981) grad_norm: 1.7178 (1.8064) time: 2.9600 data: 0.0003 max mem: 29202 +[2024-12-10 15:45:38 root] (utils.py 283): INFO Epoch: [2] [ 340/2502] eta: 1:46:06 lr: 0.000020 loss_cls: 2.8797 (2.8017) grad_norm: 1.7178 (1.8076) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-10 15:46:07 root] (utils.py 283): INFO Epoch: [2] [ 350/2502] eta: 1:45:37 lr: 0.000020 loss_cls: 3.0497 (2.8039) grad_norm: 1.7146 (1.8038) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 15:46:37 root] (utils.py 283): INFO Epoch: [2] [ 360/2502] eta: 1:45:08 lr: 0.000020 loss_cls: 2.9213 (2.8017) grad_norm: 1.7150 (1.8072) time: 2.9505 data: 0.0003 max mem: 29202 +[2024-12-10 15:47:06 root] (utils.py 283): INFO Epoch: [2] [ 370/2502] eta: 1:44:39 lr: 0.000020 loss_cls: 2.6814 (2.7989) grad_norm: 1.8531 (1.8081) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-10 15:47:35 root] (utils.py 283): INFO Epoch: [2] [ 380/2502] eta: 1:44:09 lr: 0.000020 loss_cls: 2.6814 (2.7959) grad_norm: 1.7034 (1.8072) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 15:48:05 root] (utils.py 283): INFO Epoch: [2] [ 390/2502] eta: 1:43:40 lr: 0.000020 loss_cls: 2.8202 (2.7959) grad_norm: 1.7702 (1.8064) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 15:48:34 root] (utils.py 283): INFO Epoch: [2] [ 400/2502] eta: 1:43:11 lr: 0.000020 loss_cls: 2.8750 (2.7995) grad_norm: 1.8172 (1.8137) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-10 15:49:04 root] (utils.py 283): INFO Epoch: [2] [ 410/2502] eta: 1:42:41 lr: 0.000020 loss_cls: 2.8231 (2.7999) grad_norm: 1.8175 (1.8165) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-10 15:49:33 root] (utils.py 283): INFO Epoch: [2] [ 420/2502] eta: 1:42:12 lr: 0.000020 loss_cls: 2.6686 (2.7943) grad_norm: 1.6849 (1.8126) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-10 15:50:03 root] (utils.py 283): INFO Epoch: [2] [ 430/2502] eta: 1:41:42 lr: 0.000020 loss_cls: 2.7979 (2.7945) grad_norm: 1.6889 (1.8132) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-10 15:50:32 root] (utils.py 283): INFO Epoch: [2] [ 440/2502] eta: 1:41:13 lr: 0.000020 loss_cls: 2.8831 (2.7954) grad_norm: 1.7575 (1.8137) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 15:51:02 root] (utils.py 283): INFO Epoch: [2] [ 450/2502] eta: 1:40:43 lr: 0.000020 loss_cls: 2.8831 (2.7960) grad_norm: 1.7207 (1.8123) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 15:51:31 root] (utils.py 283): INFO Epoch: [2] [ 460/2502] eta: 1:40:13 lr: 0.000020 loss_cls: 3.0587 (2.7979) grad_norm: 1.7923 (1.8144) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-10 15:52:00 root] (utils.py 283): INFO Epoch: [2] [ 470/2502] eta: 1:39:44 lr: 0.000020 loss_cls: 3.0587 (2.8002) grad_norm: 1.6353 (1.8095) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 15:52:30 root] (utils.py 283): INFO Epoch: [2] [ 480/2502] eta: 1:39:14 lr: 0.000020 loss_cls: 2.8268 (2.7967) grad_norm: 1.5834 (1.8063) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 15:52:59 root] (utils.py 283): INFO Epoch: [2] [ 490/2502] eta: 1:38:44 lr: 0.000020 loss_cls: 2.8888 (2.7997) grad_norm: 1.6572 (1.8059) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 15:53:29 root] (utils.py 283): INFO Epoch: [2] [ 500/2502] eta: 1:38:15 lr: 0.000020 loss_cls: 3.0527 (2.8004) grad_norm: 1.6641 (1.8037) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 15:53:58 root] (utils.py 283): INFO Epoch: [2] [ 510/2502] eta: 1:37:45 lr: 0.000020 loss_cls: 2.8312 (2.7977) grad_norm: 1.6943 (1.8041) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 15:54:28 root] (utils.py 283): INFO Epoch: [2] [ 520/2502] eta: 1:37:16 lr: 0.000020 loss_cls: 2.8740 (2.8018) grad_norm: 1.7889 (1.8036) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 15:54:57 root] (utils.py 283): INFO Epoch: [2] [ 530/2502] eta: 1:36:46 lr: 0.000020 loss_cls: 2.8152 (2.7952) grad_norm: 1.7210 (1.8018) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 15:55:26 root] (utils.py 283): INFO Epoch: [2] [ 540/2502] eta: 1:36:17 lr: 0.000020 loss_cls: 2.7543 (2.7937) grad_norm: 1.6532 (1.7973) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 15:55:56 root] (utils.py 283): INFO Epoch: [2] [ 550/2502] eta: 1:35:47 lr: 0.000020 loss_cls: 2.8372 (2.7925) grad_norm: 1.5687 (1.7960) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-10 15:56:25 root] (utils.py 283): INFO Epoch: [2] [ 560/2502] eta: 1:35:18 lr: 0.000020 loss_cls: 3.0209 (2.7928) grad_norm: 1.7106 (1.7950) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-10 15:56:55 root] (utils.py 283): INFO Epoch: [2] [ 570/2502] eta: 1:34:49 lr: 0.000020 loss_cls: 2.8350 (2.7906) grad_norm: 1.7381 (1.7947) time: 2.9574 data: 0.0003 max mem: 29202 +[2024-12-10 15:57:24 root] (utils.py 283): INFO Epoch: [2] [ 580/2502] eta: 1:34:20 lr: 0.000020 loss_cls: 2.7071 (2.7890) grad_norm: 1.7381 (1.7949) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-10 15:57:54 root] (utils.py 283): INFO Epoch: [2] [ 590/2502] eta: 1:33:50 lr: 0.000020 loss_cls: 3.0389 (2.7931) grad_norm: 1.7568 (1.7952) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 15:58:23 root] (utils.py 283): INFO Epoch: [2] [ 600/2502] eta: 1:33:21 lr: 0.000020 loss_cls: 3.0261 (2.7889) grad_norm: 1.7256 (1.7937) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 15:58:53 root] (utils.py 283): INFO Epoch: [2] [ 610/2502] eta: 1:32:51 lr: 0.000020 loss_cls: 2.6856 (2.7888) grad_norm: 1.6195 (1.7907) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 15:59:22 root] (utils.py 283): INFO Epoch: [2] [ 620/2502] eta: 1:32:22 lr: 0.000020 loss_cls: 2.9267 (2.7906) grad_norm: 1.5990 (1.7894) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 15:59:52 root] (utils.py 283): INFO Epoch: [2] [ 630/2502] eta: 1:31:52 lr: 0.000020 loss_cls: 2.9793 (2.7946) grad_norm: 1.7186 (1.7905) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 16:00:21 root] (utils.py 283): INFO Epoch: [2] [ 640/2502] eta: 1:31:23 lr: 0.000020 loss_cls: 2.9001 (2.7924) grad_norm: 1.6358 (1.7886) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-10 16:00:50 root] (utils.py 283): INFO Epoch: [2] [ 650/2502] eta: 1:30:53 lr: 0.000020 loss_cls: 2.7235 (2.7926) grad_norm: 1.6036 (1.7867) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 16:01:20 root] (utils.py 283): INFO Epoch: [2] [ 660/2502] eta: 1:30:24 lr: 0.000020 loss_cls: 2.8367 (2.7919) grad_norm: 1.6373 (1.7857) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 16:01:49 root] (utils.py 283): INFO Epoch: [2] [ 670/2502] eta: 1:29:55 lr: 0.000020 loss_cls: 2.6743 (2.7929) grad_norm: 1.7931 (1.7869) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 16:02:19 root] (utils.py 283): INFO Epoch: [2] [ 680/2502] eta: 1:29:25 lr: 0.000020 loss_cls: 2.9194 (2.7947) grad_norm: 1.8036 (1.7877) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 16:02:48 root] (utils.py 283): INFO Epoch: [2] [ 690/2502] eta: 1:28:56 lr: 0.000020 loss_cls: 2.9373 (2.7957) grad_norm: 1.7052 (1.7867) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 16:03:18 root] (utils.py 283): INFO Epoch: [2] [ 700/2502] eta: 1:28:26 lr: 0.000020 loss_cls: 2.9687 (2.7971) grad_norm: 1.6078 (1.7849) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 16:03:47 root] (utils.py 283): INFO Epoch: [2] [ 710/2502] eta: 1:27:57 lr: 0.000020 loss_cls: 2.9473 (2.7957) grad_norm: 1.7470 (1.7893) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-10 16:04:17 root] (utils.py 283): INFO Epoch: [2] [ 720/2502] eta: 1:27:27 lr: 0.000020 loss_cls: 2.7747 (2.7938) grad_norm: 1.8446 (1.7906) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 16:04:46 root] (utils.py 283): INFO Epoch: [2] [ 730/2502] eta: 1:26:57 lr: 0.000020 loss_cls: 2.7747 (2.7924) grad_norm: 1.6971 (1.7883) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-10 16:05:15 root] (utils.py 283): INFO Epoch: [2] [ 740/2502] eta: 1:26:28 lr: 0.000020 loss_cls: 2.7020 (2.7910) grad_norm: 1.6027 (1.7866) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 16:05:45 root] (utils.py 283): INFO Epoch: [2] [ 750/2502] eta: 1:25:58 lr: 0.000020 loss_cls: 2.8791 (2.7933) grad_norm: 1.5814 (1.7865) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 16:06:14 root] (utils.py 283): INFO Epoch: [2] [ 760/2502] eta: 1:25:29 lr: 0.000020 loss_cls: 2.9495 (2.7956) grad_norm: 1.8092 (1.7873) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-10 16:06:44 root] (utils.py 283): INFO Epoch: [2] [ 770/2502] eta: 1:24:59 lr: 0.000020 loss_cls: 2.8942 (2.7943) grad_norm: 1.7342 (1.7857) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 16:07:13 root] (utils.py 283): INFO Epoch: [2] [ 780/2502] eta: 1:24:30 lr: 0.000020 loss_cls: 2.8482 (2.7949) grad_norm: 1.6794 (1.7843) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-10 16:07:43 root] (utils.py 283): INFO Epoch: [2] [ 790/2502] eta: 1:24:01 lr: 0.000020 loss_cls: 2.8669 (2.7935) grad_norm: 1.6567 (1.7832) time: 2.9570 data: 0.0003 max mem: 29202 +[2024-12-10 16:08:12 root] (utils.py 283): INFO Epoch: [2] [ 800/2502] eta: 1:23:32 lr: 0.000020 loss_cls: 2.7910 (2.7936) grad_norm: 1.6567 (1.7824) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-10 16:08:42 root] (utils.py 283): INFO Epoch: [2] [ 810/2502] eta: 1:23:02 lr: 0.000020 loss_cls: 2.7910 (2.7933) grad_norm: 1.7751 (1.7824) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-10 16:09:11 root] (utils.py 283): INFO Epoch: [2] [ 820/2502] eta: 1:22:33 lr: 0.000020 loss_cls: 2.8744 (2.7949) grad_norm: 1.8173 (1.7827) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 16:09:40 root] (utils.py 283): INFO Epoch: [2] [ 830/2502] eta: 1:22:03 lr: 0.000020 loss_cls: 2.9441 (2.7960) grad_norm: 1.7807 (1.7825) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 16:10:10 root] (utils.py 283): INFO Epoch: [2] [ 840/2502] eta: 1:21:34 lr: 0.000020 loss_cls: 2.9380 (2.7963) grad_norm: 1.6054 (1.7815) time: 2.9516 data: 0.0003 max mem: 29202 +[2024-12-10 16:10:39 root] (utils.py 283): INFO Epoch: [2] [ 850/2502] eta: 1:21:04 lr: 0.000020 loss_cls: 2.8298 (2.7953) grad_norm: 1.6470 (1.7810) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-10 16:11:09 root] (utils.py 283): INFO Epoch: [2] [ 860/2502] eta: 1:20:35 lr: 0.000020 loss_cls: 2.9781 (2.7986) grad_norm: 1.7168 (1.7845) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 16:11:39 root] (utils.py 283): INFO Epoch: [2] [ 870/2502] eta: 1:20:06 lr: 0.000020 loss_cls: 3.1549 (2.8024) grad_norm: 1.8100 (1.7851) time: 2.9607 data: 0.0003 max mem: 29202 +[2024-12-10 16:12:08 root] (utils.py 283): INFO Epoch: [2] [ 880/2502] eta: 1:19:37 lr: 0.000020 loss_cls: 3.0764 (2.8038) grad_norm: 1.7917 (1.7850) time: 2.9650 data: 0.0003 max mem: 29202 +[2024-12-10 16:12:38 root] (utils.py 283): INFO Epoch: [2] [ 890/2502] eta: 1:19:07 lr: 0.000020 loss_cls: 2.8850 (2.8024) grad_norm: 1.7509 (1.7865) time: 2.9489 data: 0.0003 max mem: 29202 +[2024-12-10 16:13:07 root] (utils.py 283): INFO Epoch: [2] [ 900/2502] eta: 1:18:38 lr: 0.000020 loss_cls: 2.6754 (2.7991) grad_norm: 1.7676 (1.7871) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 16:13:36 root] (utils.py 283): INFO Epoch: [2] [ 910/2502] eta: 1:18:08 lr: 0.000020 loss_cls: 2.8366 (2.8000) grad_norm: 1.7488 (1.7870) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-10 16:14:06 root] (utils.py 283): INFO Epoch: [2] [ 920/2502] eta: 1:17:39 lr: 0.000020 loss_cls: 2.9608 (2.8000) grad_norm: 1.7486 (1.7865) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-10 16:14:36 root] (utils.py 283): INFO Epoch: [2] [ 930/2502] eta: 1:17:10 lr: 0.000020 loss_cls: 3.0593 (2.8007) grad_norm: 1.7113 (1.7857) time: 2.9711 data: 0.0003 max mem: 29202 +[2024-12-10 16:15:05 root] (utils.py 283): INFO Epoch: [2] [ 940/2502] eta: 1:16:41 lr: 0.000020 loss_cls: 3.0248 (2.8020) grad_norm: 1.6280 (1.7843) time: 2.9720 data: 0.0003 max mem: 29202 +[2024-12-10 16:15:35 root] (utils.py 283): INFO Epoch: [2] [ 950/2502] eta: 1:16:12 lr: 0.000020 loss_cls: 3.0248 (2.8020) grad_norm: 1.6302 (1.7839) time: 2.9710 data: 0.0003 max mem: 29202 +[2024-12-10 16:16:05 root] (utils.py 283): INFO Epoch: [2] [ 960/2502] eta: 1:15:43 lr: 0.000020 loss_cls: 2.8180 (2.8005) grad_norm: 1.7061 (1.7836) time: 2.9730 data: 0.0003 max mem: 29202 +[2024-12-10 16:16:35 root] (utils.py 283): INFO Epoch: [2] [ 970/2502] eta: 1:15:14 lr: 0.000020 loss_cls: 2.8292 (2.8012) grad_norm: 1.6504 (1.7821) time: 2.9738 data: 0.0003 max mem: 29202 +[2024-12-10 16:17:04 root] (utils.py 283): INFO Epoch: [2] [ 980/2502] eta: 1:14:45 lr: 0.000020 loss_cls: 2.9432 (2.8030) grad_norm: 1.7173 (1.7834) time: 2.9749 data: 0.0003 max mem: 29202 +[2024-12-10 16:17:34 root] (utils.py 283): INFO Epoch: [2] [ 990/2502] eta: 1:14:16 lr: 0.000020 loss_cls: 2.8979 (2.8034) grad_norm: 1.8104 (1.7831) time: 2.9742 data: 0.0003 max mem: 29202 +[2024-12-10 16:18:04 root] (utils.py 283): INFO Epoch: [2] [1000/2502] eta: 1:13:47 lr: 0.000020 loss_cls: 2.8962 (2.8041) grad_norm: 1.6853 (1.7838) time: 2.9718 data: 0.0003 max mem: 29202 +[2024-12-10 16:18:33 root] (utils.py 283): INFO Epoch: [2] [1010/2502] eta: 1:13:17 lr: 0.000020 loss_cls: 2.9917 (2.8053) grad_norm: 1.6408 (1.7831) time: 2.9574 data: 0.0003 max mem: 29202 +[2024-12-10 16:19:03 root] (utils.py 283): INFO Epoch: [2] [1020/2502] eta: 1:12:48 lr: 0.000020 loss_cls: 2.9928 (2.8046) grad_norm: 1.6408 (1.7832) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 16:19:32 root] (utils.py 283): INFO Epoch: [2] [1030/2502] eta: 1:12:18 lr: 0.000020 loss_cls: 2.5886 (2.8017) grad_norm: 1.6316 (1.7812) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-10 16:20:02 root] (utils.py 283): INFO Epoch: [2] [1040/2502] eta: 1:11:49 lr: 0.000020 loss_cls: 2.7049 (2.8018) grad_norm: 1.5531 (1.7806) time: 2.9551 data: 0.0003 max mem: 29202 +[2024-12-10 16:20:31 root] (utils.py 283): INFO Epoch: [2] [1050/2502] eta: 1:11:19 lr: 0.000020 loss_cls: 2.8203 (2.8014) grad_norm: 1.7158 (1.7806) time: 2.9545 data: 0.0003 max mem: 29202 +[2024-12-10 16:21:01 root] (utils.py 283): INFO Epoch: [2] [1060/2502] eta: 1:10:50 lr: 0.000020 loss_cls: 2.8203 (2.8022) grad_norm: 1.7612 (1.7811) time: 2.9552 data: 0.0003 max mem: 29202 +[2024-12-10 16:21:31 root] (utils.py 283): INFO Epoch: [2] [1070/2502] eta: 1:10:21 lr: 0.000020 loss_cls: 2.9694 (2.8033) grad_norm: 1.6547 (1.7808) time: 2.9657 data: 0.0003 max mem: 29202 +[2024-12-10 16:22:00 root] (utils.py 283): INFO Epoch: [2] [1080/2502] eta: 1:09:52 lr: 0.000020 loss_cls: 2.8429 (2.8033) grad_norm: 1.6423 (1.7806) time: 2.9632 data: 0.0003 max mem: 29202 +[2024-12-10 16:22:30 root] (utils.py 283): INFO Epoch: [2] [1090/2502] eta: 1:09:22 lr: 0.000020 loss_cls: 2.9219 (2.8038) grad_norm: 1.6454 (1.7799) time: 2.9574 data: 0.0003 max mem: 29202 +[2024-12-10 16:22:59 root] (utils.py 283): INFO Epoch: [2] [1100/2502] eta: 1:08:53 lr: 0.000020 loss_cls: 2.9219 (2.8036) grad_norm: 1.6695 (1.7789) time: 2.9509 data: 0.0003 max mem: 29202 +[2024-12-10 16:23:29 root] (utils.py 283): INFO Epoch: [2] [1110/2502] eta: 1:08:23 lr: 0.000020 loss_cls: 2.5943 (2.8011) grad_norm: 1.6864 (1.7788) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 16:23:58 root] (utils.py 283): INFO Epoch: [2] [1120/2502] eta: 1:07:54 lr: 0.000020 loss_cls: 2.6190 (2.8008) grad_norm: 1.7781 (1.7820) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-10 16:24:28 root] (utils.py 283): INFO Epoch: [2] [1130/2502] eta: 1:07:24 lr: 0.000020 loss_cls: 2.7372 (2.8006) grad_norm: 1.7496 (1.7827) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-10 16:24:57 root] (utils.py 283): INFO Epoch: [2] [1140/2502] eta: 1:06:55 lr: 0.000020 loss_cls: 2.7212 (2.7983) grad_norm: 1.7190 (1.7832) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-10 16:25:26 root] (utils.py 283): INFO Epoch: [2] [1150/2502] eta: 1:06:25 lr: 0.000020 loss_cls: 2.7212 (2.7986) grad_norm: 1.6819 (1.7823) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-10 16:25:56 root] (utils.py 283): INFO Epoch: [2] [1160/2502] eta: 1:05:55 lr: 0.000020 loss_cls: 2.9455 (2.7987) grad_norm: 1.7936 (1.7826) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-10 16:26:25 root] (utils.py 283): INFO Epoch: [2] [1170/2502] eta: 1:05:26 lr: 0.000020 loss_cls: 2.9564 (2.8001) grad_norm: 1.7576 (1.7816) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-10 16:26:54 root] (utils.py 283): INFO Epoch: [2] [1180/2502] eta: 1:04:56 lr: 0.000020 loss_cls: 2.9564 (2.7998) grad_norm: 1.6942 (1.7809) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-10 16:27:24 root] (utils.py 283): INFO Epoch: [2] [1190/2502] eta: 1:04:27 lr: 0.000020 loss_cls: 2.9063 (2.8001) grad_norm: 1.7066 (1.7806) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-10 16:27:53 root] (utils.py 283): INFO Epoch: [2] [1200/2502] eta: 1:03:57 lr: 0.000020 loss_cls: 2.9439 (2.8015) grad_norm: 1.7279 (1.7816) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-10 16:28:23 root] (utils.py 283): INFO Epoch: [2] [1210/2502] eta: 1:03:27 lr: 0.000020 loss_cls: 2.6461 (2.7980) grad_norm: 1.7690 (1.7813) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-10 16:28:52 root] (utils.py 283): INFO Epoch: [2] [1220/2502] eta: 1:02:58 lr: 0.000020 loss_cls: 2.6461 (2.7984) grad_norm: 1.7022 (1.7816) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-10 16:29:21 root] (utils.py 283): INFO Epoch: [2] [1230/2502] eta: 1:02:28 lr: 0.000020 loss_cls: 2.8608 (2.7974) grad_norm: 1.6123 (1.7802) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 16:29:51 root] (utils.py 283): INFO Epoch: [2] [1240/2502] eta: 1:01:59 lr: 0.000020 loss_cls: 2.9262 (2.7987) grad_norm: 1.6175 (1.7812) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 16:30:20 root] (utils.py 283): INFO Epoch: [2] [1250/2502] eta: 1:01:29 lr: 0.000020 loss_cls: 3.0410 (2.8007) grad_norm: 1.7493 (1.7813) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 16:30:50 root] (utils.py 283): INFO Epoch: [2] [1260/2502] eta: 1:01:00 lr: 0.000020 loss_cls: 2.9895 (2.8018) grad_norm: 1.7074 (1.7810) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-10 16:31:19 root] (utils.py 283): INFO Epoch: [2] [1270/2502] eta: 1:00:30 lr: 0.000020 loss_cls: 2.9671 (2.8021) grad_norm: 1.6473 (1.7825) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 16:31:49 root] (utils.py 283): INFO Epoch: [2] [1280/2502] eta: 1:00:01 lr: 0.000020 loss_cls: 2.8588 (2.8021) grad_norm: 1.7567 (1.7835) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 16:32:18 root] (utils.py 283): INFO Epoch: [2] [1290/2502] eta: 0:59:31 lr: 0.000020 loss_cls: 2.9654 (2.8040) grad_norm: 1.8541 (1.7838) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-10 16:32:47 root] (utils.py 283): INFO Epoch: [2] [1300/2502] eta: 0:59:02 lr: 0.000020 loss_cls: 3.0312 (2.8056) grad_norm: 1.8541 (1.7835) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 16:33:17 root] (utils.py 283): INFO Epoch: [2] [1310/2502] eta: 0:58:32 lr: 0.000020 loss_cls: 3.0550 (2.8068) grad_norm: 1.6962 (1.7824) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 16:33:46 root] (utils.py 283): INFO Epoch: [2] [1320/2502] eta: 0:58:03 lr: 0.000020 loss_cls: 3.0550 (2.8079) grad_norm: 1.6795 (1.7827) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-10 16:34:16 root] (utils.py 283): INFO Epoch: [2] [1330/2502] eta: 0:57:33 lr: 0.000020 loss_cls: 2.9119 (2.8065) grad_norm: 1.6288 (1.7820) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-10 16:34:45 root] (utils.py 283): INFO Epoch: [2] [1340/2502] eta: 0:57:04 lr: 0.000020 loss_cls: 2.8445 (2.8066) grad_norm: 1.6105 (1.7812) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 16:35:15 root] (utils.py 283): INFO Epoch: [2] [1350/2502] eta: 0:56:34 lr: 0.000020 loss_cls: 2.9384 (2.8060) grad_norm: 1.6170 (1.7803) time: 2.9476 data: 0.0003 max mem: 29202 +[2024-12-10 16:35:44 root] (utils.py 283): INFO Epoch: [2] [1360/2502] eta: 0:56:05 lr: 0.000020 loss_cls: 2.9384 (2.8066) grad_norm: 1.6373 (1.7807) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 16:36:14 root] (utils.py 283): INFO Epoch: [2] [1370/2502] eta: 0:55:35 lr: 0.000020 loss_cls: 3.0449 (2.8077) grad_norm: 1.6580 (1.7801) time: 2.9555 data: 0.0003 max mem: 29202 +[2024-12-10 16:36:43 root] (utils.py 283): INFO Epoch: [2] [1380/2502] eta: 0:55:06 lr: 0.000020 loss_cls: 2.9419 (2.8078) grad_norm: 1.7688 (1.7804) time: 2.9540 data: 0.0003 max mem: 29202 +[2024-12-10 16:37:12 root] (utils.py 283): INFO Epoch: [2] [1390/2502] eta: 0:54:36 lr: 0.000020 loss_cls: 2.9588 (2.8084) grad_norm: 1.8439 (1.7806) time: 2.9334 data: 0.0003 max mem: 29202 +[2024-12-10 16:37:42 root] (utils.py 283): INFO Epoch: [2] [1400/2502] eta: 0:54:07 lr: 0.000020 loss_cls: 2.9588 (2.8083) grad_norm: 1.7450 (1.7809) time: 2.9246 data: 0.0003 max mem: 29202 +[2024-12-10 16:38:11 root] (utils.py 283): INFO Epoch: [2] [1410/2502] eta: 0:53:37 lr: 0.000020 loss_cls: 2.8660 (2.8083) grad_norm: 1.6695 (1.7806) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-10 16:38:40 root] (utils.py 283): INFO Epoch: [2] [1420/2502] eta: 0:53:07 lr: 0.000020 loss_cls: 3.0005 (2.8085) grad_norm: 1.6395 (1.7804) time: 2.9322 data: 0.0003 max mem: 29202 +[2024-12-10 16:39:10 root] (utils.py 283): INFO Epoch: [2] [1430/2502] eta: 0:52:38 lr: 0.000020 loss_cls: 2.6487 (2.8068) grad_norm: 1.5992 (1.7798) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-10 16:39:39 root] (utils.py 283): INFO Epoch: [2] [1440/2502] eta: 0:52:08 lr: 0.000020 loss_cls: 2.8220 (2.8072) grad_norm: 1.7387 (1.7797) time: 2.9275 data: 0.0003 max mem: 29202 +[2024-12-10 16:40:08 root] (utils.py 283): INFO Epoch: [2] [1450/2502] eta: 0:51:39 lr: 0.000020 loss_cls: 2.9919 (2.8066) grad_norm: 1.7387 (1.7793) time: 2.9253 data: 0.0003 max mem: 29202 +[2024-12-10 16:40:37 root] (utils.py 283): INFO Epoch: [2] [1460/2502] eta: 0:51:09 lr: 0.000020 loss_cls: 2.7884 (2.8056) grad_norm: 1.7357 (1.7791) time: 2.9301 data: 0.0003 max mem: 29202 +[2024-12-10 16:41:07 root] (utils.py 283): INFO Epoch: [2] [1470/2502] eta: 0:50:40 lr: 0.000020 loss_cls: 2.5930 (2.8038) grad_norm: 1.7570 (1.7787) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-10 16:41:36 root] (utils.py 283): INFO Epoch: [2] [1480/2502] eta: 0:50:10 lr: 0.000020 loss_cls: 2.6364 (2.8030) grad_norm: 1.7534 (1.7780) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-10 16:42:06 root] (utils.py 283): INFO Epoch: [2] [1490/2502] eta: 0:49:41 lr: 0.000020 loss_cls: 2.8936 (2.8039) grad_norm: 1.6828 (1.7780) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-10 16:42:35 root] (utils.py 283): INFO Epoch: [2] [1500/2502] eta: 0:49:11 lr: 0.000020 loss_cls: 2.9189 (2.8030) grad_norm: 1.6828 (1.7774) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-10 16:43:04 root] (utils.py 283): INFO Epoch: [2] [1510/2502] eta: 0:48:42 lr: 0.000020 loss_cls: 2.9141 (2.8033) grad_norm: 1.6186 (1.7778) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-10 16:43:34 root] (utils.py 283): INFO Epoch: [2] [1520/2502] eta: 0:48:12 lr: 0.000020 loss_cls: 2.7985 (2.8023) grad_norm: 1.6628 (1.7774) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-10 16:44:03 root] (utils.py 283): INFO Epoch: [2] [1530/2502] eta: 0:47:43 lr: 0.000020 loss_cls: 2.7423 (2.8025) grad_norm: 1.6909 (1.7775) time: 2.9565 data: 0.0003 max mem: 29202 +[2024-12-10 16:44:33 root] (utils.py 283): INFO Epoch: [2] [1540/2502] eta: 0:47:13 lr: 0.000020 loss_cls: 2.9681 (2.8030) grad_norm: 1.7063 (1.7768) time: 2.9602 data: 0.0003 max mem: 29202 +[2024-12-10 16:45:02 root] (utils.py 283): INFO Epoch: [2] [1550/2502] eta: 0:46:44 lr: 0.000020 loss_cls: 2.9195 (2.8031) grad_norm: 1.7063 (1.7763) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-10 16:45:32 root] (utils.py 283): INFO Epoch: [2] [1560/2502] eta: 0:46:14 lr: 0.000020 loss_cls: 2.8166 (2.8030) grad_norm: 1.7223 (1.7760) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-10 16:46:01 root] (utils.py 283): INFO Epoch: [2] [1570/2502] eta: 0:45:45 lr: 0.000020 loss_cls: 2.9343 (2.8037) grad_norm: 1.7047 (1.7758) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-10 16:46:30 root] (utils.py 283): INFO Epoch: [2] [1580/2502] eta: 0:45:15 lr: 0.000020 loss_cls: 3.0110 (2.8033) grad_norm: 1.6669 (1.7759) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-10 16:47:00 root] (utils.py 283): INFO Epoch: [2] [1590/2502] eta: 0:44:46 lr: 0.000020 loss_cls: 2.9839 (2.8039) grad_norm: 1.6660 (1.7761) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 16:47:29 root] (utils.py 283): INFO Epoch: [2] [1600/2502] eta: 0:44:16 lr: 0.000020 loss_cls: 2.9502 (2.8035) grad_norm: 1.7986 (1.7770) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 16:47:59 root] (utils.py 283): INFO Epoch: [2] [1610/2502] eta: 0:43:47 lr: 0.000020 loss_cls: 2.9436 (2.8032) grad_norm: 1.6192 (1.7760) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-10 16:48:28 root] (utils.py 283): INFO Epoch: [2] [1620/2502] eta: 0:43:17 lr: 0.000020 loss_cls: 2.9491 (2.8041) grad_norm: 1.6192 (1.7758) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-10 16:48:57 root] (utils.py 283): INFO Epoch: [2] [1630/2502] eta: 0:42:48 lr: 0.000020 loss_cls: 2.9662 (2.8032) grad_norm: 1.6865 (1.7767) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-10 16:49:27 root] (utils.py 283): INFO Epoch: [2] [1640/2502] eta: 0:42:18 lr: 0.000020 loss_cls: 2.9661 (2.8039) grad_norm: 1.6934 (1.7769) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-10 16:49:56 root] (utils.py 283): INFO Epoch: [2] [1650/2502] eta: 0:41:49 lr: 0.000020 loss_cls: 2.7996 (2.8030) grad_norm: 1.7097 (1.7765) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-10 16:50:26 root] (utils.py 283): INFO Epoch: [2] [1660/2502] eta: 0:41:20 lr: 0.000020 loss_cls: 2.7870 (2.8024) grad_norm: 1.6456 (1.7759) time: 2.9519 data: 0.0003 max mem: 29202 +[2024-12-10 16:50:55 root] (utils.py 283): INFO Epoch: [2] [1670/2502] eta: 0:40:50 lr: 0.000020 loss_cls: 2.7147 (2.8011) grad_norm: 1.5964 (1.7753) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 16:51:25 root] (utils.py 283): INFO Epoch: [2] [1680/2502] eta: 0:40:21 lr: 0.000020 loss_cls: 2.6522 (2.8004) grad_norm: 1.5721 (1.7751) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-10 16:51:54 root] (utils.py 283): INFO Epoch: [2] [1690/2502] eta: 0:39:51 lr: 0.000020 loss_cls: 2.6585 (2.7994) grad_norm: 1.7028 (1.7751) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 16:52:23 root] (utils.py 283): INFO Epoch: [2] [1700/2502] eta: 0:39:22 lr: 0.000020 loss_cls: 2.8589 (2.8003) grad_norm: 1.6350 (1.7748) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 16:52:53 root] (utils.py 283): INFO Epoch: [2] [1710/2502] eta: 0:38:52 lr: 0.000020 loss_cls: 2.8950 (2.8006) grad_norm: 1.7259 (1.7751) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-10 16:53:22 root] (utils.py 283): INFO Epoch: [2] [1720/2502] eta: 0:38:23 lr: 0.000020 loss_cls: 2.9897 (2.8020) grad_norm: 1.7394 (1.7747) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-10 16:53:52 root] (utils.py 283): INFO Epoch: [2] [1730/2502] eta: 0:37:53 lr: 0.000020 loss_cls: 2.9545 (2.8019) grad_norm: 1.7161 (1.7751) time: 2.9459 data: 0.0003 max mem: 29202 +[2024-12-10 16:54:21 root] (utils.py 283): INFO Epoch: [2] [1740/2502] eta: 0:37:24 lr: 0.000020 loss_cls: 2.6117 (2.8003) grad_norm: 1.7267 (1.7749) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-10 16:54:51 root] (utils.py 283): INFO Epoch: [2] [1750/2502] eta: 0:36:54 lr: 0.000020 loss_cls: 2.5071 (2.7991) grad_norm: 1.7632 (1.7750) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-10 16:55:20 root] (utils.py 283): INFO Epoch: [2] [1760/2502] eta: 0:36:25 lr: 0.000020 loss_cls: 2.5975 (2.7982) grad_norm: 1.7709 (1.7747) time: 2.9577 data: 0.0003 max mem: 29202 +[2024-12-10 16:55:50 root] (utils.py 283): INFO Epoch: [2] [1770/2502] eta: 0:35:56 lr: 0.000020 loss_cls: 2.8848 (2.7986) grad_norm: 1.6783 (1.7750) time: 2.9540 data: 0.0003 max mem: 29202 +[2024-12-10 16:56:19 root] (utils.py 283): INFO Epoch: [2] [1780/2502] eta: 0:35:26 lr: 0.000020 loss_cls: 2.9121 (2.7985) grad_norm: 1.6558 (1.7738) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 16:56:49 root] (utils.py 283): INFO Epoch: [2] [1790/2502] eta: 0:34:57 lr: 0.000020 loss_cls: 2.8618 (2.7994) grad_norm: 1.5456 (1.7731) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 16:57:18 root] (utils.py 283): INFO Epoch: [2] [1800/2502] eta: 0:34:27 lr: 0.000020 loss_cls: 2.9492 (2.7986) grad_norm: 1.6168 (1.7724) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 16:57:48 root] (utils.py 283): INFO Epoch: [2] [1810/2502] eta: 0:33:58 lr: 0.000020 loss_cls: 2.8702 (2.7983) grad_norm: 1.6196 (1.7720) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 16:58:17 root] (utils.py 283): INFO Epoch: [2] [1820/2502] eta: 0:33:28 lr: 0.000020 loss_cls: 2.8712 (2.7983) grad_norm: 1.7095 (1.7721) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-10 16:58:47 root] (utils.py 283): INFO Epoch: [2] [1830/2502] eta: 0:32:59 lr: 0.000020 loss_cls: 2.8310 (2.7975) grad_norm: 1.5871 (1.7708) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 16:59:16 root] (utils.py 283): INFO Epoch: [2] [1840/2502] eta: 0:32:29 lr: 0.000020 loss_cls: 2.8310 (2.7972) grad_norm: 1.5504 (1.7702) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 16:59:45 root] (utils.py 283): INFO Epoch: [2] [1850/2502] eta: 0:32:00 lr: 0.000020 loss_cls: 2.5644 (2.7957) grad_norm: 1.6038 (1.7699) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 17:00:15 root] (utils.py 283): INFO Epoch: [2] [1860/2502] eta: 0:31:30 lr: 0.000020 loss_cls: 2.6027 (2.7952) grad_norm: 1.7067 (1.7697) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-10 17:00:44 root] (utils.py 283): INFO Epoch: [2] [1870/2502] eta: 0:31:01 lr: 0.000020 loss_cls: 2.8600 (2.7954) grad_norm: 1.7316 (1.7701) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 17:01:14 root] (utils.py 283): INFO Epoch: [2] [1880/2502] eta: 0:30:31 lr: 0.000020 loss_cls: 2.9034 (2.7942) grad_norm: 1.6583 (1.7693) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 17:01:43 root] (utils.py 283): INFO Epoch: [2] [1890/2502] eta: 0:30:02 lr: 0.000020 loss_cls: 2.8053 (2.7940) grad_norm: 1.5661 (1.7683) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 17:02:13 root] (utils.py 283): INFO Epoch: [2] [1900/2502] eta: 0:29:33 lr: 0.000020 loss_cls: 2.8053 (2.7933) grad_norm: 1.6075 (1.7676) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 17:02:42 root] (utils.py 283): INFO Epoch: [2] [1910/2502] eta: 0:29:03 lr: 0.000020 loss_cls: 2.7716 (2.7925) grad_norm: 1.6582 (1.7674) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 17:03:11 root] (utils.py 283): INFO Epoch: [2] [1920/2502] eta: 0:28:34 lr: 0.000020 loss_cls: 2.7308 (2.7923) grad_norm: 1.6743 (1.7672) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 17:03:41 root] (utils.py 283): INFO Epoch: [2] [1930/2502] eta: 0:28:04 lr: 0.000020 loss_cls: 2.8147 (2.7925) grad_norm: 1.6824 (1.7674) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 17:04:10 root] (utils.py 283): INFO Epoch: [2] [1940/2502] eta: 0:27:35 lr: 0.000020 loss_cls: 2.8147 (2.7918) grad_norm: 1.7146 (1.7677) time: 2.9485 data: 0.0003 max mem: 29202 +[2024-12-10 17:04:40 root] (utils.py 283): INFO Epoch: [2] [1950/2502] eta: 0:27:05 lr: 0.000020 loss_cls: 2.7555 (2.7906) grad_norm: 1.7146 (1.7680) time: 2.9552 data: 0.0003 max mem: 29202 +[2024-12-10 17:05:09 root] (utils.py 283): INFO Epoch: [2] [1960/2502] eta: 0:26:36 lr: 0.000020 loss_cls: 2.9597 (2.7913) grad_norm: 1.6013 (1.7677) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-10 17:05:39 root] (utils.py 283): INFO Epoch: [2] [1970/2502] eta: 0:26:06 lr: 0.000020 loss_cls: 2.8557 (2.7905) grad_norm: 1.6933 (1.7690) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-10 17:06:08 root] (utils.py 283): INFO Epoch: [2] [1980/2502] eta: 0:25:37 lr: 0.000020 loss_cls: 2.7877 (2.7907) grad_norm: 1.6968 (1.7694) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 17:06:38 root] (utils.py 283): INFO Epoch: [2] [1990/2502] eta: 0:25:08 lr: 0.000020 loss_cls: 2.6588 (2.7900) grad_norm: 1.6542 (1.7690) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-10 17:07:07 root] (utils.py 283): INFO Epoch: [2] [2000/2502] eta: 0:24:38 lr: 0.000020 loss_cls: 2.6027 (2.7883) grad_norm: 1.6412 (1.7685) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-10 17:07:37 root] (utils.py 283): INFO Epoch: [2] [2010/2502] eta: 0:24:09 lr: 0.000020 loss_cls: 2.5432 (2.7882) grad_norm: 1.6713 (1.7689) time: 2.9478 data: 0.0002 max mem: 29202 +[2024-12-10 17:08:06 root] (utils.py 283): INFO Epoch: [2] [2020/2502] eta: 0:23:39 lr: 0.000020 loss_cls: 2.8708 (2.7880) grad_norm: 1.6633 (1.7681) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-10 17:08:35 root] (utils.py 283): INFO Epoch: [2] [2030/2502] eta: 0:23:10 lr: 0.000020 loss_cls: 2.9224 (2.7882) grad_norm: 1.6223 (1.7677) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 17:09:05 root] (utils.py 283): INFO Epoch: [2] [2040/2502] eta: 0:22:40 lr: 0.000020 loss_cls: 2.8407 (2.7868) grad_norm: 1.5489 (1.7670) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 17:09:34 root] (utils.py 283): INFO Epoch: [2] [2050/2502] eta: 0:22:11 lr: 0.000020 loss_cls: 2.8407 (2.7864) grad_norm: 1.5306 (1.7662) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-10 17:10:04 root] (utils.py 283): INFO Epoch: [2] [2060/2502] eta: 0:21:41 lr: 0.000020 loss_cls: 2.9552 (2.7872) grad_norm: 1.6014 (1.7657) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 17:10:33 root] (utils.py 283): INFO Epoch: [2] [2070/2502] eta: 0:21:12 lr: 0.000020 loss_cls: 2.8573 (2.7864) grad_norm: 1.6274 (1.7655) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-10 17:11:03 root] (utils.py 283): INFO Epoch: [2] [2080/2502] eta: 0:20:42 lr: 0.000020 loss_cls: 2.6975 (2.7853) grad_norm: 1.6338 (1.7653) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-10 17:11:32 root] (utils.py 283): INFO Epoch: [2] [2090/2502] eta: 0:20:13 lr: 0.000020 loss_cls: 2.7320 (2.7855) grad_norm: 1.6338 (1.7652) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 17:12:01 root] (utils.py 283): INFO Epoch: [2] [2100/2502] eta: 0:19:43 lr: 0.000020 loss_cls: 2.7248 (2.7854) grad_norm: 1.6472 (1.7651) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-10 17:12:31 root] (utils.py 283): INFO Epoch: [2] [2110/2502] eta: 0:19:14 lr: 0.000020 loss_cls: 2.6857 (2.7845) grad_norm: 1.6006 (1.7649) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-10 17:13:00 root] (utils.py 283): INFO Epoch: [2] [2120/2502] eta: 0:18:45 lr: 0.000020 loss_cls: 2.6927 (2.7843) grad_norm: 1.7104 (1.7653) time: 2.9535 data: 0.0003 max mem: 29202 +[2024-12-10 17:13:30 root] (utils.py 283): INFO Epoch: [2] [2130/2502] eta: 0:18:15 lr: 0.000020 loss_cls: 2.8695 (2.7848) grad_norm: 1.7410 (1.7653) time: 2.9533 data: 0.0003 max mem: 29202 +[2024-12-10 17:13:59 root] (utils.py 283): INFO Epoch: [2] [2140/2502] eta: 0:17:46 lr: 0.000020 loss_cls: 2.8695 (2.7845) grad_norm: 1.6388 (1.7654) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 17:14:29 root] (utils.py 283): INFO Epoch: [2] [2150/2502] eta: 0:17:16 lr: 0.000020 loss_cls: 3.0924 (2.7863) grad_norm: 1.7808 (1.7660) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-10 17:14:58 root] (utils.py 283): INFO Epoch: [2] [2160/2502] eta: 0:16:47 lr: 0.000020 loss_cls: 3.1202 (2.7873) grad_norm: 1.7993 (1.7660) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 17:15:28 root] (utils.py 283): INFO Epoch: [2] [2170/2502] eta: 0:16:17 lr: 0.000020 loss_cls: 3.0090 (2.7874) grad_norm: 1.6564 (1.7660) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-10 17:15:57 root] (utils.py 283): INFO Epoch: [2] [2180/2502] eta: 0:15:48 lr: 0.000020 loss_cls: 2.8769 (2.7864) grad_norm: 1.6978 (1.7654) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-10 17:16:27 root] (utils.py 283): INFO Epoch: [2] [2190/2502] eta: 0:15:18 lr: 0.000020 loss_cls: 2.8114 (2.7860) grad_norm: 1.7317 (1.7674) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-10 17:16:56 root] (utils.py 283): INFO Epoch: [2] [2200/2502] eta: 0:14:49 lr: 0.000020 loss_cls: 2.9134 (2.7864) grad_norm: 1.8606 (1.7678) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-10 17:17:25 root] (utils.py 283): INFO Epoch: [2] [2210/2502] eta: 0:14:19 lr: 0.000020 loss_cls: 2.8147 (2.7860) grad_norm: 1.7700 (1.7677) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-10 17:17:55 root] (utils.py 283): INFO Epoch: [2] [2220/2502] eta: 0:13:50 lr: 0.000020 loss_cls: 2.8147 (2.7864) grad_norm: 1.7236 (1.7677) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 17:18:24 root] (utils.py 283): INFO Epoch: [2] [2230/2502] eta: 0:13:21 lr: 0.000020 loss_cls: 2.8286 (2.7866) grad_norm: 1.7327 (1.7682) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 17:18:54 root] (utils.py 283): INFO Epoch: [2] [2240/2502] eta: 0:12:51 lr: 0.000020 loss_cls: 2.8951 (2.7872) grad_norm: 1.6701 (1.7679) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 17:19:23 root] (utils.py 283): INFO Epoch: [2] [2250/2502] eta: 0:12:22 lr: 0.000020 loss_cls: 2.9519 (2.7866) grad_norm: 1.5949 (1.7674) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 17:19:52 root] (utils.py 283): INFO Epoch: [2] [2260/2502] eta: 0:11:52 lr: 0.000020 loss_cls: 2.9842 (2.7878) grad_norm: 1.6033 (1.7672) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-10 17:20:22 root] (utils.py 283): INFO Epoch: [2] [2270/2502] eta: 0:11:23 lr: 0.000020 loss_cls: 3.0574 (2.7884) grad_norm: 1.6862 (1.7671) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 17:20:51 root] (utils.py 283): INFO Epoch: [2] [2280/2502] eta: 0:10:53 lr: 0.000020 loss_cls: 2.6502 (2.7872) grad_norm: 1.6517 (1.7664) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 17:21:21 root] (utils.py 283): INFO Epoch: [2] [2290/2502] eta: 0:10:24 lr: 0.000020 loss_cls: 2.6117 (2.7869) grad_norm: 1.6183 (1.7663) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 17:21:50 root] (utils.py 283): INFO Epoch: [2] [2300/2502] eta: 0:09:54 lr: 0.000020 loss_cls: 2.8637 (2.7876) grad_norm: 1.7454 (1.7664) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 17:22:20 root] (utils.py 283): INFO Epoch: [2] [2310/2502] eta: 0:09:25 lr: 0.000020 loss_cls: 2.8319 (2.7879) grad_norm: 1.6875 (1.7666) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 17:22:49 root] (utils.py 283): INFO Epoch: [2] [2320/2502] eta: 0:08:55 lr: 0.000020 loss_cls: 2.8319 (2.7882) grad_norm: 1.7007 (1.7671) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 17:23:18 root] (utils.py 283): INFO Epoch: [2] [2330/2502] eta: 0:08:26 lr: 0.000020 loss_cls: 2.7126 (2.7874) grad_norm: 1.7007 (1.7668) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 17:23:48 root] (utils.py 283): INFO Epoch: [2] [2340/2502] eta: 0:07:57 lr: 0.000020 loss_cls: 2.8294 (2.7882) grad_norm: 1.5952 (1.7661) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-10 17:24:17 root] (utils.py 283): INFO Epoch: [2] [2350/2502] eta: 0:07:27 lr: 0.000020 loss_cls: 2.8962 (2.7885) grad_norm: 1.6293 (1.7665) time: 2.9405 data: 0.0002 max mem: 29202 +[2024-12-10 17:24:47 root] (utils.py 283): INFO Epoch: [2] [2360/2502] eta: 0:06:58 lr: 0.000020 loss_cls: 2.8443 (2.7887) grad_norm: 1.6434 (1.7661) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 17:25:16 root] (utils.py 283): INFO Epoch: [2] [2370/2502] eta: 0:06:28 lr: 0.000020 loss_cls: 2.7479 (2.7883) grad_norm: 1.5175 (1.7655) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-10 17:25:46 root] (utils.py 283): INFO Epoch: [2] [2380/2502] eta: 0:05:59 lr: 0.000020 loss_cls: 2.7940 (2.7886) grad_norm: 1.5655 (1.7649) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-10 17:26:15 root] (utils.py 283): INFO Epoch: [2] [2390/2502] eta: 0:05:29 lr: 0.000020 loss_cls: 2.9921 (2.7890) grad_norm: 1.6599 (1.7655) time: 2.9519 data: 0.0003 max mem: 29202 +[2024-12-10 17:26:45 root] (utils.py 283): INFO Epoch: [2] [2400/2502] eta: 0:05:00 lr: 0.000020 loss_cls: 2.8647 (2.7879) grad_norm: 1.6673 (1.7648) time: 2.9538 data: 0.0003 max mem: 29202 +[2024-12-10 17:27:14 root] (utils.py 283): INFO Epoch: [2] [2410/2502] eta: 0:04:30 lr: 0.000020 loss_cls: 2.7220 (2.7878) grad_norm: 1.5918 (1.7646) time: 2.9514 data: 0.0003 max mem: 29202 +[2024-12-10 17:27:44 root] (utils.py 283): INFO Epoch: [2] [2420/2502] eta: 0:04:01 lr: 0.000020 loss_cls: 2.8871 (2.7886) grad_norm: 1.6594 (1.7648) time: 2.9542 data: 0.0003 max mem: 29202 +[2024-12-10 17:28:13 root] (utils.py 283): INFO Epoch: [2] [2430/2502] eta: 0:03:32 lr: 0.000020 loss_cls: 2.9018 (2.7886) grad_norm: 1.7263 (1.7646) time: 2.9597 data: 0.0004 max mem: 29202 +[2024-12-10 17:28:43 root] (utils.py 283): INFO Epoch: [2] [2440/2502] eta: 0:03:02 lr: 0.000020 loss_cls: 2.7035 (2.7874) grad_norm: 1.7263 (1.7664) time: 2.9687 data: 0.0004 max mem: 29202 +[2024-12-10 17:29:13 root] (utils.py 283): INFO Epoch: [2] [2450/2502] eta: 0:02:33 lr: 0.000020 loss_cls: 2.6787 (2.7871) grad_norm: 1.7706 (1.7664) time: 2.9684 data: 0.0003 max mem: 29202 +[2024-12-10 17:29:42 root] (utils.py 283): INFO Epoch: [2] [2460/2502] eta: 0:02:03 lr: 0.000020 loss_cls: 2.8670 (2.7876) grad_norm: 1.7824 (1.7666) time: 2.9678 data: 0.0003 max mem: 29202 +[2024-12-10 17:30:12 root] (utils.py 283): INFO Epoch: [2] [2470/2502] eta: 0:01:34 lr: 0.000020 loss_cls: 3.0441 (2.7878) grad_norm: 1.7433 (1.7664) time: 2.9755 data: 0.0004 max mem: 29202 +[2024-12-10 17:30:42 root] (utils.py 283): INFO Epoch: [2] [2480/2502] eta: 0:01:04 lr: 0.000020 loss_cls: 2.8503 (2.7881) grad_norm: 1.6698 (1.7662) time: 2.9751 data: 0.0004 max mem: 29202 +[2024-12-10 17:31:12 root] (utils.py 283): INFO Epoch: [2] [2490/2502] eta: 0:00:35 lr: 0.000020 loss_cls: 2.7640 (2.7876) grad_norm: 1.5937 (1.7656) time: 2.9862 data: 0.0255 max mem: 29202 +[2024-12-10 17:31:42 root] (utils.py 283): INFO Epoch: [2] [2500/2502] eta: 0:00:05 lr: 0.000020 loss_cls: 2.8340 (2.7872) grad_norm: 1.7298 (1.7658) time: 2.9799 data: 0.0254 max mem: 29202 +[2024-12-10 17:31:45 root] (utils.py 283): INFO Epoch: [2] [2501/2502] eta: 0:00:02 lr: 0.000020 loss_cls: 2.8340 (2.7871) grad_norm: 1.7298 (1.7657) time: 2.9791 data: 0.0254 max mem: 29202 +[2024-12-10 17:31:45 root] (utils.py 297): INFO Epoch: [2] Total time: 2:02:51 (2.9462 s / it) +[2024-12-10 17:31:45 root] (engine.py 179): INFO Averaged stats:lr: 0.000020 loss_cls: 2.8340 (2.7845) grad_norm: 1.7298 (1.7657) +[2024-12-10 17:31:49 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3436 (0.3436) acc1: 93.7500 (93.7500) acc3: 97.6562 (97.6562) acc5: 98.4375 (98.4375) time: 0.5687 data: 0.0003 max mem: 29202 +[2024-12-10 17:31:54 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6451 (0.6604) acc1: 86.7188 (86.4347) acc3: 97.6562 (96.5909) acc5: 98.4375 (98.0824) time: 0.5520 data: 0.0003 max mem: 29202 +[2024-12-10 17:32:00 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6451 (0.6987) acc1: 84.3750 (85.4167) acc3: 96.8750 (95.9077) acc5: 97.6562 (97.6190) time: 0.5511 data: 0.0004 max mem: 29202 +[2024-12-10 17:32:05 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6598 (0.7227) acc1: 84.3750 (84.7530) acc3: 95.3125 (95.6401) acc5: 96.8750 (97.4294) time: 0.5515 data: 0.0004 max mem: 29202 +[2024-12-10 17:32:11 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7050 (0.7346) acc1: 85.1562 (84.8323) acc3: 95.3125 (95.5793) acc5: 96.8750 (97.3704) time: 0.5517 data: 0.0005 max mem: 29202 +[2024-12-10 17:32:16 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9742 (0.8201) acc1: 78.9062 (83.0116) acc3: 92.1875 (94.4393) acc5: 95.3125 (96.4308) time: 0.5524 data: 0.0005 max mem: 29202 +[2024-12-10 17:32:22 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0055 (0.8520) acc1: 77.3438 (82.5307) acc3: 89.8438 (93.7884) acc5: 93.7500 (95.9657) time: 0.5527 data: 0.0004 max mem: 29202 +[2024-12-10 17:32:27 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0563 (0.8897) acc1: 78.9062 (81.6351) acc3: 90.6250 (93.3539) acc5: 93.7500 (95.6976) time: 0.5523 data: 0.0004 max mem: 29202 +[2024-12-10 17:32:33 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0787 (0.9148) acc1: 77.3438 (81.0957) acc3: 90.6250 (92.9012) acc5: 93.7500 (95.3704) time: 0.5525 data: 0.0007 max mem: 29202 +[2024-12-10 17:32:38 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.1719 (0.9456) acc1: 75.7812 (80.3486) acc3: 89.0625 (92.5824) acc5: 92.9688 (95.1923) time: 0.5535 data: 0.0007 max mem: 29202 +[2024-12-10 17:32:42 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0562 (0.9470) acc1: 75.7812 (80.3040) acc3: 90.6250 (92.6560) acc5: 94.5312 (95.2880) time: 0.5440 data: 0.0005 max mem: 29202 +[2024-12-10 17:32:42 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5506 s / it) +[2024-12-10 17:32:42 root] (engine.py 264): INFO * Acc@1 80.360 Acc@3 92.498 Acc@5 95.086 loss 0.948 flops 13.207 layer_flops 13.109 +[2024-12-10 17:32:42 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 80.4% +[2024-12-10 17:32:42 root] (main.py 576): INFO Max accuracy: 80.36% +[2024-12-10 17:32:45 root] (utils.py 283): INFO Epoch: [3] [ 0/2502] eta: 2:00:57 lr: 0.000019 loss_cls: 2.1652 (2.1652) grad_norm: 1.7471 (1.7471) time: 2.9008 data: 0.0002 max mem: 29202 +[2024-12-10 17:33:14 root] (utils.py 283): INFO Epoch: [3] [ 10/2502] eta: 2:02:05 lr: 0.000019 loss_cls: 2.6315 (2.6212) grad_norm: 1.7372 (1.7372) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 17:33:44 root] (utils.py 283): INFO Epoch: [3] [ 20/2502] eta: 2:01:56 lr: 0.000019 loss_cls: 2.7462 (2.6674) grad_norm: 1.6233 (1.6893) time: 2.9502 data: 0.0003 max mem: 29202 +[2024-12-10 17:34:13 root] (utils.py 283): INFO Epoch: [3] [ 30/2502] eta: 2:01:29 lr: 0.000019 loss_cls: 2.7936 (2.6623) grad_norm: 1.6213 (1.6899) time: 2.9539 data: 0.0003 max mem: 29202 +[2024-12-10 17:34:43 root] (utils.py 283): INFO Epoch: [3] [ 40/2502] eta: 2:01:04 lr: 0.000019 loss_cls: 2.8282 (2.7321) grad_norm: 1.6690 (1.6984) time: 2.9537 data: 0.0003 max mem: 29202 +[2024-12-10 17:35:13 root] (utils.py 283): INFO Epoch: [3] [ 50/2502] eta: 2:00:37 lr: 0.000019 loss_cls: 2.8282 (2.6892) grad_norm: 1.6116 (1.6929) time: 2.9559 data: 0.0003 max mem: 29202 +[2024-12-10 17:35:42 root] (utils.py 283): INFO Epoch: [3] [ 60/2502] eta: 2:00:04 lr: 0.000019 loss_cls: 2.5732 (2.6490) grad_norm: 1.5849 (1.6732) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-10 17:36:11 root] (utils.py 283): INFO Epoch: [3] [ 70/2502] eta: 1:59:30 lr: 0.000019 loss_cls: 2.6779 (2.6740) grad_norm: 1.5627 (1.6798) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-10 17:36:41 root] (utils.py 283): INFO Epoch: [3] [ 80/2502] eta: 1:59:01 lr: 0.000019 loss_cls: 2.9240 (2.7041) grad_norm: 1.6114 (1.7112) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 17:37:10 root] (utils.py 283): INFO Epoch: [3] [ 90/2502] eta: 1:58:30 lr: 0.000019 loss_cls: 2.8566 (2.7058) grad_norm: 1.7359 (1.7138) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 17:37:40 root] (utils.py 283): INFO Epoch: [3] [ 100/2502] eta: 1:57:58 lr: 0.000019 loss_cls: 2.8839 (2.7181) grad_norm: 1.7359 (1.7104) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 17:38:09 root] (utils.py 283): INFO Epoch: [3] [ 110/2502] eta: 1:57:30 lr: 0.000019 loss_cls: 2.8839 (2.7078) grad_norm: 1.6076 (1.7068) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-10 17:38:39 root] (utils.py 283): INFO Epoch: [3] [ 120/2502] eta: 1:57:00 lr: 0.000019 loss_cls: 2.8625 (2.7163) grad_norm: 1.6076 (1.7020) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-10 17:39:08 root] (utils.py 283): INFO Epoch: [3] [ 130/2502] eta: 1:56:33 lr: 0.000019 loss_cls: 2.9333 (2.7298) grad_norm: 1.6684 (1.7073) time: 2.9530 data: 0.0003 max mem: 29202 +[2024-12-10 17:39:38 root] (utils.py 283): INFO Epoch: [3] [ 140/2502] eta: 1:56:06 lr: 0.000019 loss_cls: 2.9247 (2.7288) grad_norm: 1.6081 (1.7024) time: 2.9615 data: 0.0003 max mem: 29202 +[2024-12-10 17:40:07 root] (utils.py 283): INFO Epoch: [3] [ 150/2502] eta: 1:55:37 lr: 0.000019 loss_cls: 2.9014 (2.7354) grad_norm: 1.5569 (1.6995) time: 2.9578 data: 0.0003 max mem: 29202 +[2024-12-10 17:40:37 root] (utils.py 283): INFO Epoch: [3] [ 160/2502] eta: 1:55:08 lr: 0.000019 loss_cls: 2.9189 (2.7497) grad_norm: 1.6759 (1.7001) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-10 17:41:06 root] (utils.py 283): INFO Epoch: [3] [ 170/2502] eta: 1:54:37 lr: 0.000019 loss_cls: 2.8453 (2.7448) grad_norm: 1.6733 (1.6998) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 17:41:36 root] (utils.py 283): INFO Epoch: [3] [ 180/2502] eta: 1:54:06 lr: 0.000019 loss_cls: 2.8233 (2.7505) grad_norm: 1.6074 (1.6953) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-10 17:42:05 root] (utils.py 283): INFO Epoch: [3] [ 190/2502] eta: 1:53:36 lr: 0.000019 loss_cls: 2.8615 (2.7557) grad_norm: 1.5941 (1.6993) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 17:42:35 root] (utils.py 283): INFO Epoch: [3] [ 200/2502] eta: 1:53:05 lr: 0.000019 loss_cls: 2.8615 (2.7652) grad_norm: 1.6777 (1.7033) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 17:43:04 root] (utils.py 283): INFO Epoch: [3] [ 210/2502] eta: 1:52:35 lr: 0.000019 loss_cls: 2.8289 (2.7587) grad_norm: 1.6456 (1.7007) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 17:43:33 root] (utils.py 283): INFO Epoch: [3] [ 220/2502] eta: 1:52:06 lr: 0.000019 loss_cls: 2.5566 (2.7388) grad_norm: 1.6425 (1.6998) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 17:44:03 root] (utils.py 283): INFO Epoch: [3] [ 230/2502] eta: 1:51:36 lr: 0.000019 loss_cls: 2.7350 (2.7418) grad_norm: 1.7149 (1.7055) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-10 17:44:32 root] (utils.py 283): INFO Epoch: [3] [ 240/2502] eta: 1:51:06 lr: 0.000019 loss_cls: 2.8152 (2.7436) grad_norm: 1.7123 (1.7044) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 17:45:02 root] (utils.py 283): INFO Epoch: [3] [ 250/2502] eta: 1:50:36 lr: 0.000019 loss_cls: 2.7655 (2.7415) grad_norm: 1.7017 (1.7070) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 17:45:31 root] (utils.py 283): INFO Epoch: [3] [ 260/2502] eta: 1:50:06 lr: 0.000019 loss_cls: 2.7953 (2.7421) grad_norm: 1.7149 (1.7050) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 17:46:01 root] (utils.py 283): INFO Epoch: [3] [ 270/2502] eta: 1:49:37 lr: 0.000019 loss_cls: 2.7953 (2.7420) grad_norm: 1.6449 (1.7100) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 17:46:30 root] (utils.py 283): INFO Epoch: [3] [ 280/2502] eta: 1:49:06 lr: 0.000019 loss_cls: 2.7006 (2.7416) grad_norm: 1.6366 (1.7073) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-10 17:46:59 root] (utils.py 283): INFO Epoch: [3] [ 290/2502] eta: 1:48:35 lr: 0.000019 loss_cls: 2.7253 (2.7445) grad_norm: 1.6509 (1.7069) time: 2.9274 data: 0.0003 max mem: 29202 +[2024-12-10 17:47:28 root] (utils.py 283): INFO Epoch: [3] [ 300/2502] eta: 1:48:04 lr: 0.000019 loss_cls: 2.8896 (2.7460) grad_norm: 1.6300 (1.7045) time: 2.9235 data: 0.0003 max mem: 29202 +[2024-12-10 17:47:58 root] (utils.py 283): INFO Epoch: [3] [ 310/2502] eta: 1:47:33 lr: 0.000019 loss_cls: 2.9167 (2.7465) grad_norm: 1.5970 (1.7045) time: 2.9230 data: 0.0003 max mem: 29202 +[2024-12-10 17:48:28 root] (utils.py 283): INFO Epoch: [3] [ 320/2502] eta: 1:47:07 lr: 0.000019 loss_cls: 2.8434 (2.7432) grad_norm: 1.5927 (1.7019) time: 2.9622 data: 0.0003 max mem: 29202 +[2024-12-10 17:48:57 root] (utils.py 283): INFO Epoch: [3] [ 330/2502] eta: 1:46:38 lr: 0.000019 loss_cls: 2.9095 (2.7481) grad_norm: 1.5733 (1.7011) time: 2.9791 data: 0.0003 max mem: 29202 +[2024-12-10 17:49:27 root] (utils.py 283): INFO Epoch: [3] [ 340/2502] eta: 1:46:09 lr: 0.000019 loss_cls: 2.9943 (2.7481) grad_norm: 1.6321 (1.6980) time: 2.9498 data: 0.0003 max mem: 29202 +[2024-12-10 17:49:56 root] (utils.py 283): INFO Epoch: [3] [ 350/2502] eta: 1:45:39 lr: 0.000019 loss_cls: 2.6876 (2.7435) grad_norm: 1.6551 (1.7086) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 17:50:25 root] (utils.py 283): INFO Epoch: [3] [ 360/2502] eta: 1:45:09 lr: 0.000019 loss_cls: 2.8150 (2.7497) grad_norm: 1.7605 (1.7128) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 17:50:55 root] (utils.py 283): INFO Epoch: [3] [ 370/2502] eta: 1:44:39 lr: 0.000019 loss_cls: 2.9676 (2.7535) grad_norm: 1.7487 (1.7140) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-10 17:51:24 root] (utils.py 283): INFO Epoch: [3] [ 380/2502] eta: 1:44:09 lr: 0.000019 loss_cls: 2.9676 (2.7532) grad_norm: 1.6963 (1.7156) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-10 17:51:54 root] (utils.py 283): INFO Epoch: [3] [ 390/2502] eta: 1:43:40 lr: 0.000019 loss_cls: 2.9435 (2.7555) grad_norm: 1.6733 (1.7150) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-10 17:52:23 root] (utils.py 283): INFO Epoch: [3] [ 400/2502] eta: 1:43:10 lr: 0.000019 loss_cls: 2.8938 (2.7615) grad_norm: 1.6589 (1.7166) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 17:52:53 root] (utils.py 283): INFO Epoch: [3] [ 410/2502] eta: 1:42:41 lr: 0.000019 loss_cls: 2.7472 (2.7582) grad_norm: 1.6589 (1.7163) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 17:53:22 root] (utils.py 283): INFO Epoch: [3] [ 420/2502] eta: 1:42:12 lr: 0.000019 loss_cls: 2.6963 (2.7588) grad_norm: 1.5543 (1.7154) time: 2.9579 data: 0.0003 max mem: 29202 +[2024-12-10 17:53:52 root] (utils.py 283): INFO Epoch: [3] [ 430/2502] eta: 1:41:43 lr: 0.000019 loss_cls: 2.7605 (2.7578) grad_norm: 1.5880 (1.7152) time: 2.9590 data: 0.0003 max mem: 29202 +[2024-12-10 17:54:21 root] (utils.py 283): INFO Epoch: [3] [ 440/2502] eta: 1:41:14 lr: 0.000019 loss_cls: 2.9239 (2.7612) grad_norm: 1.6435 (1.7137) time: 2.9539 data: 0.0003 max mem: 29202 +[2024-12-10 17:54:51 root] (utils.py 283): INFO Epoch: [3] [ 450/2502] eta: 1:40:45 lr: 0.000019 loss_cls: 2.7967 (2.7579) grad_norm: 1.6435 (1.7139) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-10 17:55:20 root] (utils.py 283): INFO Epoch: [3] [ 460/2502] eta: 1:40:16 lr: 0.000019 loss_cls: 2.7967 (2.7563) grad_norm: 1.6727 (1.7147) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-10 17:55:50 root] (utils.py 283): INFO Epoch: [3] [ 470/2502] eta: 1:39:46 lr: 0.000019 loss_cls: 2.8799 (2.7564) grad_norm: 1.6744 (1.7193) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 17:56:19 root] (utils.py 283): INFO Epoch: [3] [ 480/2502] eta: 1:39:17 lr: 0.000019 loss_cls: 2.7867 (2.7572) grad_norm: 1.6967 (1.7194) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 17:56:49 root] (utils.py 283): INFO Epoch: [3] [ 490/2502] eta: 1:38:47 lr: 0.000019 loss_cls: 2.7643 (2.7549) grad_norm: 1.6625 (1.7191) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-10 17:57:18 root] (utils.py 283): INFO Epoch: [3] [ 500/2502] eta: 1:38:18 lr: 0.000019 loss_cls: 2.8611 (2.7542) grad_norm: 1.6415 (1.7174) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 17:57:48 root] (utils.py 283): INFO Epoch: [3] [ 510/2502] eta: 1:37:48 lr: 0.000019 loss_cls: 2.7974 (2.7537) grad_norm: 1.6249 (1.7189) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-10 17:58:17 root] (utils.py 283): INFO Epoch: [3] [ 520/2502] eta: 1:37:19 lr: 0.000019 loss_cls: 2.6475 (2.7501) grad_norm: 1.6484 (1.7232) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 17:58:47 root] (utils.py 283): INFO Epoch: [3] [ 530/2502] eta: 1:36:50 lr: 0.000019 loss_cls: 2.8200 (2.7537) grad_norm: 1.6964 (1.7241) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 17:59:16 root] (utils.py 283): INFO Epoch: [3] [ 540/2502] eta: 1:36:20 lr: 0.000019 loss_cls: 2.7936 (2.7516) grad_norm: 1.5431 (1.7217) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 17:59:45 root] (utils.py 283): INFO Epoch: [3] [ 550/2502] eta: 1:35:51 lr: 0.000019 loss_cls: 2.5052 (2.7453) grad_norm: 1.6024 (1.7218) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 18:00:15 root] (utils.py 283): INFO Epoch: [3] [ 560/2502] eta: 1:35:21 lr: 0.000019 loss_cls: 2.8935 (2.7472) grad_norm: 1.5848 (1.7186) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 18:00:44 root] (utils.py 283): INFO Epoch: [3] [ 570/2502] eta: 1:34:51 lr: 0.000019 loss_cls: 3.0969 (2.7498) grad_norm: 1.5848 (1.7183) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 18:01:14 root] (utils.py 283): INFO Epoch: [3] [ 580/2502] eta: 1:34:22 lr: 0.000019 loss_cls: 2.9307 (2.7491) grad_norm: 1.6098 (1.7175) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-10 18:01:43 root] (utils.py 283): INFO Epoch: [3] [ 590/2502] eta: 1:33:52 lr: 0.000019 loss_cls: 2.9347 (2.7553) grad_norm: 1.6097 (1.7186) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 18:02:13 root] (utils.py 283): INFO Epoch: [3] [ 600/2502] eta: 1:33:23 lr: 0.000019 loss_cls: 2.9362 (2.7519) grad_norm: 1.7214 (1.7183) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 18:02:42 root] (utils.py 283): INFO Epoch: [3] [ 610/2502] eta: 1:32:53 lr: 0.000019 loss_cls: 2.5927 (2.7492) grad_norm: 1.6262 (1.7175) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-10 18:03:12 root] (utils.py 283): INFO Epoch: [3] [ 620/2502] eta: 1:32:24 lr: 0.000019 loss_cls: 2.8368 (2.7514) grad_norm: 1.5560 (1.7166) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 18:03:41 root] (utils.py 283): INFO Epoch: [3] [ 630/2502] eta: 1:31:54 lr: 0.000019 loss_cls: 2.8936 (2.7498) grad_norm: 1.6568 (1.7179) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 18:04:10 root] (utils.py 283): INFO Epoch: [3] [ 640/2502] eta: 1:31:25 lr: 0.000019 loss_cls: 2.9005 (2.7523) grad_norm: 1.6226 (1.7200) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 18:04:40 root] (utils.py 283): INFO Epoch: [3] [ 650/2502] eta: 1:30:55 lr: 0.000019 loss_cls: 3.0397 (2.7538) grad_norm: 1.6226 (1.7198) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-10 18:05:09 root] (utils.py 283): INFO Epoch: [3] [ 660/2502] eta: 1:30:26 lr: 0.000019 loss_cls: 2.8071 (2.7521) grad_norm: 1.6323 (1.7185) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-10 18:05:39 root] (utils.py 283): INFO Epoch: [3] [ 670/2502] eta: 1:29:56 lr: 0.000019 loss_cls: 2.7917 (2.7531) grad_norm: 1.6212 (1.7187) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 18:06:08 root] (utils.py 283): INFO Epoch: [3] [ 680/2502] eta: 1:29:27 lr: 0.000019 loss_cls: 2.9425 (2.7555) grad_norm: 1.7598 (1.7196) time: 2.9501 data: 0.0003 max mem: 29202 +[2024-12-10 18:06:38 root] (utils.py 283): INFO Epoch: [3] [ 690/2502] eta: 1:28:58 lr: 0.000019 loss_cls: 2.9176 (2.7517) grad_norm: 1.6865 (1.7170) time: 2.9557 data: 0.0003 max mem: 29202 +[2024-12-10 18:07:07 root] (utils.py 283): INFO Epoch: [3] [ 700/2502] eta: 1:28:28 lr: 0.000019 loss_cls: 2.7243 (2.7535) grad_norm: 1.4899 (1.7157) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 18:07:37 root] (utils.py 283): INFO Epoch: [3] [ 710/2502] eta: 1:27:58 lr: 0.000019 loss_cls: 2.8547 (2.7525) grad_norm: 1.7127 (1.7180) time: 2.9328 data: 0.0003 max mem: 29202 +[2024-12-10 18:08:06 root] (utils.py 283): INFO Epoch: [3] [ 720/2502] eta: 1:27:29 lr: 0.000019 loss_cls: 2.5075 (2.7490) grad_norm: 1.6351 (1.7175) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-10 18:08:35 root] (utils.py 283): INFO Epoch: [3] [ 730/2502] eta: 1:26:59 lr: 0.000019 loss_cls: 2.4334 (2.7478) grad_norm: 1.6351 (1.7171) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-10 18:09:05 root] (utils.py 283): INFO Epoch: [3] [ 740/2502] eta: 1:26:29 lr: 0.000019 loss_cls: 2.8792 (2.7484) grad_norm: 1.6600 (1.7163) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-10 18:09:34 root] (utils.py 283): INFO Epoch: [3] [ 750/2502] eta: 1:25:59 lr: 0.000019 loss_cls: 2.9810 (2.7492) grad_norm: 1.6058 (1.7164) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-10 18:10:03 root] (utils.py 283): INFO Epoch: [3] [ 760/2502] eta: 1:25:30 lr: 0.000019 loss_cls: 2.9810 (2.7520) grad_norm: 1.5908 (1.7178) time: 2.9353 data: 0.0003 max mem: 29202 +[2024-12-10 18:10:33 root] (utils.py 283): INFO Epoch: [3] [ 770/2502] eta: 1:25:00 lr: 0.000019 loss_cls: 2.9660 (2.7534) grad_norm: 1.5469 (1.7176) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-10 18:11:02 root] (utils.py 283): INFO Epoch: [3] [ 780/2502] eta: 1:24:31 lr: 0.000019 loss_cls: 2.8737 (2.7535) grad_norm: 1.5916 (1.7184) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-10 18:11:31 root] (utils.py 283): INFO Epoch: [3] [ 790/2502] eta: 1:24:01 lr: 0.000019 loss_cls: 2.7951 (2.7540) grad_norm: 1.6770 (1.7186) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-10 18:12:01 root] (utils.py 283): INFO Epoch: [3] [ 800/2502] eta: 1:23:31 lr: 0.000019 loss_cls: 2.6881 (2.7529) grad_norm: 1.5846 (1.7174) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-10 18:12:30 root] (utils.py 283): INFO Epoch: [3] [ 810/2502] eta: 1:23:02 lr: 0.000019 loss_cls: 2.6451 (2.7522) grad_norm: 1.5584 (1.7178) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-10 18:12:59 root] (utils.py 283): INFO Epoch: [3] [ 820/2502] eta: 1:22:32 lr: 0.000019 loss_cls: 2.8152 (2.7521) grad_norm: 1.5808 (1.7169) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-10 18:13:29 root] (utils.py 283): INFO Epoch: [3] [ 830/2502] eta: 1:22:02 lr: 0.000019 loss_cls: 2.8152 (2.7532) grad_norm: 1.5927 (1.7183) time: 2.9323 data: 0.0003 max mem: 29202 +[2024-12-10 18:13:58 root] (utils.py 283): INFO Epoch: [3] [ 840/2502] eta: 1:21:33 lr: 0.000019 loss_cls: 2.6829 (2.7513) grad_norm: 1.6585 (1.7175) time: 2.9336 data: 0.0003 max mem: 29202 +[2024-12-10 18:14:27 root] (utils.py 283): INFO Epoch: [3] [ 850/2502] eta: 1:21:03 lr: 0.000019 loss_cls: 2.9130 (2.7532) grad_norm: 1.7093 (1.7186) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-10 18:14:57 root] (utils.py 283): INFO Epoch: [3] [ 860/2502] eta: 1:20:33 lr: 0.000019 loss_cls: 2.9901 (2.7536) grad_norm: 1.7268 (1.7186) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-10 18:15:26 root] (utils.py 283): INFO Epoch: [3] [ 870/2502] eta: 1:20:04 lr: 0.000019 loss_cls: 3.0035 (2.7556) grad_norm: 1.5975 (1.7194) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-10 18:15:56 root] (utils.py 283): INFO Epoch: [3] [ 880/2502] eta: 1:19:34 lr: 0.000019 loss_cls: 2.9712 (2.7577) grad_norm: 1.5816 (1.7183) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 18:16:25 root] (utils.py 283): INFO Epoch: [3] [ 890/2502] eta: 1:19:05 lr: 0.000019 loss_cls: 2.8854 (2.7597) grad_norm: 1.5504 (1.7167) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 18:16:54 root] (utils.py 283): INFO Epoch: [3] [ 900/2502] eta: 1:18:35 lr: 0.000019 loss_cls: 2.8012 (2.7595) grad_norm: 1.5490 (1.7155) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-10 18:17:24 root] (utils.py 283): INFO Epoch: [3] [ 910/2502] eta: 1:18:06 lr: 0.000019 loss_cls: 2.6798 (2.7572) grad_norm: 1.5490 (1.7173) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-10 18:17:53 root] (utils.py 283): INFO Epoch: [3] [ 920/2502] eta: 1:17:36 lr: 0.000019 loss_cls: 2.7087 (2.7571) grad_norm: 1.5467 (1.7168) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-10 18:18:22 root] (utils.py 283): INFO Epoch: [3] [ 930/2502] eta: 1:17:06 lr: 0.000019 loss_cls: 2.7814 (2.7580) grad_norm: 1.6690 (1.7170) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-10 18:18:52 root] (utils.py 283): INFO Epoch: [3] [ 940/2502] eta: 1:16:37 lr: 0.000019 loss_cls: 2.8500 (2.7598) grad_norm: 1.7338 (1.7183) time: 2.9324 data: 0.0003 max mem: 29202 +[2024-12-10 18:19:21 root] (utils.py 283): INFO Epoch: [3] [ 950/2502] eta: 1:16:07 lr: 0.000019 loss_cls: 2.9282 (2.7618) grad_norm: 1.7445 (1.7191) time: 2.9312 data: 0.0003 max mem: 29202 +[2024-12-10 18:19:50 root] (utils.py 283): INFO Epoch: [3] [ 960/2502] eta: 1:15:38 lr: 0.000019 loss_cls: 2.9479 (2.7625) grad_norm: 1.6243 (1.7181) time: 2.9323 data: 0.0003 max mem: 29202 +[2024-12-10 18:20:20 root] (utils.py 283): INFO Epoch: [3] [ 970/2502] eta: 1:15:09 lr: 0.000019 loss_cls: 2.8542 (2.7609) grad_norm: 1.6243 (1.7175) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 18:20:49 root] (utils.py 283): INFO Epoch: [3] [ 980/2502] eta: 1:14:39 lr: 0.000019 loss_cls: 2.6012 (2.7598) grad_norm: 1.6689 (1.7170) time: 2.9536 data: 0.0003 max mem: 29202 +[2024-12-10 18:21:19 root] (utils.py 283): INFO Epoch: [3] [ 990/2502] eta: 1:14:10 lr: 0.000019 loss_cls: 2.9650 (2.7639) grad_norm: 1.6536 (1.7165) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 18:21:48 root] (utils.py 283): INFO Epoch: [3] [1000/2502] eta: 1:13:40 lr: 0.000019 loss_cls: 3.1172 (2.7650) grad_norm: 1.6331 (1.7163) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 18:22:18 root] (utils.py 283): INFO Epoch: [3] [1010/2502] eta: 1:13:11 lr: 0.000019 loss_cls: 2.8370 (2.7649) grad_norm: 1.5759 (1.7165) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 18:22:47 root] (utils.py 283): INFO Epoch: [3] [1020/2502] eta: 1:12:41 lr: 0.000019 loss_cls: 2.8597 (2.7656) grad_norm: 1.6825 (1.7172) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 18:23:17 root] (utils.py 283): INFO Epoch: [3] [1030/2502] eta: 1:12:12 lr: 0.000019 loss_cls: 2.9490 (2.7675) grad_norm: 1.7318 (1.7173) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 18:23:46 root] (utils.py 283): INFO Epoch: [3] [1040/2502] eta: 1:11:43 lr: 0.000019 loss_cls: 2.9962 (2.7684) grad_norm: 1.6630 (1.7171) time: 2.9600 data: 0.0003 max mem: 29202 +[2024-12-10 18:24:16 root] (utils.py 283): INFO Epoch: [3] [1050/2502] eta: 1:11:14 lr: 0.000019 loss_cls: 3.0068 (2.7704) grad_norm: 1.7074 (1.7179) time: 2.9614 data: 0.0003 max mem: 29202 +[2024-12-10 18:24:45 root] (utils.py 283): INFO Epoch: [3] [1060/2502] eta: 1:10:44 lr: 0.000019 loss_cls: 2.9195 (2.7710) grad_norm: 1.5704 (1.7165) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 18:25:15 root] (utils.py 283): INFO Epoch: [3] [1070/2502] eta: 1:10:15 lr: 0.000019 loss_cls: 2.7959 (2.7701) grad_norm: 1.5364 (1.7155) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 18:25:44 root] (utils.py 283): INFO Epoch: [3] [1080/2502] eta: 1:09:45 lr: 0.000019 loss_cls: 2.7693 (2.7698) grad_norm: 1.6212 (1.7157) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-10 18:26:13 root] (utils.py 283): INFO Epoch: [3] [1090/2502] eta: 1:09:16 lr: 0.000019 loss_cls: 3.0424 (2.7711) grad_norm: 1.6367 (1.7151) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-10 18:26:43 root] (utils.py 283): INFO Epoch: [3] [1100/2502] eta: 1:08:46 lr: 0.000019 loss_cls: 3.0424 (2.7723) grad_norm: 1.6367 (1.7144) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 18:27:12 root] (utils.py 283): INFO Epoch: [3] [1110/2502] eta: 1:08:17 lr: 0.000019 loss_cls: 2.9236 (2.7726) grad_norm: 1.6333 (1.7134) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-10 18:27:42 root] (utils.py 283): INFO Epoch: [3] [1120/2502] eta: 1:07:47 lr: 0.000019 loss_cls: 2.9135 (2.7732) grad_norm: 1.7102 (1.7173) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 18:28:11 root] (utils.py 283): INFO Epoch: [3] [1130/2502] eta: 1:07:18 lr: 0.000019 loss_cls: 2.8126 (2.7718) grad_norm: 1.8090 (1.7175) time: 2.9502 data: 0.0003 max mem: 29202 +[2024-12-10 18:28:41 root] (utils.py 283): INFO Epoch: [3] [1140/2502] eta: 1:06:49 lr: 0.000019 loss_cls: 2.8909 (2.7729) grad_norm: 1.6982 (1.7191) time: 2.9631 data: 0.0003 max mem: 29202 +[2024-12-10 18:29:10 root] (utils.py 283): INFO Epoch: [3] [1150/2502] eta: 1:06:19 lr: 0.000019 loss_cls: 2.8909 (2.7720) grad_norm: 1.7122 (1.7187) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 18:29:40 root] (utils.py 283): INFO Epoch: [3] [1160/2502] eta: 1:05:50 lr: 0.000019 loss_cls: 2.8103 (2.7724) grad_norm: 1.7122 (1.7203) time: 2.9337 data: 0.0003 max mem: 29202 +[2024-12-10 18:30:09 root] (utils.py 283): INFO Epoch: [3] [1170/2502] eta: 1:05:20 lr: 0.000019 loss_cls: 2.8103 (2.7713) grad_norm: 1.6011 (1.7189) time: 2.9312 data: 0.0003 max mem: 29202 +[2024-12-10 18:30:38 root] (utils.py 283): INFO Epoch: [3] [1180/2502] eta: 1:04:51 lr: 0.000019 loss_cls: 2.7835 (2.7692) grad_norm: 1.5910 (1.7183) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-10 18:31:08 root] (utils.py 283): INFO Epoch: [3] [1190/2502] eta: 1:04:21 lr: 0.000019 loss_cls: 2.5866 (2.7687) grad_norm: 1.5910 (1.7182) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 18:31:37 root] (utils.py 283): INFO Epoch: [3] [1200/2502] eta: 1:03:52 lr: 0.000019 loss_cls: 2.8474 (2.7697) grad_norm: 1.5863 (1.7180) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-10 18:32:06 root] (utils.py 283): INFO Epoch: [3] [1210/2502] eta: 1:03:22 lr: 0.000019 loss_cls: 2.8566 (2.7698) grad_norm: 1.6029 (1.7173) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-10 18:32:36 root] (utils.py 283): INFO Epoch: [3] [1220/2502] eta: 1:02:53 lr: 0.000019 loss_cls: 2.8378 (2.7700) grad_norm: 1.7228 (1.7175) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 18:33:05 root] (utils.py 283): INFO Epoch: [3] [1230/2502] eta: 1:02:23 lr: 0.000019 loss_cls: 2.9251 (2.7705) grad_norm: 1.7937 (1.7193) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-10 18:33:35 root] (utils.py 283): INFO Epoch: [3] [1240/2502] eta: 1:01:54 lr: 0.000019 loss_cls: 2.9287 (2.7716) grad_norm: 1.7980 (1.7195) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 18:34:04 root] (utils.py 283): INFO Epoch: [3] [1250/2502] eta: 1:01:25 lr: 0.000019 loss_cls: 2.9823 (2.7731) grad_norm: 1.6243 (1.7207) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 18:34:34 root] (utils.py 283): INFO Epoch: [3] [1260/2502] eta: 1:00:55 lr: 0.000019 loss_cls: 2.9613 (2.7732) grad_norm: 1.6890 (1.7206) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 18:35:03 root] (utils.py 283): INFO Epoch: [3] [1270/2502] eta: 1:00:26 lr: 0.000019 loss_cls: 2.8712 (2.7735) grad_norm: 1.6890 (1.7202) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-10 18:35:33 root] (utils.py 283): INFO Epoch: [3] [1280/2502] eta: 0:59:56 lr: 0.000019 loss_cls: 2.8712 (2.7737) grad_norm: 1.6134 (1.7198) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-10 18:36:02 root] (utils.py 283): INFO Epoch: [3] [1290/2502] eta: 0:59:27 lr: 0.000019 loss_cls: 2.9872 (2.7745) grad_norm: 1.6120 (1.7195) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 18:36:31 root] (utils.py 283): INFO Epoch: [3] [1300/2502] eta: 0:58:57 lr: 0.000019 loss_cls: 2.8328 (2.7733) grad_norm: 1.6404 (1.7210) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-10 18:37:01 root] (utils.py 283): INFO Epoch: [3] [1310/2502] eta: 0:58:28 lr: 0.000019 loss_cls: 2.7133 (2.7739) grad_norm: 1.7154 (1.7213) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-10 18:37:30 root] (utils.py 283): INFO Epoch: [3] [1320/2502] eta: 0:57:58 lr: 0.000019 loss_cls: 2.8903 (2.7746) grad_norm: 1.6591 (1.7208) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-10 18:38:00 root] (utils.py 283): INFO Epoch: [3] [1330/2502] eta: 0:57:29 lr: 0.000019 loss_cls: 2.8903 (2.7749) grad_norm: 1.6637 (1.7254) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 18:38:29 root] (utils.py 283): INFO Epoch: [3] [1340/2502] eta: 0:56:59 lr: 0.000019 loss_cls: 2.8555 (2.7763) grad_norm: 1.6791 (1.7253) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-10 18:38:58 root] (utils.py 283): INFO Epoch: [3] [1350/2502] eta: 0:56:30 lr: 0.000019 loss_cls: 2.8519 (2.7758) grad_norm: 1.6682 (1.7257) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 18:39:28 root] (utils.py 283): INFO Epoch: [3] [1360/2502] eta: 0:56:01 lr: 0.000019 loss_cls: 2.7447 (2.7752) grad_norm: 1.6798 (1.7258) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-10 18:39:57 root] (utils.py 283): INFO Epoch: [3] [1370/2502] eta: 0:55:31 lr: 0.000019 loss_cls: 2.9493 (2.7754) grad_norm: 1.6531 (1.7271) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-10 18:40:27 root] (utils.py 283): INFO Epoch: [3] [1380/2502] eta: 0:55:02 lr: 0.000019 loss_cls: 2.9159 (2.7752) grad_norm: 1.6531 (1.7275) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 18:40:56 root] (utils.py 283): INFO Epoch: [3] [1390/2502] eta: 0:54:32 lr: 0.000019 loss_cls: 2.8158 (2.7747) grad_norm: 1.6662 (1.7272) time: 2.9512 data: 0.0003 max mem: 29202 +[2024-12-10 18:41:26 root] (utils.py 283): INFO Epoch: [3] [1400/2502] eta: 0:54:03 lr: 0.000019 loss_cls: 2.8065 (2.7736) grad_norm: 1.6162 (1.7266) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-10 18:41:55 root] (utils.py 283): INFO Epoch: [3] [1410/2502] eta: 0:53:34 lr: 0.000019 loss_cls: 2.8472 (2.7747) grad_norm: 1.5654 (1.7262) time: 2.9517 data: 0.0003 max mem: 29202 +[2024-12-10 18:42:25 root] (utils.py 283): INFO Epoch: [3] [1420/2502] eta: 0:53:04 lr: 0.000019 loss_cls: 2.9389 (2.7757) grad_norm: 1.6079 (1.7258) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 18:42:54 root] (utils.py 283): INFO Epoch: [3] [1430/2502] eta: 0:52:35 lr: 0.000019 loss_cls: 2.9329 (2.7774) grad_norm: 1.6503 (1.7257) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-10 18:43:24 root] (utils.py 283): INFO Epoch: [3] [1440/2502] eta: 0:52:05 lr: 0.000019 loss_cls: 2.7886 (2.7768) grad_norm: 1.7006 (1.7254) time: 2.9534 data: 0.0003 max mem: 29202 +[2024-12-10 18:43:53 root] (utils.py 283): INFO Epoch: [3] [1450/2502] eta: 0:51:36 lr: 0.000019 loss_cls: 2.6856 (2.7762) grad_norm: 1.6424 (1.7253) time: 2.9675 data: 0.0003 max mem: 29202 +[2024-12-10 18:44:23 root] (utils.py 283): INFO Epoch: [3] [1460/2502] eta: 0:51:07 lr: 0.000019 loss_cls: 3.1740 (2.7791) grad_norm: 1.6513 (1.7252) time: 2.9692 data: 0.0003 max mem: 29202 +[2024-12-10 18:44:53 root] (utils.py 283): INFO Epoch: [3] [1470/2502] eta: 0:50:38 lr: 0.000019 loss_cls: 3.1410 (2.7786) grad_norm: 1.6031 (1.7243) time: 2.9720 data: 0.0003 max mem: 29202 +[2024-12-10 18:45:22 root] (utils.py 283): INFO Epoch: [3] [1480/2502] eta: 0:50:08 lr: 0.000019 loss_cls: 2.7320 (2.7786) grad_norm: 1.5730 (1.7234) time: 2.9631 data: 0.0003 max mem: 29202 +[2024-12-10 18:45:52 root] (utils.py 283): INFO Epoch: [3] [1490/2502] eta: 0:49:39 lr: 0.000019 loss_cls: 2.7925 (2.7785) grad_norm: 1.5930 (1.7231) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-10 18:46:21 root] (utils.py 283): INFO Epoch: [3] [1500/2502] eta: 0:49:09 lr: 0.000019 loss_cls: 2.6043 (2.7762) grad_norm: 1.5794 (1.7220) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 18:46:51 root] (utils.py 283): INFO Epoch: [3] [1510/2502] eta: 0:48:40 lr: 0.000019 loss_cls: 2.5967 (2.7756) grad_norm: 1.5362 (1.7220) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 18:47:20 root] (utils.py 283): INFO Epoch: [3] [1520/2502] eta: 0:48:11 lr: 0.000019 loss_cls: 2.9575 (2.7766) grad_norm: 1.6218 (1.7218) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 18:47:49 root] (utils.py 283): INFO Epoch: [3] [1530/2502] eta: 0:47:41 lr: 0.000019 loss_cls: 3.0273 (2.7770) grad_norm: 1.6841 (1.7225) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 18:48:19 root] (utils.py 283): INFO Epoch: [3] [1540/2502] eta: 0:47:12 lr: 0.000019 loss_cls: 2.7524 (2.7754) grad_norm: 1.6339 (1.7218) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 18:48:48 root] (utils.py 283): INFO Epoch: [3] [1550/2502] eta: 0:46:42 lr: 0.000019 loss_cls: 2.5386 (2.7749) grad_norm: 1.6104 (1.7211) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 18:49:18 root] (utils.py 283): INFO Epoch: [3] [1560/2502] eta: 0:46:13 lr: 0.000019 loss_cls: 2.8896 (2.7743) grad_norm: 1.7020 (1.7211) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 18:49:47 root] (utils.py 283): INFO Epoch: [3] [1570/2502] eta: 0:45:43 lr: 0.000019 loss_cls: 2.8244 (2.7747) grad_norm: 1.7020 (1.7207) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 18:50:17 root] (utils.py 283): INFO Epoch: [3] [1580/2502] eta: 0:45:14 lr: 0.000019 loss_cls: 2.8244 (2.7747) grad_norm: 1.6109 (1.7211) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-10 18:50:46 root] (utils.py 283): INFO Epoch: [3] [1590/2502] eta: 0:44:44 lr: 0.000019 loss_cls: 2.7189 (2.7738) grad_norm: 1.6109 (1.7212) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 18:51:16 root] (utils.py 283): INFO Epoch: [3] [1600/2502] eta: 0:44:15 lr: 0.000019 loss_cls: 2.6710 (2.7741) grad_norm: 1.5988 (1.7207) time: 2.9552 data: 0.0003 max mem: 29202 +[2024-12-10 18:51:45 root] (utils.py 283): INFO Epoch: [3] [1610/2502] eta: 0:43:46 lr: 0.000019 loss_cls: 2.8475 (2.7742) grad_norm: 1.5798 (1.7198) time: 2.9528 data: 0.0003 max mem: 29202 +[2024-12-10 18:52:15 root] (utils.py 283): INFO Epoch: [3] [1620/2502] eta: 0:43:16 lr: 0.000019 loss_cls: 2.8180 (2.7732) grad_norm: 1.6611 (1.7196) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-10 18:52:45 root] (utils.py 283): INFO Epoch: [3] [1630/2502] eta: 0:42:47 lr: 0.000019 loss_cls: 2.6310 (2.7730) grad_norm: 1.6717 (1.7203) time: 2.9632 data: 0.0003 max mem: 29202 +[2024-12-10 18:53:14 root] (utils.py 283): INFO Epoch: [3] [1640/2502] eta: 0:42:18 lr: 0.000019 loss_cls: 2.8337 (2.7728) grad_norm: 1.6717 (1.7209) time: 2.9673 data: 0.0003 max mem: 29202 +[2024-12-10 18:53:43 root] (utils.py 283): INFO Epoch: [3] [1650/2502] eta: 0:41:48 lr: 0.000019 loss_cls: 2.6852 (2.7721) grad_norm: 1.7823 (1.7258) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-10 18:54:13 root] (utils.py 283): INFO Epoch: [3] [1660/2502] eta: 0:41:19 lr: 0.000019 loss_cls: 2.7544 (2.7726) grad_norm: 1.8194 (1.7270) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 18:54:42 root] (utils.py 283): INFO Epoch: [3] [1670/2502] eta: 0:40:49 lr: 0.000019 loss_cls: 2.8455 (2.7713) grad_norm: 1.7380 (1.7264) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 18:55:12 root] (utils.py 283): INFO Epoch: [3] [1680/2502] eta: 0:40:20 lr: 0.000019 loss_cls: 2.6121 (2.7704) grad_norm: 1.6229 (1.7257) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 18:55:41 root] (utils.py 283): INFO Epoch: [3] [1690/2502] eta: 0:39:50 lr: 0.000019 loss_cls: 2.8129 (2.7705) grad_norm: 1.6216 (1.7250) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 18:56:11 root] (utils.py 283): INFO Epoch: [3] [1700/2502] eta: 0:39:21 lr: 0.000019 loss_cls: 2.8110 (2.7697) grad_norm: 1.6147 (1.7242) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 18:56:40 root] (utils.py 283): INFO Epoch: [3] [1710/2502] eta: 0:38:51 lr: 0.000019 loss_cls: 2.8633 (2.7706) grad_norm: 1.6147 (1.7241) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 18:57:10 root] (utils.py 283): INFO Epoch: [3] [1720/2502] eta: 0:38:22 lr: 0.000019 loss_cls: 2.8633 (2.7709) grad_norm: 1.6105 (1.7233) time: 2.9629 data: 0.0003 max mem: 29202 +[2024-12-10 18:57:40 root] (utils.py 283): INFO Epoch: [3] [1730/2502] eta: 0:37:53 lr: 0.000019 loss_cls: 2.6432 (2.7701) grad_norm: 1.6105 (1.7229) time: 2.9741 data: 0.0003 max mem: 29202 +[2024-12-10 18:58:09 root] (utils.py 283): INFO Epoch: [3] [1740/2502] eta: 0:37:24 lr: 0.000019 loss_cls: 2.6731 (2.7693) grad_norm: 1.6206 (1.7223) time: 2.9745 data: 0.0003 max mem: 29202 +[2024-12-10 18:58:39 root] (utils.py 283): INFO Epoch: [3] [1750/2502] eta: 0:36:54 lr: 0.000019 loss_cls: 2.8830 (2.7693) grad_norm: 1.4821 (1.7214) time: 2.9604 data: 0.0003 max mem: 29202 +[2024-12-10 18:59:08 root] (utils.py 283): INFO Epoch: [3] [1760/2502] eta: 0:36:25 lr: 0.000019 loss_cls: 2.9247 (2.7704) grad_norm: 1.5530 (1.7209) time: 2.9517 data: 0.0003 max mem: 29202 +[2024-12-10 18:59:38 root] (utils.py 283): INFO Epoch: [3] [1770/2502] eta: 0:35:55 lr: 0.000019 loss_cls: 2.9571 (2.7706) grad_norm: 1.6138 (1.7209) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 19:00:07 root] (utils.py 283): INFO Epoch: [3] [1780/2502] eta: 0:35:26 lr: 0.000019 loss_cls: 3.0156 (2.7714) grad_norm: 1.5867 (1.7205) time: 2.9384 data: 0.0002 max mem: 29202 +[2024-12-10 19:00:36 root] (utils.py 283): INFO Epoch: [3] [1790/2502] eta: 0:34:56 lr: 0.000019 loss_cls: 2.8138 (2.7704) grad_norm: 1.6449 (1.7209) time: 2.9333 data: 0.0003 max mem: 29202 +[2024-12-10 19:01:06 root] (utils.py 283): INFO Epoch: [3] [1800/2502] eta: 0:34:27 lr: 0.000019 loss_cls: 2.7829 (2.7706) grad_norm: 1.7455 (1.7210) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-10 19:01:35 root] (utils.py 283): INFO Epoch: [3] [1810/2502] eta: 0:33:57 lr: 0.000019 loss_cls: 2.7902 (2.7693) grad_norm: 1.6985 (1.7209) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-10 19:02:05 root] (utils.py 283): INFO Epoch: [3] [1820/2502] eta: 0:33:28 lr: 0.000019 loss_cls: 2.7219 (2.7690) grad_norm: 1.6434 (1.7204) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 19:02:34 root] (utils.py 283): INFO Epoch: [3] [1830/2502] eta: 0:32:58 lr: 0.000019 loss_cls: 2.7219 (2.7680) grad_norm: 1.6476 (1.7205) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 19:03:04 root] (utils.py 283): INFO Epoch: [3] [1840/2502] eta: 0:32:29 lr: 0.000019 loss_cls: 2.6706 (2.7675) grad_norm: 1.6614 (1.7207) time: 2.9593 data: 0.0003 max mem: 29202 +[2024-12-10 19:03:33 root] (utils.py 283): INFO Epoch: [3] [1850/2502] eta: 0:32:00 lr: 0.000019 loss_cls: 2.7455 (2.7677) grad_norm: 1.7076 (1.7209) time: 2.9519 data: 0.0003 max mem: 29202 +[2024-12-10 19:04:03 root] (utils.py 283): INFO Epoch: [3] [1860/2502] eta: 0:31:30 lr: 0.000019 loss_cls: 3.0035 (2.7691) grad_norm: 1.7076 (1.7206) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 19:04:32 root] (utils.py 283): INFO Epoch: [3] [1870/2502] eta: 0:31:01 lr: 0.000019 loss_cls: 2.8803 (2.7681) grad_norm: 1.6431 (1.7203) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 19:05:02 root] (utils.py 283): INFO Epoch: [3] [1880/2502] eta: 0:30:31 lr: 0.000019 loss_cls: 2.8803 (2.7688) grad_norm: 1.6633 (1.7205) time: 2.9610 data: 0.0003 max mem: 29202 +[2024-12-10 19:05:31 root] (utils.py 283): INFO Epoch: [3] [1890/2502] eta: 0:30:02 lr: 0.000019 loss_cls: 2.9824 (2.7696) grad_norm: 1.6845 (1.7218) time: 2.9719 data: 0.0003 max mem: 29202 +[2024-12-10 19:06:01 root] (utils.py 283): INFO Epoch: [3] [1900/2502] eta: 0:29:33 lr: 0.000019 loss_cls: 2.8901 (2.7691) grad_norm: 1.6242 (1.7214) time: 2.9683 data: 0.0003 max mem: 29202 +[2024-12-10 19:06:31 root] (utils.py 283): INFO Epoch: [3] [1910/2502] eta: 0:29:03 lr: 0.000019 loss_cls: 2.9394 (2.7700) grad_norm: 1.5961 (1.7212) time: 2.9666 data: 0.0003 max mem: 29202 +[2024-12-10 19:07:00 root] (utils.py 283): INFO Epoch: [3] [1920/2502] eta: 0:28:34 lr: 0.000019 loss_cls: 3.0208 (2.7713) grad_norm: 1.6398 (1.7208) time: 2.9669 data: 0.0003 max mem: 29202 +[2024-12-10 19:07:30 root] (utils.py 283): INFO Epoch: [3] [1930/2502] eta: 0:28:04 lr: 0.000019 loss_cls: 2.9662 (2.7712) grad_norm: 1.6637 (1.7215) time: 2.9689 data: 0.0003 max mem: 29202 +[2024-12-10 19:08:00 root] (utils.py 283): INFO Epoch: [3] [1940/2502] eta: 0:27:35 lr: 0.000019 loss_cls: 2.8963 (2.7720) grad_norm: 1.7988 (1.7221) time: 2.9560 data: 0.0003 max mem: 29202 +[2024-12-10 19:08:29 root] (utils.py 283): INFO Epoch: [3] [1950/2502] eta: 0:27:05 lr: 0.000019 loss_cls: 2.8365 (2.7715) grad_norm: 1.6314 (1.7214) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 19:08:59 root] (utils.py 283): INFO Epoch: [3] [1960/2502] eta: 0:26:36 lr: 0.000019 loss_cls: 2.8986 (2.7727) grad_norm: 1.6314 (1.7214) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 19:09:28 root] (utils.py 283): INFO Epoch: [3] [1970/2502] eta: 0:26:07 lr: 0.000019 loss_cls: 2.8986 (2.7728) grad_norm: 1.6623 (1.7211) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 19:09:57 root] (utils.py 283): INFO Epoch: [3] [1980/2502] eta: 0:25:37 lr: 0.000019 loss_cls: 2.8043 (2.7735) grad_norm: 1.6371 (1.7208) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-10 19:10:27 root] (utils.py 283): INFO Epoch: [3] [1990/2502] eta: 0:25:08 lr: 0.000019 loss_cls: 2.9096 (2.7739) grad_norm: 1.6371 (1.7204) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 19:10:56 root] (utils.py 283): INFO Epoch: [3] [2000/2502] eta: 0:24:38 lr: 0.000019 loss_cls: 2.3782 (2.7712) grad_norm: 1.7192 (1.7226) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-10 19:11:26 root] (utils.py 283): INFO Epoch: [3] [2010/2502] eta: 0:24:09 lr: 0.000019 loss_cls: 2.2908 (2.7698) grad_norm: 1.7269 (1.7223) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-10 19:11:55 root] (utils.py 283): INFO Epoch: [3] [2020/2502] eta: 0:23:39 lr: 0.000019 loss_cls: 2.5855 (2.7702) grad_norm: 1.5921 (1.7233) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-10 19:12:24 root] (utils.py 283): INFO Epoch: [3] [2030/2502] eta: 0:23:10 lr: 0.000019 loss_cls: 2.9309 (2.7711) grad_norm: 1.6444 (1.7233) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-10 19:12:54 root] (utils.py 283): INFO Epoch: [3] [2040/2502] eta: 0:22:40 lr: 0.000019 loss_cls: 2.7437 (2.7707) grad_norm: 1.6454 (1.7245) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 19:13:23 root] (utils.py 283): INFO Epoch: [3] [2050/2502] eta: 0:22:11 lr: 0.000019 loss_cls: 2.7271 (2.7707) grad_norm: 1.7100 (1.7249) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 19:13:53 root] (utils.py 283): INFO Epoch: [3] [2060/2502] eta: 0:21:41 lr: 0.000019 loss_cls: 2.8256 (2.7708) grad_norm: 1.7205 (1.7248) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-10 19:14:22 root] (utils.py 283): INFO Epoch: [3] [2070/2502] eta: 0:21:12 lr: 0.000019 loss_cls: 2.7623 (2.7709) grad_norm: 1.7243 (1.7251) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 19:14:52 root] (utils.py 283): INFO Epoch: [3] [2080/2502] eta: 0:20:42 lr: 0.000019 loss_cls: 2.6496 (2.7703) grad_norm: 1.7243 (1.7247) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 19:15:21 root] (utils.py 283): INFO Epoch: [3] [2090/2502] eta: 0:20:13 lr: 0.000019 loss_cls: 2.6496 (2.7700) grad_norm: 1.6261 (1.7244) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 19:15:50 root] (utils.py 283): INFO Epoch: [3] [2100/2502] eta: 0:19:44 lr: 0.000019 loss_cls: 2.6971 (2.7701) grad_norm: 1.5817 (1.7239) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 19:16:20 root] (utils.py 283): INFO Epoch: [3] [2110/2502] eta: 0:19:14 lr: 0.000019 loss_cls: 2.6971 (2.7691) grad_norm: 1.5593 (1.7235) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-10 19:16:50 root] (utils.py 283): INFO Epoch: [3] [2120/2502] eta: 0:18:45 lr: 0.000019 loss_cls: 2.8971 (2.7702) grad_norm: 1.6101 (1.7234) time: 2.9614 data: 0.0003 max mem: 29202 +[2024-12-10 19:17:19 root] (utils.py 283): INFO Epoch: [3] [2130/2502] eta: 0:18:15 lr: 0.000019 loss_cls: 2.8971 (2.7695) grad_norm: 1.5942 (1.7230) time: 2.9580 data: 0.0003 max mem: 29202 +[2024-12-10 19:17:48 root] (utils.py 283): INFO Epoch: [3] [2140/2502] eta: 0:17:46 lr: 0.000019 loss_cls: 2.5951 (2.7687) grad_norm: 1.5444 (1.7223) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 19:18:18 root] (utils.py 283): INFO Epoch: [3] [2150/2502] eta: 0:17:16 lr: 0.000019 loss_cls: 2.7547 (2.7694) grad_norm: 1.6142 (1.7222) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 19:18:47 root] (utils.py 283): INFO Epoch: [3] [2160/2502] eta: 0:16:47 lr: 0.000019 loss_cls: 2.9392 (2.7701) grad_norm: 1.7146 (1.7227) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 19:19:17 root] (utils.py 283): INFO Epoch: [3] [2170/2502] eta: 0:16:17 lr: 0.000019 loss_cls: 2.9536 (2.7710) grad_norm: 1.6721 (1.7225) time: 2.9386 data: 0.0002 max mem: 29202 +[2024-12-10 19:19:46 root] (utils.py 283): INFO Epoch: [3] [2180/2502] eta: 0:15:48 lr: 0.000019 loss_cls: 2.9453 (2.7716) grad_norm: 1.6640 (1.7225) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 19:20:16 root] (utils.py 283): INFO Epoch: [3] [2190/2502] eta: 0:15:18 lr: 0.000019 loss_cls: 2.9352 (2.7723) grad_norm: 1.6640 (1.7219) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 19:20:45 root] (utils.py 283): INFO Epoch: [3] [2200/2502] eta: 0:14:49 lr: 0.000019 loss_cls: 3.0371 (2.7731) grad_norm: 1.6864 (1.7225) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 19:21:15 root] (utils.py 283): INFO Epoch: [3] [2210/2502] eta: 0:14:20 lr: 0.000019 loss_cls: 2.9706 (2.7736) grad_norm: 1.6318 (1.7219) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 19:21:44 root] (utils.py 283): INFO Epoch: [3] [2220/2502] eta: 0:13:50 lr: 0.000019 loss_cls: 2.9065 (2.7734) grad_norm: 1.5917 (1.7214) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-10 19:22:13 root] (utils.py 283): INFO Epoch: [3] [2230/2502] eta: 0:13:21 lr: 0.000019 loss_cls: 2.8502 (2.7728) grad_norm: 1.6002 (1.7220) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 19:22:43 root] (utils.py 283): INFO Epoch: [3] [2240/2502] eta: 0:12:51 lr: 0.000019 loss_cls: 2.6023 (2.7731) grad_norm: 1.7167 (1.7220) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 19:23:12 root] (utils.py 283): INFO Epoch: [3] [2250/2502] eta: 0:12:22 lr: 0.000019 loss_cls: 2.6023 (2.7724) grad_norm: 1.7067 (1.7223) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 19:23:42 root] (utils.py 283): INFO Epoch: [3] [2260/2502] eta: 0:11:52 lr: 0.000019 loss_cls: 2.7264 (2.7720) grad_norm: 1.6654 (1.7222) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-10 19:24:11 root] (utils.py 283): INFO Epoch: [3] [2270/2502] eta: 0:11:23 lr: 0.000019 loss_cls: 2.7139 (2.7715) grad_norm: 1.7129 (1.7227) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-10 19:24:41 root] (utils.py 283): INFO Epoch: [3] [2280/2502] eta: 0:10:53 lr: 0.000019 loss_cls: 2.7139 (2.7716) grad_norm: 1.7354 (1.7227) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 19:25:10 root] (utils.py 283): INFO Epoch: [3] [2290/2502] eta: 0:10:24 lr: 0.000019 loss_cls: 2.7610 (2.7717) grad_norm: 1.6091 (1.7223) time: 2.9557 data: 0.0003 max mem: 29202 +[2024-12-10 19:25:40 root] (utils.py 283): INFO Epoch: [3] [2300/2502] eta: 0:09:54 lr: 0.000019 loss_cls: 2.9507 (2.7723) grad_norm: 1.5922 (1.7221) time: 2.9589 data: 0.0003 max mem: 29202 +[2024-12-10 19:26:09 root] (utils.py 283): INFO Epoch: [3] [2310/2502] eta: 0:09:25 lr: 0.000019 loss_cls: 2.9352 (2.7717) grad_norm: 1.5922 (1.7229) time: 2.9505 data: 0.0003 max mem: 29202 +[2024-12-10 19:26:39 root] (utils.py 283): INFO Epoch: [3] [2320/2502] eta: 0:08:56 lr: 0.000019 loss_cls: 2.8840 (2.7720) grad_norm: 1.6109 (1.7233) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 19:27:08 root] (utils.py 283): INFO Epoch: [3] [2330/2502] eta: 0:08:26 lr: 0.000019 loss_cls: 2.8901 (2.7719) grad_norm: 1.6604 (1.7235) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 19:27:37 root] (utils.py 283): INFO Epoch: [3] [2340/2502] eta: 0:07:57 lr: 0.000019 loss_cls: 2.6733 (2.7714) grad_norm: 1.6273 (1.7232) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-10 19:28:07 root] (utils.py 283): INFO Epoch: [3] [2350/2502] eta: 0:07:27 lr: 0.000019 loss_cls: 2.5474 (2.7704) grad_norm: 1.5531 (1.7222) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 19:28:36 root] (utils.py 283): INFO Epoch: [3] [2360/2502] eta: 0:06:58 lr: 0.000019 loss_cls: 2.7351 (2.7709) grad_norm: 1.6263 (1.7230) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 19:29:06 root] (utils.py 283): INFO Epoch: [3] [2370/2502] eta: 0:06:28 lr: 0.000019 loss_cls: 2.8532 (2.7705) grad_norm: 1.7437 (1.7228) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-10 19:29:35 root] (utils.py 283): INFO Epoch: [3] [2380/2502] eta: 0:05:59 lr: 0.000019 loss_cls: 2.8532 (2.7707) grad_norm: 1.7030 (1.7230) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 19:30:05 root] (utils.py 283): INFO Epoch: [3] [2390/2502] eta: 0:05:29 lr: 0.000019 loss_cls: 2.7664 (2.7708) grad_norm: 1.6730 (1.7225) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 19:30:34 root] (utils.py 283): INFO Epoch: [3] [2400/2502] eta: 0:05:00 lr: 0.000019 loss_cls: 2.7007 (2.7705) grad_norm: 1.6129 (1.7226) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-10 19:31:03 root] (utils.py 283): INFO Epoch: [3] [2410/2502] eta: 0:04:30 lr: 0.000019 loss_cls: 2.9683 (2.7715) grad_norm: 1.6755 (1.7231) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-10 19:31:33 root] (utils.py 283): INFO Epoch: [3] [2420/2502] eta: 0:04:01 lr: 0.000019 loss_cls: 2.9739 (2.7712) grad_norm: 1.7140 (1.7232) time: 2.9542 data: 0.0003 max mem: 29202 +[2024-12-10 19:32:02 root] (utils.py 283): INFO Epoch: [3] [2430/2502] eta: 0:03:32 lr: 0.000019 loss_cls: 2.9577 (2.7713) grad_norm: 1.6878 (1.7232) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-10 19:32:32 root] (utils.py 283): INFO Epoch: [3] [2440/2502] eta: 0:03:02 lr: 0.000019 loss_cls: 2.9575 (2.7719) grad_norm: 1.6434 (1.7231) time: 2.9287 data: 0.0003 max mem: 29202 +[2024-12-10 19:33:01 root] (utils.py 283): INFO Epoch: [3] [2450/2502] eta: 0:02:33 lr: 0.000019 loss_cls: 2.7519 (2.7713) grad_norm: 1.6499 (1.7231) time: 2.9258 data: 0.0003 max mem: 29202 +[2024-12-10 19:33:30 root] (utils.py 283): INFO Epoch: [3] [2460/2502] eta: 0:02:03 lr: 0.000019 loss_cls: 2.5244 (2.7711) grad_norm: 1.7149 (1.7237) time: 2.9237 data: 0.0003 max mem: 29202 +[2024-12-10 19:33:59 root] (utils.py 283): INFO Epoch: [3] [2470/2502] eta: 0:01:34 lr: 0.000019 loss_cls: 2.7310 (2.7712) grad_norm: 1.6965 (1.7236) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-10 19:34:29 root] (utils.py 283): INFO Epoch: [3] [2480/2502] eta: 0:01:04 lr: 0.000019 loss_cls: 2.7828 (2.7706) grad_norm: 1.7359 (1.7239) time: 2.9328 data: 0.0003 max mem: 29202 +[2024-12-10 19:34:59 root] (utils.py 283): INFO Epoch: [3] [2490/2502] eta: 0:00:35 lr: 0.000019 loss_cls: 2.7185 (2.7702) grad_norm: 1.5722 (1.7230) time: 2.9502 data: 0.0256 max mem: 29202 +[2024-12-10 19:35:28 root] (utils.py 283): INFO Epoch: [3] [2500/2502] eta: 0:00:05 lr: 0.000019 loss_cls: 2.7863 (2.7708) grad_norm: 1.5301 (1.7229) time: 2.9507 data: 0.0256 max mem: 29202 +[2024-12-10 19:35:31 root] (utils.py 283): INFO Epoch: [3] [2501/2502] eta: 0:00:02 lr: 0.000019 loss_cls: 2.7667 (2.7707) grad_norm: 1.5301 (1.7229) time: 2.9509 data: 0.0256 max mem: 29202 +[2024-12-10 19:35:31 root] (utils.py 297): INFO Epoch: [3] Total time: 2:02:48 (2.9451 s / it) +[2024-12-10 19:35:31 root] (engine.py 179): INFO Averaged stats:lr: 0.000019 loss_cls: 2.7667 (2.7663) grad_norm: 1.5301 (1.7229) +[2024-12-10 19:35:34 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:54 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3800 (0.3800) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5577 data: 0.0003 max mem: 29202 +[2024-12-10 19:35:40 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6320 (0.6620) acc1: 85.9375 (86.1506) acc3: 97.6562 (96.7330) acc5: 98.4375 (98.0114) time: 0.5505 data: 0.0004 max mem: 29202 +[2024-12-10 19:35:45 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6320 (0.6966) acc1: 84.3750 (85.1190) acc3: 96.0938 (95.8333) acc5: 97.6562 (97.5818) time: 0.5504 data: 0.0004 max mem: 29202 +[2024-12-10 19:35:51 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6524 (0.7208) acc1: 84.3750 (84.5262) acc3: 94.5312 (95.5897) acc5: 96.8750 (97.3034) time: 0.5509 data: 0.0004 max mem: 29202 +[2024-12-10 19:35:56 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7051 (0.7241) acc1: 84.3750 (84.6989) acc3: 95.3125 (95.5221) acc5: 96.8750 (97.2752) time: 0.5508 data: 0.0004 max mem: 29202 +[2024-12-10 19:36:02 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8745 (0.8093) acc1: 80.4688 (82.9657) acc3: 92.1875 (94.4547) acc5: 94.5312 (96.3848) time: 0.5513 data: 0.0004 max mem: 29202 +[2024-12-10 19:36:07 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0101 (0.8433) acc1: 77.3438 (82.4795) acc3: 89.8438 (93.8781) acc5: 92.9688 (95.8760) time: 0.5516 data: 0.0004 max mem: 29202 +[2024-12-10 19:36:13 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0065 (0.8800) acc1: 78.9062 (81.6681) acc3: 89.8438 (93.4529) acc5: 93.7500 (95.6096) time: 0.5518 data: 0.0004 max mem: 29202 +[2024-12-10 19:36:19 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0687 (0.9054) acc1: 78.9062 (81.1343) acc3: 89.8438 (92.9977) acc5: 93.7500 (95.2739) time: 0.5519 data: 0.0006 max mem: 29202 +[2024-12-10 19:36:24 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.1575 (0.9354) acc1: 75.7812 (80.3915) acc3: 89.8438 (92.6597) acc5: 92.1875 (95.0206) time: 0.5520 data: 0.0006 max mem: 29202 +[2024-12-10 19:36:28 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0687 (0.9336) acc1: 75.7812 (80.3280) acc3: 90.6250 (92.7200) acc5: 94.5312 (95.1120) time: 0.5428 data: 0.0005 max mem: 29202 +[2024-12-10 19:36:28 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5497 s / it) +[2024-12-10 19:36:28 root] (engine.py 264): INFO * Acc@1 80.490 Acc@3 92.616 Acc@5 95.072 loss 0.924 flops 13.207 layer_flops 13.109 +[2024-12-10 19:36:28 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 80.5% +[2024-12-10 19:36:30 root] (main.py 576): INFO Max accuracy: 80.49% +[2024-12-10 19:36:33 root] (utils.py 283): INFO Epoch: [4] [ 0/2502] eta: 2:00:34 lr: 0.000018 loss_cls: 2.7680 (2.7680) grad_norm: 1.4843 (1.4843) time: 2.8917 data: 0.0005 max mem: 29202 +[2024-12-10 19:37:02 root] (utils.py 283): INFO Epoch: [4] [ 10/2502] eta: 2:01:05 lr: 0.000018 loss_cls: 2.8916 (2.8324) grad_norm: 1.6058 (1.7277) time: 2.9154 data: 0.0003 max mem: 29202 +[2024-12-10 19:37:32 root] (utils.py 283): INFO Epoch: [4] [ 20/2502] eta: 2:00:46 lr: 0.000018 loss_cls: 2.9005 (2.9060) grad_norm: 1.7047 (1.7527) time: 2.9209 data: 0.0003 max mem: 29202 +[2024-12-10 19:38:01 root] (utils.py 283): INFO Epoch: [4] [ 30/2502] eta: 2:00:23 lr: 0.000018 loss_cls: 3.0373 (2.9145) grad_norm: 1.6436 (1.7181) time: 2.9259 data: 0.0003 max mem: 29202 +[2024-12-10 19:38:30 root] (utils.py 283): INFO Epoch: [4] [ 40/2502] eta: 1:59:56 lr: 0.000018 loss_cls: 2.7301 (2.8050) grad_norm: 1.6079 (1.7143) time: 2.9263 data: 0.0003 max mem: 29202 +[2024-12-10 19:39:00 root] (utils.py 283): INFO Epoch: [4] [ 50/2502] eta: 1:59:27 lr: 0.000018 loss_cls: 2.8442 (2.8461) grad_norm: 1.6119 (1.7182) time: 2.9241 data: 0.0003 max mem: 29202 +[2024-12-10 19:39:29 root] (utils.py 283): INFO Epoch: [4] [ 60/2502] eta: 1:58:59 lr: 0.000018 loss_cls: 2.8688 (2.8383) grad_norm: 1.7114 (1.7167) time: 2.9250 data: 0.0003 max mem: 29202 +[2024-12-10 19:39:58 root] (utils.py 283): INFO Epoch: [4] [ 70/2502] eta: 1:58:30 lr: 0.000018 loss_cls: 2.8294 (2.8479) grad_norm: 1.7499 (1.7282) time: 2.9256 data: 0.0003 max mem: 29202 +[2024-12-10 19:40:27 root] (utils.py 283): INFO Epoch: [4] [ 80/2502] eta: 1:58:02 lr: 0.000018 loss_cls: 2.9542 (2.8363) grad_norm: 1.6647 (1.7107) time: 2.9257 data: 0.0003 max mem: 29202 +[2024-12-10 19:40:57 root] (utils.py 283): INFO Epoch: [4] [ 90/2502] eta: 1:57:33 lr: 0.000018 loss_cls: 2.9023 (2.8455) grad_norm: 1.6649 (1.7249) time: 2.9268 data: 0.0003 max mem: 29202 +[2024-12-10 19:41:26 root] (utils.py 283): INFO Epoch: [4] [ 100/2502] eta: 1:57:04 lr: 0.000018 loss_cls: 2.7669 (2.8261) grad_norm: 1.6372 (1.7090) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-10 19:41:55 root] (utils.py 283): INFO Epoch: [4] [ 110/2502] eta: 1:56:36 lr: 0.000018 loss_cls: 2.7263 (2.8349) grad_norm: 1.6148 (1.7125) time: 2.9281 data: 0.0003 max mem: 29202 +[2024-12-10 19:42:24 root] (utils.py 283): INFO Epoch: [4] [ 120/2502] eta: 1:56:07 lr: 0.000018 loss_cls: 2.9797 (2.8300) grad_norm: 1.7437 (1.7224) time: 2.9278 data: 0.0003 max mem: 29202 +[2024-12-10 19:42:54 root] (utils.py 283): INFO Epoch: [4] [ 130/2502] eta: 1:55:41 lr: 0.000018 loss_cls: 2.9628 (2.8304) grad_norm: 1.7120 (1.7196) time: 2.9330 data: 0.0003 max mem: 29202 +[2024-12-10 19:43:23 root] (utils.py 283): INFO Epoch: [4] [ 140/2502] eta: 1:55:12 lr: 0.000018 loss_cls: 2.7454 (2.8133) grad_norm: 1.6334 (1.7192) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-10 19:43:52 root] (utils.py 283): INFO Epoch: [4] [ 150/2502] eta: 1:54:43 lr: 0.000018 loss_cls: 2.5688 (2.8045) grad_norm: 1.7097 (1.7232) time: 2.9307 data: 0.0003 max mem: 29202 +[2024-12-10 19:44:22 root] (utils.py 283): INFO Epoch: [4] [ 160/2502] eta: 1:54:14 lr: 0.000018 loss_cls: 2.8688 (2.8081) grad_norm: 1.8482 (1.7266) time: 2.9308 data: 0.0003 max mem: 29202 +[2024-12-10 19:44:51 root] (utils.py 283): INFO Epoch: [4] [ 170/2502] eta: 1:53:46 lr: 0.000018 loss_cls: 2.9753 (2.8158) grad_norm: 1.6475 (1.7259) time: 2.9293 data: 0.0003 max mem: 29202 +[2024-12-10 19:45:20 root] (utils.py 283): INFO Epoch: [4] [ 180/2502] eta: 1:53:17 lr: 0.000018 loss_cls: 2.9809 (2.8079) grad_norm: 1.6790 (1.7237) time: 2.9308 data: 0.0003 max mem: 29202 +[2024-12-10 19:45:50 root] (utils.py 283): INFO Epoch: [4] [ 190/2502] eta: 1:52:48 lr: 0.000018 loss_cls: 3.0223 (2.8186) grad_norm: 1.6297 (1.7178) time: 2.9301 data: 0.0003 max mem: 29202 +[2024-12-10 19:46:19 root] (utils.py 283): INFO Epoch: [4] [ 200/2502] eta: 1:52:19 lr: 0.000018 loss_cls: 3.0012 (2.8223) grad_norm: 1.6302 (1.7176) time: 2.9292 data: 0.0003 max mem: 29202 +[2024-12-10 19:46:48 root] (utils.py 283): INFO Epoch: [4] [ 210/2502] eta: 1:51:49 lr: 0.000018 loss_cls: 2.9046 (2.8173) grad_norm: 1.6302 (1.7106) time: 2.9285 data: 0.0003 max mem: 29202 +[2024-12-10 19:47:17 root] (utils.py 283): INFO Epoch: [4] [ 220/2502] eta: 1:51:20 lr: 0.000018 loss_cls: 2.7272 (2.8061) grad_norm: 1.7015 (1.7134) time: 2.9274 data: 0.0003 max mem: 29202 +[2024-12-10 19:47:47 root] (utils.py 283): INFO Epoch: [4] [ 230/2502] eta: 1:50:51 lr: 0.000018 loss_cls: 2.7272 (2.8061) grad_norm: 1.7015 (1.7106) time: 2.9292 data: 0.0003 max mem: 29202 +[2024-12-10 19:48:16 root] (utils.py 283): INFO Epoch: [4] [ 240/2502] eta: 1:50:22 lr: 0.000018 loss_cls: 3.0139 (2.8113) grad_norm: 1.6612 (1.7126) time: 2.9316 data: 0.0003 max mem: 29202 +[2024-12-10 19:48:45 root] (utils.py 283): INFO Epoch: [4] [ 250/2502] eta: 1:49:53 lr: 0.000018 loss_cls: 3.0321 (2.8178) grad_norm: 1.7424 (1.7533) time: 2.9307 data: 0.0003 max mem: 29202 +[2024-12-10 19:49:15 root] (utils.py 283): INFO Epoch: [4] [ 260/2502] eta: 1:49:24 lr: 0.000018 loss_cls: 2.9334 (2.8196) grad_norm: 1.9354 (1.7616) time: 2.9294 data: 0.0003 max mem: 29202 +[2024-12-10 19:49:44 root] (utils.py 283): INFO Epoch: [4] [ 270/2502] eta: 1:48:55 lr: 0.000018 loss_cls: 2.7752 (2.8105) grad_norm: 1.7709 (1.7624) time: 2.9297 data: 0.0003 max mem: 29202 +[2024-12-10 19:50:13 root] (utils.py 283): INFO Epoch: [4] [ 280/2502] eta: 1:48:26 lr: 0.000018 loss_cls: 2.7711 (2.8131) grad_norm: 1.6985 (1.7597) time: 2.9306 data: 0.0003 max mem: 29202 +[2024-12-10 19:50:43 root] (utils.py 283): INFO Epoch: [4] [ 290/2502] eta: 1:47:57 lr: 0.000018 loss_cls: 2.8878 (2.8125) grad_norm: 1.6167 (1.7554) time: 2.9306 data: 0.0003 max mem: 29202 +[2024-12-10 19:51:12 root] (utils.py 283): INFO Epoch: [4] [ 300/2502] eta: 1:47:27 lr: 0.000018 loss_cls: 2.9388 (2.8167) grad_norm: 1.6067 (1.7536) time: 2.9287 data: 0.0003 max mem: 29202 +[2024-12-10 19:51:41 root] (utils.py 283): INFO Epoch: [4] [ 310/2502] eta: 1:46:58 lr: 0.000018 loss_cls: 2.6572 (2.8067) grad_norm: 1.5981 (1.7488) time: 2.9291 data: 0.0003 max mem: 29202 +[2024-12-10 19:52:11 root] (utils.py 283): INFO Epoch: [4] [ 320/2502] eta: 1:46:30 lr: 0.000018 loss_cls: 2.5891 (2.8064) grad_norm: 1.6379 (1.7527) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 19:52:40 root] (utils.py 283): INFO Epoch: [4] [ 330/2502] eta: 1:46:01 lr: 0.000018 loss_cls: 2.8883 (2.8076) grad_norm: 1.7357 (1.7499) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 19:53:09 root] (utils.py 283): INFO Epoch: [4] [ 340/2502] eta: 1:45:32 lr: 0.000018 loss_cls: 2.9019 (2.8098) grad_norm: 1.5559 (1.7491) time: 2.9298 data: 0.0003 max mem: 29202 +[2024-12-10 19:53:39 root] (utils.py 283): INFO Epoch: [4] [ 350/2502] eta: 1:45:03 lr: 0.000018 loss_cls: 2.9167 (2.8118) grad_norm: 1.6056 (1.7465) time: 2.9272 data: 0.0003 max mem: 29202 +[2024-12-10 19:54:08 root] (utils.py 283): INFO Epoch: [4] [ 360/2502] eta: 1:44:34 lr: 0.000018 loss_cls: 2.9167 (2.8147) grad_norm: 1.6456 (1.7458) time: 2.9306 data: 0.0003 max mem: 29202 +[2024-12-10 19:54:37 root] (utils.py 283): INFO Epoch: [4] [ 370/2502] eta: 1:44:04 lr: 0.000018 loss_cls: 2.7706 (2.8046) grad_norm: 1.7185 (1.7445) time: 2.9320 data: 0.0003 max mem: 29202 +[2024-12-10 19:55:07 root] (utils.py 283): INFO Epoch: [4] [ 380/2502] eta: 1:43:35 lr: 0.000018 loss_cls: 2.4894 (2.7979) grad_norm: 1.7187 (1.7430) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-10 19:55:36 root] (utils.py 283): INFO Epoch: [4] [ 390/2502] eta: 1:43:07 lr: 0.000018 loss_cls: 2.8227 (2.7960) grad_norm: 1.6348 (1.7412) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-10 19:56:05 root] (utils.py 283): INFO Epoch: [4] [ 400/2502] eta: 1:42:38 lr: 0.000018 loss_cls: 2.6101 (2.7908) grad_norm: 1.6348 (1.7393) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 19:56:35 root] (utils.py 283): INFO Epoch: [4] [ 410/2502] eta: 1:42:09 lr: 0.000018 loss_cls: 2.6101 (2.7905) grad_norm: 1.5976 (1.7367) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-10 19:57:04 root] (utils.py 283): INFO Epoch: [4] [ 420/2502] eta: 1:41:41 lr: 0.000018 loss_cls: 2.5803 (2.7856) grad_norm: 1.5876 (1.7338) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-10 19:57:34 root] (utils.py 283): INFO Epoch: [4] [ 430/2502] eta: 1:41:12 lr: 0.000018 loss_cls: 2.5943 (2.7824) grad_norm: 1.6784 (1.7381) time: 2.9559 data: 0.0003 max mem: 29202 +[2024-12-10 19:58:03 root] (utils.py 283): INFO Epoch: [4] [ 440/2502] eta: 1:40:44 lr: 0.000018 loss_cls: 2.7815 (2.7850) grad_norm: 1.6524 (1.7353) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 19:58:33 root] (utils.py 283): INFO Epoch: [4] [ 450/2502] eta: 1:40:15 lr: 0.000018 loss_cls: 2.7815 (2.7828) grad_norm: 1.5645 (1.7322) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 19:59:02 root] (utils.py 283): INFO Epoch: [4] [ 460/2502] eta: 1:39:46 lr: 0.000018 loss_cls: 2.7402 (2.7825) grad_norm: 1.5936 (1.7300) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-10 19:59:32 root] (utils.py 283): INFO Epoch: [4] [ 470/2502] eta: 1:39:18 lr: 0.000018 loss_cls: 2.8666 (2.7851) grad_norm: 1.6545 (1.7310) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 20:00:01 root] (utils.py 283): INFO Epoch: [4] [ 480/2502] eta: 1:38:49 lr: 0.000018 loss_cls: 2.7615 (2.7818) grad_norm: 1.6470 (1.7295) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 20:00:31 root] (utils.py 283): INFO Epoch: [4] [ 490/2502] eta: 1:38:20 lr: 0.000018 loss_cls: 2.7749 (2.7809) grad_norm: 1.5704 (1.7274) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 20:01:00 root] (utils.py 283): INFO Epoch: [4] [ 500/2502] eta: 1:37:52 lr: 0.000018 loss_cls: 2.8531 (2.7838) grad_norm: 1.6315 (1.7283) time: 2.9507 data: 0.0003 max mem: 29202 +[2024-12-10 20:01:30 root] (utils.py 283): INFO Epoch: [4] [ 510/2502] eta: 1:37:23 lr: 0.000018 loss_cls: 2.8531 (2.7829) grad_norm: 1.6640 (1.7291) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-10 20:01:59 root] (utils.py 283): INFO Epoch: [4] [ 520/2502] eta: 1:36:54 lr: 0.000018 loss_cls: 2.8184 (2.7826) grad_norm: 1.6482 (1.7284) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-10 20:02:28 root] (utils.py 283): INFO Epoch: [4] [ 530/2502] eta: 1:36:25 lr: 0.000018 loss_cls: 2.6256 (2.7779) grad_norm: 1.6379 (1.7271) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-10 20:02:58 root] (utils.py 283): INFO Epoch: [4] [ 540/2502] eta: 1:35:56 lr: 0.000018 loss_cls: 2.6988 (2.7795) grad_norm: 1.6275 (1.7242) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 20:03:27 root] (utils.py 283): INFO Epoch: [4] [ 550/2502] eta: 1:35:28 lr: 0.000018 loss_cls: 2.9849 (2.7790) grad_norm: 1.6342 (1.7234) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 20:03:57 root] (utils.py 283): INFO Epoch: [4] [ 560/2502] eta: 1:34:59 lr: 0.000018 loss_cls: 2.4657 (2.7725) grad_norm: 1.6044 (1.7207) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 20:04:26 root] (utils.py 283): INFO Epoch: [4] [ 570/2502] eta: 1:34:30 lr: 0.000018 loss_cls: 2.4657 (2.7734) grad_norm: 1.5468 (1.7175) time: 2.9503 data: 0.0003 max mem: 29202 +[2024-12-10 20:04:56 root] (utils.py 283): INFO Epoch: [4] [ 580/2502] eta: 1:34:01 lr: 0.000018 loss_cls: 2.7888 (2.7729) grad_norm: 1.5518 (1.7162) time: 2.9515 data: 0.0003 max mem: 29202 +[2024-12-10 20:05:25 root] (utils.py 283): INFO Epoch: [4] [ 590/2502] eta: 1:33:32 lr: 0.000018 loss_cls: 2.7241 (2.7695) grad_norm: 1.6112 (1.7146) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-10 20:05:55 root] (utils.py 283): INFO Epoch: [4] [ 600/2502] eta: 1:33:03 lr: 0.000018 loss_cls: 2.7241 (2.7696) grad_norm: 1.6067 (1.7141) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-10 20:06:25 root] (utils.py 283): INFO Epoch: [4] [ 610/2502] eta: 1:32:35 lr: 0.000018 loss_cls: 2.8452 (2.7684) grad_norm: 1.5846 (1.7126) time: 2.9595 data: 0.0003 max mem: 29202 +[2024-12-10 20:06:54 root] (utils.py 283): INFO Epoch: [4] [ 620/2502] eta: 1:32:06 lr: 0.000018 loss_cls: 2.7947 (2.7678) grad_norm: 1.5726 (1.7096) time: 2.9655 data: 0.0003 max mem: 29202 +[2024-12-10 20:07:24 root] (utils.py 283): INFO Epoch: [4] [ 630/2502] eta: 1:31:37 lr: 0.000018 loss_cls: 2.9234 (2.7710) grad_norm: 1.6131 (1.7100) time: 2.9553 data: 0.0003 max mem: 29202 +[2024-12-10 20:07:53 root] (utils.py 283): INFO Epoch: [4] [ 640/2502] eta: 1:31:08 lr: 0.000018 loss_cls: 2.9234 (2.7689) grad_norm: 1.6994 (1.7090) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 20:08:23 root] (utils.py 283): INFO Epoch: [4] [ 650/2502] eta: 1:30:39 lr: 0.000018 loss_cls: 2.5319 (2.7654) grad_norm: 1.5730 (1.7066) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 20:08:52 root] (utils.py 283): INFO Epoch: [4] [ 660/2502] eta: 1:30:10 lr: 0.000018 loss_cls: 2.7753 (2.7657) grad_norm: 1.5718 (1.7062) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 20:09:21 root] (utils.py 283): INFO Epoch: [4] [ 670/2502] eta: 1:29:40 lr: 0.000018 loss_cls: 2.9412 (2.7675) grad_norm: 1.5697 (1.7040) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-10 20:09:51 root] (utils.py 283): INFO Epoch: [4] [ 680/2502] eta: 1:29:11 lr: 0.000018 loss_cls: 2.9779 (2.7706) grad_norm: 1.5696 (1.7044) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-10 20:10:21 root] (utils.py 283): INFO Epoch: [4] [ 690/2502] eta: 1:28:43 lr: 0.000018 loss_cls: 2.9548 (2.7714) grad_norm: 1.7224 (1.7055) time: 2.9567 data: 0.0003 max mem: 29202 +[2024-12-10 20:10:50 root] (utils.py 283): INFO Epoch: [4] [ 700/2502] eta: 1:28:14 lr: 0.000018 loss_cls: 2.8964 (2.7722) grad_norm: 1.6886 (1.7052) time: 2.9584 data: 0.0003 max mem: 29202 +[2024-12-10 20:11:20 root] (utils.py 283): INFO Epoch: [4] [ 710/2502] eta: 1:27:45 lr: 0.000018 loss_cls: 3.0196 (2.7721) grad_norm: 1.6420 (1.7053) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-10 20:11:49 root] (utils.py 283): INFO Epoch: [4] [ 720/2502] eta: 1:27:15 lr: 0.000018 loss_cls: 2.8566 (2.7706) grad_norm: 1.6563 (1.7056) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-10 20:12:18 root] (utils.py 283): INFO Epoch: [4] [ 730/2502] eta: 1:26:46 lr: 0.000018 loss_cls: 2.9344 (2.7725) grad_norm: 1.6563 (1.7054) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-10 20:12:48 root] (utils.py 283): INFO Epoch: [4] [ 740/2502] eta: 1:26:17 lr: 0.000018 loss_cls: 3.0307 (2.7730) grad_norm: 1.6813 (1.7063) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 20:13:17 root] (utils.py 283): INFO Epoch: [4] [ 750/2502] eta: 1:25:48 lr: 0.000018 loss_cls: 2.9714 (2.7744) grad_norm: 1.6229 (1.7055) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-10 20:13:47 root] (utils.py 283): INFO Epoch: [4] [ 760/2502] eta: 1:25:19 lr: 0.000018 loss_cls: 2.7431 (2.7735) grad_norm: 1.6022 (1.7071) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 20:14:16 root] (utils.py 283): INFO Epoch: [4] [ 770/2502] eta: 1:24:49 lr: 0.000018 loss_cls: 2.7431 (2.7718) grad_norm: 1.6462 (1.7071) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 20:14:46 root] (utils.py 283): INFO Epoch: [4] [ 780/2502] eta: 1:24:20 lr: 0.000018 loss_cls: 2.8712 (2.7732) grad_norm: 1.6599 (1.7066) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 20:15:15 root] (utils.py 283): INFO Epoch: [4] [ 790/2502] eta: 1:23:51 lr: 0.000018 loss_cls: 2.8712 (2.7705) grad_norm: 1.6252 (1.7049) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-10 20:15:45 root] (utils.py 283): INFO Epoch: [4] [ 800/2502] eta: 1:23:21 lr: 0.000018 loss_cls: 2.8233 (2.7710) grad_norm: 1.5329 (1.7043) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 20:16:14 root] (utils.py 283): INFO Epoch: [4] [ 810/2502] eta: 1:22:52 lr: 0.000018 loss_cls: 2.9165 (2.7705) grad_norm: 1.6145 (1.7035) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-10 20:16:44 root] (utils.py 283): INFO Epoch: [4] [ 820/2502] eta: 1:22:23 lr: 0.000018 loss_cls: 2.6330 (2.7676) grad_norm: 1.5585 (1.7028) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 20:17:13 root] (utils.py 283): INFO Epoch: [4] [ 830/2502] eta: 1:21:54 lr: 0.000018 loss_cls: 2.9271 (2.7700) grad_norm: 1.6462 (1.7028) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-10 20:17:43 root] (utils.py 283): INFO Epoch: [4] [ 840/2502] eta: 1:21:25 lr: 0.000018 loss_cls: 2.9849 (2.7698) grad_norm: 1.6963 (1.7034) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-10 20:18:12 root] (utils.py 283): INFO Epoch: [4] [ 850/2502] eta: 1:20:55 lr: 0.000018 loss_cls: 2.9788 (2.7710) grad_norm: 1.7129 (1.7036) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 20:18:41 root] (utils.py 283): INFO Epoch: [4] [ 860/2502] eta: 1:20:26 lr: 0.000018 loss_cls: 2.8546 (2.7719) grad_norm: 1.7129 (1.7035) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 20:19:11 root] (utils.py 283): INFO Epoch: [4] [ 870/2502] eta: 1:19:57 lr: 0.000018 loss_cls: 2.8960 (2.7721) grad_norm: 1.6698 (1.7031) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-10 20:19:40 root] (utils.py 283): INFO Epoch: [4] [ 880/2502] eta: 1:19:28 lr: 0.000018 loss_cls: 2.9436 (2.7719) grad_norm: 1.6521 (1.7026) time: 2.9503 data: 0.0003 max mem: 29202 +[2024-12-10 20:20:10 root] (utils.py 283): INFO Epoch: [4] [ 890/2502] eta: 1:18:58 lr: 0.000018 loss_cls: 2.7956 (2.7712) grad_norm: 1.6349 (1.7021) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-10 20:20:39 root] (utils.py 283): INFO Epoch: [4] [ 900/2502] eta: 1:18:29 lr: 0.000018 loss_cls: 2.8075 (2.7719) grad_norm: 1.5817 (1.7014) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 20:21:09 root] (utils.py 283): INFO Epoch: [4] [ 910/2502] eta: 1:17:59 lr: 0.000018 loss_cls: 2.8188 (2.7697) grad_norm: 1.5817 (1.7010) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-10 20:21:38 root] (utils.py 283): INFO Epoch: [4] [ 920/2502] eta: 1:17:30 lr: 0.000018 loss_cls: 2.5006 (2.7684) grad_norm: 1.6716 (1.7014) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-10 20:22:07 root] (utils.py 283): INFO Epoch: [4] [ 930/2502] eta: 1:17:01 lr: 0.000018 loss_cls: 2.5006 (2.7669) grad_norm: 1.6111 (1.6996) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-10 20:22:37 root] (utils.py 283): INFO Epoch: [4] [ 940/2502] eta: 1:16:31 lr: 0.000018 loss_cls: 2.7366 (2.7677) grad_norm: 1.5544 (1.6990) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-10 20:23:06 root] (utils.py 283): INFO Epoch: [4] [ 950/2502] eta: 1:16:02 lr: 0.000018 loss_cls: 2.7628 (2.7672) grad_norm: 1.6408 (1.6982) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 20:23:36 root] (utils.py 283): INFO Epoch: [4] [ 960/2502] eta: 1:15:33 lr: 0.000018 loss_cls: 2.9615 (2.7695) grad_norm: 1.6408 (1.6989) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 20:24:05 root] (utils.py 283): INFO Epoch: [4] [ 970/2502] eta: 1:15:03 lr: 0.000018 loss_cls: 2.9615 (2.7696) grad_norm: 1.7198 (1.7074) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 20:24:34 root] (utils.py 283): INFO Epoch: [4] [ 980/2502] eta: 1:14:34 lr: 0.000018 loss_cls: 2.7909 (2.7688) grad_norm: 1.7794 (1.7109) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 20:25:04 root] (utils.py 283): INFO Epoch: [4] [ 990/2502] eta: 1:14:05 lr: 0.000018 loss_cls: 2.8666 (2.7687) grad_norm: 1.6922 (1.7109) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 20:25:33 root] (utils.py 283): INFO Epoch: [4] [1000/2502] eta: 1:13:35 lr: 0.000018 loss_cls: 2.9915 (2.7709) grad_norm: 1.6409 (1.7095) time: 2.9485 data: 0.0003 max mem: 29202 +[2024-12-10 20:26:03 root] (utils.py 283): INFO Epoch: [4] [1010/2502] eta: 1:13:06 lr: 0.000018 loss_cls: 2.9641 (2.7720) grad_norm: 1.6117 (1.7089) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-10 20:26:32 root] (utils.py 283): INFO Epoch: [4] [1020/2502] eta: 1:12:37 lr: 0.000018 loss_cls: 2.7574 (2.7723) grad_norm: 1.6134 (1.7081) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-10 20:27:02 root] (utils.py 283): INFO Epoch: [4] [1030/2502] eta: 1:12:07 lr: 0.000018 loss_cls: 2.7444 (2.7724) grad_norm: 1.6398 (1.7083) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 20:27:32 root] (utils.py 283): INFO Epoch: [4] [1040/2502] eta: 1:11:38 lr: 0.000018 loss_cls: 2.8804 (2.7724) grad_norm: 1.6422 (1.7081) time: 2.9537 data: 0.0003 max mem: 29202 +[2024-12-10 20:28:01 root] (utils.py 283): INFO Epoch: [4] [1050/2502] eta: 1:11:09 lr: 0.000018 loss_cls: 2.7029 (2.7705) grad_norm: 1.6198 (1.7076) time: 2.9528 data: 0.0003 max mem: 29202 +[2024-12-10 20:28:30 root] (utils.py 283): INFO Epoch: [4] [1060/2502] eta: 1:10:40 lr: 0.000018 loss_cls: 2.8822 (2.7719) grad_norm: 1.6014 (1.7060) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-10 20:29:00 root] (utils.py 283): INFO Epoch: [4] [1070/2502] eta: 1:10:10 lr: 0.000018 loss_cls: 2.9265 (2.7711) grad_norm: 1.5394 (1.7059) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 20:29:30 root] (utils.py 283): INFO Epoch: [4] [1080/2502] eta: 1:09:42 lr: 0.000018 loss_cls: 2.7465 (2.7702) grad_norm: 1.6219 (1.7054) time: 2.9662 data: 0.0004 max mem: 29202 +[2024-12-10 20:29:59 root] (utils.py 283): INFO Epoch: [4] [1090/2502] eta: 1:09:13 lr: 0.000018 loss_cls: 2.9198 (2.7718) grad_norm: 1.6211 (1.7046) time: 2.9807 data: 0.0004 max mem: 29202 +[2024-12-10 20:30:29 root] (utils.py 283): INFO Epoch: [4] [1100/2502] eta: 1:08:43 lr: 0.000018 loss_cls: 2.7687 (2.7692) grad_norm: 1.5661 (1.7035) time: 2.9550 data: 0.0003 max mem: 29202 +[2024-12-10 20:30:58 root] (utils.py 283): INFO Epoch: [4] [1110/2502] eta: 1:08:14 lr: 0.000018 loss_cls: 2.7473 (2.7697) grad_norm: 1.5661 (1.7043) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 20:31:28 root] (utils.py 283): INFO Epoch: [4] [1120/2502] eta: 1:07:44 lr: 0.000018 loss_cls: 2.8965 (2.7707) grad_norm: 1.6596 (1.7036) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-10 20:31:57 root] (utils.py 283): INFO Epoch: [4] [1130/2502] eta: 1:07:15 lr: 0.000018 loss_cls: 2.9013 (2.7701) grad_norm: 1.6609 (1.7032) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 20:32:27 root] (utils.py 283): INFO Epoch: [4] [1140/2502] eta: 1:06:46 lr: 0.000018 loss_cls: 2.9013 (2.7712) grad_norm: 1.6874 (1.7034) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-10 20:32:56 root] (utils.py 283): INFO Epoch: [4] [1150/2502] eta: 1:06:16 lr: 0.000018 loss_cls: 2.9422 (2.7714) grad_norm: 1.6529 (1.7025) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 20:33:26 root] (utils.py 283): INFO Epoch: [4] [1160/2502] eta: 1:05:47 lr: 0.000018 loss_cls: 2.8862 (2.7710) grad_norm: 1.6360 (1.7025) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 20:33:55 root] (utils.py 283): INFO Epoch: [4] [1170/2502] eta: 1:05:17 lr: 0.000018 loss_cls: 2.8570 (2.7699) grad_norm: 1.6685 (1.7046) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-10 20:34:24 root] (utils.py 283): INFO Epoch: [4] [1180/2502] eta: 1:04:48 lr: 0.000018 loss_cls: 2.6582 (2.7684) grad_norm: 1.7451 (1.7051) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 20:34:54 root] (utils.py 283): INFO Epoch: [4] [1190/2502] eta: 1:04:19 lr: 0.000018 loss_cls: 2.9117 (2.7680) grad_norm: 1.6536 (1.7045) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-10 20:35:23 root] (utils.py 283): INFO Epoch: [4] [1200/2502] eta: 1:03:50 lr: 0.000018 loss_cls: 2.9117 (2.7674) grad_norm: 1.5152 (1.7034) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 20:35:53 root] (utils.py 283): INFO Epoch: [4] [1210/2502] eta: 1:03:20 lr: 0.000018 loss_cls: 2.8114 (2.7680) grad_norm: 1.5873 (1.7036) time: 2.9489 data: 0.0003 max mem: 29202 +[2024-12-10 20:36:22 root] (utils.py 283): INFO Epoch: [4] [1220/2502] eta: 1:02:51 lr: 0.000018 loss_cls: 2.7660 (2.7665) grad_norm: 1.6485 (1.7036) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-10 20:36:52 root] (utils.py 283): INFO Epoch: [4] [1230/2502] eta: 1:02:21 lr: 0.000018 loss_cls: 2.5949 (2.7667) grad_norm: 1.6305 (1.7030) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 20:37:21 root] (utils.py 283): INFO Epoch: [4] [1240/2502] eta: 1:01:52 lr: 0.000018 loss_cls: 2.8581 (2.7649) grad_norm: 1.5485 (1.7019) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-10 20:37:51 root] (utils.py 283): INFO Epoch: [4] [1250/2502] eta: 1:01:23 lr: 0.000018 loss_cls: 2.9581 (2.7654) grad_norm: 1.6106 (1.7022) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-10 20:38:20 root] (utils.py 283): INFO Epoch: [4] [1260/2502] eta: 1:00:53 lr: 0.000018 loss_cls: 2.9595 (2.7652) grad_norm: 1.6356 (1.7019) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-10 20:38:50 root] (utils.py 283): INFO Epoch: [4] [1270/2502] eta: 1:00:24 lr: 0.000018 loss_cls: 2.5887 (2.7643) grad_norm: 1.6262 (1.7011) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-10 20:39:19 root] (utils.py 283): INFO Epoch: [4] [1280/2502] eta: 0:59:54 lr: 0.000018 loss_cls: 2.8379 (2.7646) grad_norm: 1.6262 (1.7015) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 20:39:48 root] (utils.py 283): INFO Epoch: [4] [1290/2502] eta: 0:59:25 lr: 0.000018 loss_cls: 2.9921 (2.7658) grad_norm: 1.6548 (1.7019) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 20:40:18 root] (utils.py 283): INFO Epoch: [4] [1300/2502] eta: 0:58:56 lr: 0.000018 loss_cls: 2.9518 (2.7667) grad_norm: 1.6615 (1.7024) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-10 20:40:47 root] (utils.py 283): INFO Epoch: [4] [1310/2502] eta: 0:58:26 lr: 0.000018 loss_cls: 2.8508 (2.7661) grad_norm: 1.6615 (1.7022) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 20:41:17 root] (utils.py 283): INFO Epoch: [4] [1320/2502] eta: 0:57:57 lr: 0.000018 loss_cls: 2.7385 (2.7668) grad_norm: 1.5649 (1.7011) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-10 20:41:46 root] (utils.py 283): INFO Epoch: [4] [1330/2502] eta: 0:57:27 lr: 0.000018 loss_cls: 2.7497 (2.7663) grad_norm: 1.5591 (1.7004) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-10 20:42:16 root] (utils.py 283): INFO Epoch: [4] [1340/2502] eta: 0:56:58 lr: 0.000018 loss_cls: 2.8532 (2.7678) grad_norm: 1.6421 (1.7007) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 20:42:45 root] (utils.py 283): INFO Epoch: [4] [1350/2502] eta: 0:56:29 lr: 0.000018 loss_cls: 2.9867 (2.7686) grad_norm: 1.6656 (1.7007) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 20:43:14 root] (utils.py 283): INFO Epoch: [4] [1360/2502] eta: 0:55:59 lr: 0.000018 loss_cls: 2.9292 (2.7684) grad_norm: 1.6904 (1.7009) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-10 20:43:44 root] (utils.py 283): INFO Epoch: [4] [1370/2502] eta: 0:55:30 lr: 0.000018 loss_cls: 2.9348 (2.7689) grad_norm: 1.6566 (1.7000) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-10 20:44:13 root] (utils.py 283): INFO Epoch: [4] [1380/2502] eta: 0:55:00 lr: 0.000018 loss_cls: 2.7763 (2.7678) grad_norm: 1.6484 (1.6998) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 20:44:43 root] (utils.py 283): INFO Epoch: [4] [1390/2502] eta: 0:54:31 lr: 0.000018 loss_cls: 2.7287 (2.7674) grad_norm: 1.6171 (1.6989) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-10 20:45:12 root] (utils.py 283): INFO Epoch: [4] [1400/2502] eta: 0:54:01 lr: 0.000018 loss_cls: 2.7359 (2.7659) grad_norm: 1.6000 (1.6989) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 20:45:42 root] (utils.py 283): INFO Epoch: [4] [1410/2502] eta: 0:53:32 lr: 0.000018 loss_cls: 2.9570 (2.7678) grad_norm: 1.6526 (1.6987) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 20:46:11 root] (utils.py 283): INFO Epoch: [4] [1420/2502] eta: 0:53:03 lr: 0.000018 loss_cls: 3.0134 (2.7672) grad_norm: 1.6111 (1.6981) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 20:46:40 root] (utils.py 283): INFO Epoch: [4] [1430/2502] eta: 0:52:33 lr: 0.000018 loss_cls: 2.7501 (2.7676) grad_norm: 1.6321 (1.6987) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 20:47:10 root] (utils.py 283): INFO Epoch: [4] [1440/2502] eta: 0:52:04 lr: 0.000018 loss_cls: 2.8508 (2.7675) grad_norm: 1.6192 (1.6988) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-10 20:47:39 root] (utils.py 283): INFO Epoch: [4] [1450/2502] eta: 0:51:34 lr: 0.000018 loss_cls: 2.8717 (2.7677) grad_norm: 1.6678 (1.7000) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-10 20:48:09 root] (utils.py 283): INFO Epoch: [4] [1460/2502] eta: 0:51:05 lr: 0.000018 loss_cls: 2.9519 (2.7687) grad_norm: 1.6668 (1.6996) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 20:48:38 root] (utils.py 283): INFO Epoch: [4] [1470/2502] eta: 0:50:35 lr: 0.000018 loss_cls: 2.9296 (2.7680) grad_norm: 1.6335 (1.7005) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 20:49:07 root] (utils.py 283): INFO Epoch: [4] [1480/2502] eta: 0:50:06 lr: 0.000018 loss_cls: 3.0582 (2.7703) grad_norm: 1.7625 (1.7014) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 20:49:37 root] (utils.py 283): INFO Epoch: [4] [1490/2502] eta: 0:49:37 lr: 0.000018 loss_cls: 3.1596 (2.7723) grad_norm: 1.7200 (1.7014) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 20:50:06 root] (utils.py 283): INFO Epoch: [4] [1500/2502] eta: 0:49:07 lr: 0.000018 loss_cls: 3.0639 (2.7737) grad_norm: 1.6368 (1.7008) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 20:50:36 root] (utils.py 283): INFO Epoch: [4] [1510/2502] eta: 0:48:38 lr: 0.000018 loss_cls: 2.9710 (2.7744) grad_norm: 1.5719 (1.7004) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 20:51:05 root] (utils.py 283): INFO Epoch: [4] [1520/2502] eta: 0:48:08 lr: 0.000018 loss_cls: 2.8112 (2.7729) grad_norm: 1.5439 (1.7002) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 20:51:35 root] (utils.py 283): INFO Epoch: [4] [1530/2502] eta: 0:47:39 lr: 0.000018 loss_cls: 2.8112 (2.7725) grad_norm: 1.7351 (1.7013) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-10 20:52:04 root] (utils.py 283): INFO Epoch: [4] [1540/2502] eta: 0:47:10 lr: 0.000018 loss_cls: 2.9287 (2.7734) grad_norm: 1.7351 (1.7017) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-10 20:52:33 root] (utils.py 283): INFO Epoch: [4] [1550/2502] eta: 0:46:40 lr: 0.000018 loss_cls: 2.9287 (2.7732) grad_norm: 1.7027 (1.7024) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-10 20:53:03 root] (utils.py 283): INFO Epoch: [4] [1560/2502] eta: 0:46:11 lr: 0.000018 loss_cls: 2.6767 (2.7725) grad_norm: 1.6426 (1.7024) time: 2.9375 data: 0.0002 max mem: 29202 +[2024-12-10 20:53:32 root] (utils.py 283): INFO Epoch: [4] [1570/2502] eta: 0:45:41 lr: 0.000018 loss_cls: 2.6767 (2.7714) grad_norm: 1.5953 (1.7019) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 20:54:02 root] (utils.py 283): INFO Epoch: [4] [1580/2502] eta: 0:45:12 lr: 0.000018 loss_cls: 2.7611 (2.7707) grad_norm: 1.5635 (1.7012) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 20:54:31 root] (utils.py 283): INFO Epoch: [4] [1590/2502] eta: 0:44:43 lr: 0.000018 loss_cls: 2.7611 (2.7703) grad_norm: 1.5693 (1.7011) time: 2.9498 data: 0.0003 max mem: 29202 +[2024-12-10 20:55:01 root] (utils.py 283): INFO Epoch: [4] [1600/2502] eta: 0:44:13 lr: 0.000018 loss_cls: 2.8388 (2.7701) grad_norm: 1.6429 (1.7008) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-10 20:55:30 root] (utils.py 283): INFO Epoch: [4] [1610/2502] eta: 0:43:44 lr: 0.000018 loss_cls: 2.8388 (2.7699) grad_norm: 1.6542 (1.7005) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 20:56:00 root] (utils.py 283): INFO Epoch: [4] [1620/2502] eta: 0:43:14 lr: 0.000018 loss_cls: 2.6534 (2.7684) grad_norm: 1.5948 (1.6998) time: 2.9532 data: 0.0003 max mem: 29202 +[2024-12-10 20:56:29 root] (utils.py 283): INFO Epoch: [4] [1630/2502] eta: 0:42:45 lr: 0.000018 loss_cls: 2.7474 (2.7682) grad_norm: 1.5350 (1.6994) time: 2.9659 data: 0.0003 max mem: 29202 +[2024-12-10 20:56:59 root] (utils.py 283): INFO Epoch: [4] [1640/2502] eta: 0:42:16 lr: 0.000018 loss_cls: 2.9346 (2.7682) grad_norm: 1.5633 (1.6998) time: 2.9536 data: 0.0003 max mem: 29202 +[2024-12-10 20:57:28 root] (utils.py 283): INFO Epoch: [4] [1650/2502] eta: 0:41:46 lr: 0.000018 loss_cls: 2.9663 (2.7682) grad_norm: 1.5633 (1.6995) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 20:57:58 root] (utils.py 283): INFO Epoch: [4] [1660/2502] eta: 0:41:17 lr: 0.000018 loss_cls: 2.7821 (2.7678) grad_norm: 1.6022 (1.6996) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 20:58:27 root] (utils.py 283): INFO Epoch: [4] [1670/2502] eta: 0:40:47 lr: 0.000018 loss_cls: 2.8729 (2.7690) grad_norm: 1.6469 (1.6994) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-10 20:58:57 root] (utils.py 283): INFO Epoch: [4] [1680/2502] eta: 0:40:18 lr: 0.000018 loss_cls: 2.9210 (2.7681) grad_norm: 1.6335 (1.6987) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 20:59:26 root] (utils.py 283): INFO Epoch: [4] [1690/2502] eta: 0:39:49 lr: 0.000018 loss_cls: 2.8081 (2.7685) grad_norm: 1.5986 (1.6981) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-10 20:59:55 root] (utils.py 283): INFO Epoch: [4] [1700/2502] eta: 0:39:19 lr: 0.000018 loss_cls: 2.8376 (2.7688) grad_norm: 1.5315 (1.6974) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-10 21:00:25 root] (utils.py 283): INFO Epoch: [4] [1710/2502] eta: 0:38:50 lr: 0.000018 loss_cls: 2.7167 (2.7676) grad_norm: 1.5411 (1.6966) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-10 21:00:54 root] (utils.py 283): INFO Epoch: [4] [1720/2502] eta: 0:38:20 lr: 0.000018 loss_cls: 2.4115 (2.7662) grad_norm: 1.5443 (1.6961) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 21:01:24 root] (utils.py 283): INFO Epoch: [4] [1730/2502] eta: 0:37:51 lr: 0.000018 loss_cls: 2.5472 (2.7654) grad_norm: 1.5627 (1.6960) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 21:01:54 root] (utils.py 283): INFO Epoch: [4] [1740/2502] eta: 0:37:22 lr: 0.000018 loss_cls: 2.6640 (2.7650) grad_norm: 1.6232 (1.6957) time: 2.9614 data: 0.0004 max mem: 29202 +[2024-12-10 21:02:23 root] (utils.py 283): INFO Epoch: [4] [1750/2502] eta: 0:36:52 lr: 0.000018 loss_cls: 2.7237 (2.7639) grad_norm: 1.5924 (1.6949) time: 2.9614 data: 0.0004 max mem: 29202 +[2024-12-10 21:02:52 root] (utils.py 283): INFO Epoch: [4] [1760/2502] eta: 0:36:23 lr: 0.000018 loss_cls: 2.9066 (2.7645) grad_norm: 1.5924 (1.6960) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 21:03:22 root] (utils.py 283): INFO Epoch: [4] [1770/2502] eta: 0:35:53 lr: 0.000018 loss_cls: 2.9066 (2.7650) grad_norm: 1.6833 (1.6956) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 21:03:51 root] (utils.py 283): INFO Epoch: [4] [1780/2502] eta: 0:35:24 lr: 0.000018 loss_cls: 2.8399 (2.7650) grad_norm: 1.5722 (1.6949) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 21:04:21 root] (utils.py 283): INFO Epoch: [4] [1790/2502] eta: 0:34:55 lr: 0.000018 loss_cls: 2.7037 (2.7642) grad_norm: 1.5185 (1.6944) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 21:04:50 root] (utils.py 283): INFO Epoch: [4] [1800/2502] eta: 0:34:25 lr: 0.000018 loss_cls: 2.7623 (2.7645) grad_norm: 1.5673 (1.6940) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-10 21:05:19 root] (utils.py 283): INFO Epoch: [4] [1810/2502] eta: 0:33:56 lr: 0.000018 loss_cls: 2.8749 (2.7655) grad_norm: 1.6273 (1.6941) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-10 21:05:49 root] (utils.py 283): INFO Epoch: [4] [1820/2502] eta: 0:33:26 lr: 0.000018 loss_cls: 2.6769 (2.7648) grad_norm: 1.6294 (1.6950) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-10 21:06:18 root] (utils.py 283): INFO Epoch: [4] [1830/2502] eta: 0:32:57 lr: 0.000018 loss_cls: 2.6718 (2.7642) grad_norm: 1.5950 (1.6946) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 21:06:48 root] (utils.py 283): INFO Epoch: [4] [1840/2502] eta: 0:32:27 lr: 0.000018 loss_cls: 2.8543 (2.7646) grad_norm: 1.5876 (1.6938) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 21:07:17 root] (utils.py 283): INFO Epoch: [4] [1850/2502] eta: 0:31:58 lr: 0.000018 loss_cls: 2.8720 (2.7639) grad_norm: 1.6411 (1.6936) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 21:07:47 root] (utils.py 283): INFO Epoch: [4] [1860/2502] eta: 0:31:29 lr: 0.000018 loss_cls: 2.7076 (2.7635) grad_norm: 1.6979 (1.6937) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 21:08:16 root] (utils.py 283): INFO Epoch: [4] [1870/2502] eta: 0:30:59 lr: 0.000018 loss_cls: 2.7347 (2.7637) grad_norm: 1.6979 (1.6943) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 21:08:46 root] (utils.py 283): INFO Epoch: [4] [1880/2502] eta: 0:30:30 lr: 0.000018 loss_cls: 2.7347 (2.7626) grad_norm: 1.7165 (1.6947) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 21:09:15 root] (utils.py 283): INFO Epoch: [4] [1890/2502] eta: 0:30:00 lr: 0.000018 loss_cls: 2.9405 (2.7638) grad_norm: 1.7139 (1.6945) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 21:09:44 root] (utils.py 283): INFO Epoch: [4] [1900/2502] eta: 0:29:31 lr: 0.000018 loss_cls: 2.9405 (2.7643) grad_norm: 1.6621 (1.6952) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 21:10:14 root] (utils.py 283): INFO Epoch: [4] [1910/2502] eta: 0:29:01 lr: 0.000018 loss_cls: 2.8555 (2.7644) grad_norm: 1.6690 (1.6952) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 21:10:43 root] (utils.py 283): INFO Epoch: [4] [1920/2502] eta: 0:28:32 lr: 0.000018 loss_cls: 2.8734 (2.7647) grad_norm: 1.6615 (1.6953) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-10 21:11:13 root] (utils.py 283): INFO Epoch: [4] [1930/2502] eta: 0:28:03 lr: 0.000018 loss_cls: 2.6646 (2.7642) grad_norm: 1.6615 (1.6961) time: 2.9531 data: 0.0003 max mem: 29202 +[2024-12-10 21:11:42 root] (utils.py 283): INFO Epoch: [4] [1940/2502] eta: 0:27:33 lr: 0.000018 loss_cls: 2.7917 (2.7647) grad_norm: 1.8182 (1.6979) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 21:12:12 root] (utils.py 283): INFO Epoch: [4] [1950/2502] eta: 0:27:04 lr: 0.000018 loss_cls: 2.8451 (2.7639) grad_norm: 1.6308 (1.6973) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 21:12:41 root] (utils.py 283): INFO Epoch: [4] [1960/2502] eta: 0:26:34 lr: 0.000018 loss_cls: 2.7659 (2.7642) grad_norm: 1.6269 (1.6975) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 21:13:11 root] (utils.py 283): INFO Epoch: [4] [1970/2502] eta: 0:26:05 lr: 0.000018 loss_cls: 2.6879 (2.7639) grad_norm: 1.7201 (1.6971) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 21:13:40 root] (utils.py 283): INFO Epoch: [4] [1980/2502] eta: 0:25:36 lr: 0.000018 loss_cls: 2.5782 (2.7635) grad_norm: 1.7170 (1.6969) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-10 21:14:10 root] (utils.py 283): INFO Epoch: [4] [1990/2502] eta: 0:25:06 lr: 0.000018 loss_cls: 2.6671 (2.7626) grad_norm: 1.6618 (1.6970) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 21:14:39 root] (utils.py 283): INFO Epoch: [4] [2000/2502] eta: 0:24:37 lr: 0.000018 loss_cls: 2.9869 (2.7640) grad_norm: 1.6722 (1.6974) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-10 21:15:08 root] (utils.py 283): INFO Epoch: [4] [2010/2502] eta: 0:24:07 lr: 0.000018 loss_cls: 2.9022 (2.7636) grad_norm: 1.6641 (1.6972) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 21:15:38 root] (utils.py 283): INFO Epoch: [4] [2020/2502] eta: 0:23:38 lr: 0.000018 loss_cls: 2.8190 (2.7634) grad_norm: 1.6641 (1.6982) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 21:16:07 root] (utils.py 283): INFO Epoch: [4] [2030/2502] eta: 0:23:08 lr: 0.000018 loss_cls: 2.8755 (2.7641) grad_norm: 1.5802 (1.6973) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-10 21:16:37 root] (utils.py 283): INFO Epoch: [4] [2040/2502] eta: 0:22:39 lr: 0.000018 loss_cls: 2.8755 (2.7636) grad_norm: 1.5598 (1.6972) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 21:17:06 root] (utils.py 283): INFO Epoch: [4] [2050/2502] eta: 0:22:10 lr: 0.000018 loss_cls: 2.8967 (2.7641) grad_norm: 1.6100 (1.6967) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-10 21:17:36 root] (utils.py 283): INFO Epoch: [4] [2060/2502] eta: 0:21:40 lr: 0.000018 loss_cls: 2.9038 (2.7638) grad_norm: 1.6719 (1.6970) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 21:18:05 root] (utils.py 283): INFO Epoch: [4] [2070/2502] eta: 0:21:11 lr: 0.000018 loss_cls: 2.9038 (2.7644) grad_norm: 1.7078 (1.6974) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 21:18:34 root] (utils.py 283): INFO Epoch: [4] [2080/2502] eta: 0:20:41 lr: 0.000018 loss_cls: 2.8374 (2.7647) grad_norm: 1.6452 (1.6971) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 21:19:04 root] (utils.py 283): INFO Epoch: [4] [2090/2502] eta: 0:20:12 lr: 0.000018 loss_cls: 2.7282 (2.7628) grad_norm: 1.6103 (1.6970) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 21:19:33 root] (utils.py 283): INFO Epoch: [4] [2100/2502] eta: 0:19:42 lr: 0.000018 loss_cls: 2.5541 (2.7622) grad_norm: 1.5885 (1.6968) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 21:20:03 root] (utils.py 283): INFO Epoch: [4] [2110/2502] eta: 0:19:13 lr: 0.000018 loss_cls: 2.6420 (2.7616) grad_norm: 1.5524 (1.6963) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 21:20:32 root] (utils.py 283): INFO Epoch: [4] [2120/2502] eta: 0:18:44 lr: 0.000018 loss_cls: 2.7572 (2.7612) grad_norm: 1.5964 (1.6960) time: 2.9511 data: 0.0003 max mem: 29202 +[2024-12-10 21:21:02 root] (utils.py 283): INFO Epoch: [4] [2130/2502] eta: 0:18:14 lr: 0.000018 loss_cls: 2.6735 (2.7602) grad_norm: 1.6073 (1.6965) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-10 21:21:31 root] (utils.py 283): INFO Epoch: [4] [2140/2502] eta: 0:17:45 lr: 0.000018 loss_cls: 2.6735 (2.7599) grad_norm: 1.6576 (1.6961) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 21:22:01 root] (utils.py 283): INFO Epoch: [4] [2150/2502] eta: 0:17:15 lr: 0.000018 loss_cls: 2.7648 (2.7596) grad_norm: 1.6566 (1.6962) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 21:22:30 root] (utils.py 283): INFO Epoch: [4] [2160/2502] eta: 0:16:46 lr: 0.000018 loss_cls: 2.8352 (2.7606) grad_norm: 1.6428 (1.6957) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-10 21:23:00 root] (utils.py 283): INFO Epoch: [4] [2170/2502] eta: 0:16:17 lr: 0.000018 loss_cls: 2.7397 (2.7599) grad_norm: 1.6280 (1.6959) time: 2.9499 data: 0.0003 max mem: 29202 +[2024-12-10 21:23:29 root] (utils.py 283): INFO Epoch: [4] [2180/2502] eta: 0:15:47 lr: 0.000018 loss_cls: 2.5878 (2.7588) grad_norm: 1.5959 (1.6953) time: 2.9642 data: 0.0003 max mem: 29202 +[2024-12-10 21:23:59 root] (utils.py 283): INFO Epoch: [4] [2190/2502] eta: 0:15:18 lr: 0.000018 loss_cls: 2.6364 (2.7580) grad_norm: 1.6731 (1.6956) time: 2.9740 data: 0.0003 max mem: 29202 +[2024-12-10 21:24:29 root] (utils.py 283): INFO Epoch: [4] [2200/2502] eta: 0:14:48 lr: 0.000018 loss_cls: 2.6326 (2.7572) grad_norm: 1.7342 (1.6955) time: 2.9616 data: 0.0003 max mem: 29202 +[2024-12-10 21:24:58 root] (utils.py 283): INFO Epoch: [4] [2210/2502] eta: 0:14:19 lr: 0.000018 loss_cls: 2.7196 (2.7573) grad_norm: 1.6151 (1.6956) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 21:25:28 root] (utils.py 283): INFO Epoch: [4] [2220/2502] eta: 0:13:49 lr: 0.000018 loss_cls: 2.8427 (2.7572) grad_norm: 1.6020 (1.6957) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 21:25:57 root] (utils.py 283): INFO Epoch: [4] [2230/2502] eta: 0:13:20 lr: 0.000018 loss_cls: 2.8427 (2.7571) grad_norm: 1.5733 (1.6952) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-10 21:26:26 root] (utils.py 283): INFO Epoch: [4] [2240/2502] eta: 0:12:51 lr: 0.000018 loss_cls: 2.8103 (2.7570) grad_norm: 1.6096 (1.6956) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-10 21:26:56 root] (utils.py 283): INFO Epoch: [4] [2250/2502] eta: 0:12:21 lr: 0.000018 loss_cls: 2.8713 (2.7576) grad_norm: 1.7011 (1.6954) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-10 21:27:25 root] (utils.py 283): INFO Epoch: [4] [2260/2502] eta: 0:11:52 lr: 0.000018 loss_cls: 2.9424 (2.7582) grad_norm: 1.6497 (1.6954) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 21:27:55 root] (utils.py 283): INFO Epoch: [4] [2270/2502] eta: 0:11:22 lr: 0.000018 loss_cls: 2.7550 (2.7573) grad_norm: 1.6459 (1.6953) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-10 21:28:24 root] (utils.py 283): INFO Epoch: [4] [2280/2502] eta: 0:10:53 lr: 0.000018 loss_cls: 2.7642 (2.7581) grad_norm: 1.6613 (1.6954) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 21:28:54 root] (utils.py 283): INFO Epoch: [4] [2290/2502] eta: 0:10:23 lr: 0.000018 loss_cls: 2.9248 (2.7585) grad_norm: 1.6272 (1.6948) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-10 21:29:23 root] (utils.py 283): INFO Epoch: [4] [2300/2502] eta: 0:09:54 lr: 0.000018 loss_cls: 2.9201 (2.7583) grad_norm: 1.6129 (1.6950) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 21:29:53 root] (utils.py 283): INFO Epoch: [4] [2310/2502] eta: 0:09:25 lr: 0.000018 loss_cls: 2.7825 (2.7585) grad_norm: 1.6448 (1.6950) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 21:30:22 root] (utils.py 283): INFO Epoch: [4] [2320/2502] eta: 0:08:55 lr: 0.000018 loss_cls: 2.8844 (2.7591) grad_norm: 1.6714 (1.6948) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 21:30:52 root] (utils.py 283): INFO Epoch: [4] [2330/2502] eta: 0:08:26 lr: 0.000018 loss_cls: 2.8630 (2.7582) grad_norm: 1.6528 (1.6953) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 21:31:21 root] (utils.py 283): INFO Epoch: [4] [2340/2502] eta: 0:07:56 lr: 0.000018 loss_cls: 2.8515 (2.7585) grad_norm: 1.6177 (1.6964) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-10 21:31:50 root] (utils.py 283): INFO Epoch: [4] [2350/2502] eta: 0:07:27 lr: 0.000018 loss_cls: 2.8625 (2.7577) grad_norm: 1.5989 (1.6960) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-10 21:32:20 root] (utils.py 283): INFO Epoch: [4] [2360/2502] eta: 0:06:57 lr: 0.000018 loss_cls: 2.5784 (2.7563) grad_norm: 1.6040 (1.6963) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-10 21:32:49 root] (utils.py 283): INFO Epoch: [4] [2370/2502] eta: 0:06:28 lr: 0.000018 loss_cls: 2.5784 (2.7559) grad_norm: 1.5950 (1.6958) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-10 21:33:18 root] (utils.py 283): INFO Epoch: [4] [2380/2502] eta: 0:05:59 lr: 0.000018 loss_cls: 2.9356 (2.7561) grad_norm: 1.5558 (1.6957) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-10 21:33:48 root] (utils.py 283): INFO Epoch: [4] [2390/2502] eta: 0:05:29 lr: 0.000018 loss_cls: 2.7755 (2.7555) grad_norm: 1.5751 (1.6957) time: 2.9625 data: 0.0003 max mem: 29202 +[2024-12-10 21:34:18 root] (utils.py 283): INFO Epoch: [4] [2400/2502] eta: 0:05:00 lr: 0.000018 loss_cls: 2.6333 (2.7559) grad_norm: 1.6155 (1.6956) time: 2.9616 data: 0.0003 max mem: 29202 +[2024-12-10 21:34:47 root] (utils.py 283): INFO Epoch: [4] [2410/2502] eta: 0:04:30 lr: 0.000018 loss_cls: 2.6591 (2.7549) grad_norm: 1.6304 (1.6953) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 21:35:17 root] (utils.py 283): INFO Epoch: [4] [2420/2502] eta: 0:04:01 lr: 0.000018 loss_cls: 2.8320 (2.7551) grad_norm: 1.6304 (1.6950) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-10 21:35:46 root] (utils.py 283): INFO Epoch: [4] [2430/2502] eta: 0:03:31 lr: 0.000018 loss_cls: 2.8320 (2.7544) grad_norm: 1.6507 (1.6954) time: 2.9439 data: 0.0002 max mem: 29202 +[2024-12-10 21:36:15 root] (utils.py 283): INFO Epoch: [4] [2440/2502] eta: 0:03:02 lr: 0.000018 loss_cls: 2.7108 (2.7548) grad_norm: 1.7402 (1.6959) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 21:36:45 root] (utils.py 283): INFO Epoch: [4] [2450/2502] eta: 0:02:33 lr: 0.000018 loss_cls: 2.8657 (2.7542) grad_norm: 1.6903 (1.6961) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 21:37:14 root] (utils.py 283): INFO Epoch: [4] [2460/2502] eta: 0:02:03 lr: 0.000018 loss_cls: 2.8657 (2.7548) grad_norm: 1.6141 (1.6959) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 21:37:44 root] (utils.py 283): INFO Epoch: [4] [2470/2502] eta: 0:01:34 lr: 0.000018 loss_cls: 2.7966 (2.7546) grad_norm: 1.5991 (1.6961) time: 2.9537 data: 0.0003 max mem: 29202 +[2024-12-10 21:38:13 root] (utils.py 283): INFO Epoch: [4] [2480/2502] eta: 0:01:04 lr: 0.000018 loss_cls: 2.7949 (2.7543) grad_norm: 1.5991 (1.6962) time: 2.9551 data: 0.0003 max mem: 29202 +[2024-12-10 21:38:43 root] (utils.py 283): INFO Epoch: [4] [2490/2502] eta: 0:00:35 lr: 0.000018 loss_cls: 2.8660 (2.7542) grad_norm: 1.6689 (1.6963) time: 2.9694 data: 0.0248 max mem: 29202 +[2024-12-10 21:39:13 root] (utils.py 283): INFO Epoch: [4] [2500/2502] eta: 0:00:05 lr: 0.000018 loss_cls: 2.7866 (2.7539) grad_norm: 1.6660 (1.6959) time: 2.9689 data: 0.0247 max mem: 29202 +[2024-12-10 21:39:16 root] (utils.py 283): INFO Epoch: [4] [2501/2502] eta: 0:00:02 lr: 0.000018 loss_cls: 2.7866 (2.7540) grad_norm: 1.6660 (1.6960) time: 2.9692 data: 0.0247 max mem: 29202 +[2024-12-10 21:39:16 root] (utils.py 297): INFO Epoch: [4] Total time: 2:02:45 (2.9438 s / it) +[2024-12-10 21:39:16 root] (engine.py 179): INFO Averaged stats:lr: 0.000018 loss_cls: 2.7866 (2.7598) grad_norm: 1.6660 (1.6960) +[2024-12-10 21:39:20 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3296 (0.3296) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5678 data: 0.0003 max mem: 29202 +[2024-12-10 21:39:25 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6677 (0.6212) acc1: 85.9375 (86.7188) acc3: 96.8750 (96.5909) acc5: 98.4375 (98.0114) time: 0.5530 data: 0.0004 max mem: 29202 +[2024-12-10 21:39:31 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6677 (0.6701) acc1: 85.9375 (85.7143) acc3: 96.0938 (95.8333) acc5: 97.6562 (97.6190) time: 0.5521 data: 0.0004 max mem: 29202 +[2024-12-10 21:39:36 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6712 (0.6986) acc1: 85.1562 (84.8034) acc3: 94.5312 (95.6653) acc5: 96.8750 (97.4294) time: 0.5528 data: 0.0004 max mem: 29202 +[2024-12-10 21:39:42 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6886 (0.7022) acc1: 84.3750 (84.9276) acc3: 96.0938 (95.7127) acc5: 97.6562 (97.4466) time: 0.5526 data: 0.0004 max mem: 29202 +[2024-12-10 21:39:48 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9230 (0.7891) acc1: 78.9062 (83.2108) acc3: 92.9688 (94.5925) acc5: 94.5312 (96.5227) time: 0.5526 data: 0.0004 max mem: 29202 +[2024-12-10 21:39:53 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:21 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9941 (0.8192) acc1: 78.1250 (82.8125) acc3: 89.8438 (93.9677) acc5: 92.9688 (96.0169) time: 0.5528 data: 0.0004 max mem: 29202 +[2024-12-10 21:39:59 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0071 (0.8518) acc1: 78.9062 (82.0423) acc3: 90.6250 (93.5849) acc5: 92.9688 (95.7636) time: 0.5529 data: 0.0004 max mem: 29202 +[2024-12-10 21:40:04 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0782 (0.8784) acc1: 77.3438 (81.5104) acc3: 90.6250 (93.1617) acc5: 93.7500 (95.4475) time: 0.5539 data: 0.0007 max mem: 29202 +[2024-12-10 21:40:10 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0861 (0.9069) acc1: 76.5625 (80.8036) acc3: 89.8438 (92.8486) acc5: 93.7500 (95.2610) time: 0.5539 data: 0.0006 max mem: 29202 +[2024-12-10 21:40:13 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0170 (0.9054) acc1: 76.5625 (80.7280) acc3: 91.4062 (92.9120) acc5: 93.7500 (95.3360) time: 0.5443 data: 0.0005 max mem: 29202 +[2024-12-10 21:40:13 root] (utils.py 297): INFO Test: Total time: 0:00:54 (0.5514 s / it) +[2024-12-10 21:40:13 root] (engine.py 264): INFO * Acc@1 80.768 Acc@3 92.702 Acc@5 95.130 loss 0.900 flops 13.207 layer_flops 13.109 +[2024-12-10 21:40:13 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 80.8% +[2024-12-10 21:40:16 root] (main.py 576): INFO Max accuracy: 80.77% +[2024-12-10 21:40:19 root] (utils.py 283): INFO Epoch: [5] [ 0/2502] eta: 2:00:16 lr: 0.000017 loss_cls: 3.1176 (3.1176) grad_norm: 1.6576 (1.6576) time: 2.8843 data: 0.0003 max mem: 29202 +[2024-12-10 21:40:48 root] (utils.py 283): INFO Epoch: [5] [ 10/2502] eta: 2:01:37 lr: 0.000017 loss_cls: 2.7752 (2.9119) grad_norm: 1.6576 (1.6561) time: 2.9283 data: 0.0003 max mem: 29202 +[2024-12-10 21:41:17 root] (utils.py 283): INFO Epoch: [5] [ 20/2502] eta: 2:01:12 lr: 0.000017 loss_cls: 2.8371 (2.8581) grad_norm: 1.6430 (1.6823) time: 2.9323 data: 0.0003 max mem: 29202 +[2024-12-10 21:41:47 root] (utils.py 283): INFO Epoch: [5] [ 30/2502] eta: 2:00:43 lr: 0.000017 loss_cls: 2.8114 (2.7886) grad_norm: 1.6169 (1.6464) time: 2.9313 data: 0.0003 max mem: 29202 +[2024-12-10 21:42:16 root] (utils.py 283): INFO Epoch: [5] [ 40/2502] eta: 2:00:21 lr: 0.000017 loss_cls: 2.6915 (2.7923) grad_norm: 1.6303 (1.6533) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-10 21:42:45 root] (utils.py 283): INFO Epoch: [5] [ 50/2502] eta: 1:59:51 lr: 0.000017 loss_cls: 2.7768 (2.7570) grad_norm: 1.5333 (1.6350) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-10 21:43:15 root] (utils.py 283): INFO Epoch: [5] [ 60/2502] eta: 1:59:20 lr: 0.000017 loss_cls: 2.6547 (2.7469) grad_norm: 1.5519 (1.6677) time: 2.9302 data: 0.0003 max mem: 29202 +[2024-12-10 21:43:44 root] (utils.py 283): INFO Epoch: [5] [ 70/2502] eta: 1:58:49 lr: 0.000017 loss_cls: 2.9216 (2.7718) grad_norm: 1.6455 (1.6768) time: 2.9283 data: 0.0003 max mem: 29202 +[2024-12-10 21:44:13 root] (utils.py 283): INFO Epoch: [5] [ 80/2502] eta: 1:58:20 lr: 0.000017 loss_cls: 3.0743 (2.8054) grad_norm: 1.6875 (1.6938) time: 2.9305 data: 0.0003 max mem: 29202 +[2024-12-10 21:44:43 root] (utils.py 283): INFO Epoch: [5] [ 90/2502] eta: 1:57:52 lr: 0.000017 loss_cls: 3.0081 (2.7946) grad_norm: 1.6728 (1.6898) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-10 21:45:12 root] (utils.py 283): INFO Epoch: [5] [ 100/2502] eta: 1:57:24 lr: 0.000017 loss_cls: 2.5974 (2.7815) grad_norm: 1.5212 (1.6987) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-10 21:45:41 root] (utils.py 283): INFO Epoch: [5] [ 110/2502] eta: 1:56:55 lr: 0.000017 loss_cls: 2.7689 (2.7814) grad_norm: 1.5764 (1.6885) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-10 21:46:11 root] (utils.py 283): INFO Epoch: [5] [ 120/2502] eta: 1:56:25 lr: 0.000017 loss_cls: 2.8193 (2.7812) grad_norm: 1.6162 (1.6861) time: 2.9326 data: 0.0002 max mem: 29202 +[2024-12-10 21:46:40 root] (utils.py 283): INFO Epoch: [5] [ 130/2502] eta: 1:55:55 lr: 0.000017 loss_cls: 2.8243 (2.7775) grad_norm: 1.6739 (1.6866) time: 2.9311 data: 0.0002 max mem: 29202 +[2024-12-10 21:47:09 root] (utils.py 283): INFO Epoch: [5] [ 140/2502] eta: 1:55:26 lr: 0.000017 loss_cls: 2.8259 (2.7791) grad_norm: 1.6395 (1.6788) time: 2.9298 data: 0.0002 max mem: 29202 +[2024-12-10 21:47:39 root] (utils.py 283): INFO Epoch: [5] [ 150/2502] eta: 1:54:57 lr: 0.000017 loss_cls: 2.8542 (2.7841) grad_norm: 1.5951 (1.6814) time: 2.9319 data: 0.0003 max mem: 29202 +[2024-12-10 21:48:08 root] (utils.py 283): INFO Epoch: [5] [ 160/2502] eta: 1:54:28 lr: 0.000017 loss_cls: 2.9497 (2.7943) grad_norm: 1.6004 (1.6816) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-10 21:48:37 root] (utils.py 283): INFO Epoch: [5] [ 170/2502] eta: 1:53:59 lr: 0.000017 loss_cls: 2.9209 (2.7965) grad_norm: 1.5924 (1.6790) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-10 21:49:07 root] (utils.py 283): INFO Epoch: [5] [ 180/2502] eta: 1:53:30 lr: 0.000017 loss_cls: 2.8918 (2.7935) grad_norm: 1.6421 (1.6811) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-10 21:49:36 root] (utils.py 283): INFO Epoch: [5] [ 190/2502] eta: 1:53:01 lr: 0.000017 loss_cls: 2.9086 (2.7940) grad_norm: 1.5011 (1.6723) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-10 21:50:05 root] (utils.py 283): INFO Epoch: [5] [ 200/2502] eta: 1:52:32 lr: 0.000017 loss_cls: 2.9609 (2.7893) grad_norm: 1.5415 (1.6750) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-10 21:50:35 root] (utils.py 283): INFO Epoch: [5] [ 210/2502] eta: 1:52:03 lr: 0.000017 loss_cls: 2.9609 (2.7957) grad_norm: 1.7642 (1.6844) time: 2.9326 data: 0.0003 max mem: 29202 +[2024-12-10 21:51:04 root] (utils.py 283): INFO Epoch: [5] [ 220/2502] eta: 1:51:33 lr: 0.000017 loss_cls: 2.9528 (2.7967) grad_norm: 1.7288 (1.6829) time: 2.9316 data: 0.0003 max mem: 29202 +[2024-12-10 21:51:33 root] (utils.py 283): INFO Epoch: [5] [ 230/2502] eta: 1:51:04 lr: 0.000017 loss_cls: 2.8861 (2.7952) grad_norm: 1.6604 (1.6812) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-10 21:52:03 root] (utils.py 283): INFO Epoch: [5] [ 240/2502] eta: 1:50:35 lr: 0.000017 loss_cls: 2.7025 (2.7830) grad_norm: 1.6727 (1.6798) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-10 21:52:32 root] (utils.py 283): INFO Epoch: [5] [ 250/2502] eta: 1:50:05 lr: 0.000017 loss_cls: 2.6801 (2.7859) grad_norm: 1.6675 (1.6825) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-10 21:53:01 root] (utils.py 283): INFO Epoch: [5] [ 260/2502] eta: 1:49:35 lr: 0.000017 loss_cls: 2.9780 (2.7929) grad_norm: 1.6624 (1.6810) time: 2.9301 data: 0.0003 max mem: 29202 +[2024-12-10 21:53:31 root] (utils.py 283): INFO Epoch: [5] [ 270/2502] eta: 1:49:06 lr: 0.000017 loss_cls: 2.6663 (2.7827) grad_norm: 1.5567 (1.6767) time: 2.9306 data: 0.0003 max mem: 29202 +[2024-12-10 21:54:00 root] (utils.py 283): INFO Epoch: [5] [ 280/2502] eta: 1:48:36 lr: 0.000017 loss_cls: 2.6119 (2.7779) grad_norm: 1.5593 (1.6739) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-10 21:54:29 root] (utils.py 283): INFO Epoch: [5] [ 290/2502] eta: 1:48:07 lr: 0.000017 loss_cls: 2.6598 (2.7732) grad_norm: 1.6223 (1.6802) time: 2.9299 data: 0.0003 max mem: 29202 +[2024-12-10 21:54:59 root] (utils.py 283): INFO Epoch: [5] [ 300/2502] eta: 1:47:37 lr: 0.000017 loss_cls: 2.7955 (2.7706) grad_norm: 1.6635 (1.6779) time: 2.9311 data: 0.0003 max mem: 29202 +[2024-12-10 21:55:28 root] (utils.py 283): INFO Epoch: [5] [ 310/2502] eta: 1:47:08 lr: 0.000017 loss_cls: 2.9591 (2.7748) grad_norm: 1.6635 (1.6802) time: 2.9338 data: 0.0003 max mem: 29202 +[2024-12-10 21:55:57 root] (utils.py 283): INFO Epoch: [5] [ 320/2502] eta: 1:46:40 lr: 0.000017 loss_cls: 2.9102 (2.7741) grad_norm: 1.5744 (1.6779) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 21:56:27 root] (utils.py 283): INFO Epoch: [5] [ 330/2502] eta: 1:46:10 lr: 0.000017 loss_cls: 2.8102 (2.7709) grad_norm: 1.5584 (1.6779) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-10 21:56:56 root] (utils.py 283): INFO Epoch: [5] [ 340/2502] eta: 1:45:41 lr: 0.000017 loss_cls: 2.8206 (2.7751) grad_norm: 1.5650 (1.6739) time: 2.9327 data: 0.0003 max mem: 29202 +[2024-12-10 21:57:25 root] (utils.py 283): INFO Epoch: [5] [ 350/2502] eta: 1:45:11 lr: 0.000017 loss_cls: 2.8206 (2.7682) grad_norm: 1.5347 (1.6710) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-10 21:57:55 root] (utils.py 283): INFO Epoch: [5] [ 360/2502] eta: 1:44:42 lr: 0.000017 loss_cls: 2.7159 (2.7714) grad_norm: 1.5866 (1.6717) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-10 21:58:24 root] (utils.py 283): INFO Epoch: [5] [ 370/2502] eta: 1:44:13 lr: 0.000017 loss_cls: 2.9411 (2.7721) grad_norm: 1.5682 (1.6679) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-10 21:58:53 root] (utils.py 283): INFO Epoch: [5] [ 380/2502] eta: 1:43:44 lr: 0.000017 loss_cls: 2.9411 (2.7703) grad_norm: 1.5031 (1.6669) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-10 21:59:23 root] (utils.py 283): INFO Epoch: [5] [ 390/2502] eta: 1:43:14 lr: 0.000017 loss_cls: 2.9541 (2.7702) grad_norm: 1.6100 (1.6662) time: 2.9311 data: 0.0003 max mem: 29202 +[2024-12-10 21:59:52 root] (utils.py 283): INFO Epoch: [5] [ 400/2502] eta: 1:42:45 lr: 0.000017 loss_cls: 3.0784 (2.7753) grad_norm: 1.6381 (1.6679) time: 2.9296 data: 0.0003 max mem: 29202 +[2024-12-10 22:00:21 root] (utils.py 283): INFO Epoch: [5] [ 410/2502] eta: 1:42:15 lr: 0.000017 loss_cls: 3.0977 (2.7804) grad_norm: 1.6286 (1.6661) time: 2.9313 data: 0.0003 max mem: 29202 +[2024-12-10 22:00:51 root] (utils.py 283): INFO Epoch: [5] [ 420/2502] eta: 1:41:46 lr: 0.000017 loss_cls: 2.8024 (2.7786) grad_norm: 1.5893 (1.6642) time: 2.9321 data: 0.0003 max mem: 29202 +[2024-12-10 22:01:20 root] (utils.py 283): INFO Epoch: [5] [ 430/2502] eta: 1:41:16 lr: 0.000017 loss_cls: 2.7772 (2.7776) grad_norm: 1.5920 (1.6660) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-10 22:01:49 root] (utils.py 283): INFO Epoch: [5] [ 440/2502] eta: 1:40:48 lr: 0.000017 loss_cls: 2.8738 (2.7757) grad_norm: 1.5507 (1.6637) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 22:02:19 root] (utils.py 283): INFO Epoch: [5] [ 450/2502] eta: 1:40:18 lr: 0.000017 loss_cls: 2.9397 (2.7780) grad_norm: 1.5054 (1.6610) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 22:02:48 root] (utils.py 283): INFO Epoch: [5] [ 460/2502] eta: 1:39:49 lr: 0.000017 loss_cls: 2.9846 (2.7790) grad_norm: 1.5517 (1.6610) time: 2.9276 data: 0.0003 max mem: 29202 +[2024-12-10 22:03:17 root] (utils.py 283): INFO Epoch: [5] [ 470/2502] eta: 1:39:20 lr: 0.000017 loss_cls: 2.9023 (2.7758) grad_norm: 1.6757 (1.6611) time: 2.9311 data: 0.0003 max mem: 29202 +[2024-12-10 22:03:47 root] (utils.py 283): INFO Epoch: [5] [ 480/2502] eta: 1:38:50 lr: 0.000017 loss_cls: 2.7845 (2.7719) grad_norm: 1.5868 (1.6616) time: 2.9319 data: 0.0003 max mem: 29202 +[2024-12-10 22:04:16 root] (utils.py 283): INFO Epoch: [5] [ 490/2502] eta: 1:38:21 lr: 0.000017 loss_cls: 2.6457 (2.7689) grad_norm: 1.5972 (1.6703) time: 2.9309 data: 0.0003 max mem: 29202 +[2024-12-10 22:04:45 root] (utils.py 283): INFO Epoch: [5] [ 500/2502] eta: 1:37:51 lr: 0.000017 loss_cls: 2.8673 (2.7716) grad_norm: 1.5972 (1.6691) time: 2.9298 data: 0.0003 max mem: 29202 +[2024-12-10 22:05:15 root] (utils.py 283): INFO Epoch: [5] [ 510/2502] eta: 1:37:22 lr: 0.000017 loss_cls: 3.0664 (2.7763) grad_norm: 1.5514 (1.6705) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-10 22:05:44 root] (utils.py 283): INFO Epoch: [5] [ 520/2502] eta: 1:36:52 lr: 0.000017 loss_cls: 2.9089 (2.7764) grad_norm: 1.7015 (1.6757) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-10 22:06:13 root] (utils.py 283): INFO Epoch: [5] [ 530/2502] eta: 1:36:24 lr: 0.000017 loss_cls: 2.8543 (2.7786) grad_norm: 1.7443 (1.6772) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-10 22:06:43 root] (utils.py 283): INFO Epoch: [5] [ 540/2502] eta: 1:35:56 lr: 0.000017 loss_cls: 2.9529 (2.7799) grad_norm: 1.6201 (1.6779) time: 2.9623 data: 0.0003 max mem: 29202 +[2024-12-10 22:07:12 root] (utils.py 283): INFO Epoch: [5] [ 550/2502] eta: 1:35:27 lr: 0.000017 loss_cls: 2.8238 (2.7790) grad_norm: 1.6009 (1.6776) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 22:07:42 root] (utils.py 283): INFO Epoch: [5] [ 560/2502] eta: 1:34:58 lr: 0.000017 loss_cls: 2.7799 (2.7779) grad_norm: 1.6227 (1.6774) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-10 22:08:11 root] (utils.py 283): INFO Epoch: [5] [ 570/2502] eta: 1:34:29 lr: 0.000017 loss_cls: 2.6715 (2.7731) grad_norm: 1.6490 (1.6777) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-10 22:08:41 root] (utils.py 283): INFO Epoch: [5] [ 580/2502] eta: 1:34:00 lr: 0.000017 loss_cls: 2.8950 (2.7748) grad_norm: 1.5567 (1.6766) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 22:09:10 root] (utils.py 283): INFO Epoch: [5] [ 590/2502] eta: 1:33:31 lr: 0.000017 loss_cls: 2.9711 (2.7758) grad_norm: 1.6080 (1.6777) time: 2.9499 data: 0.0003 max mem: 29202 +[2024-12-10 22:09:40 root] (utils.py 283): INFO Epoch: [5] [ 600/2502] eta: 1:33:02 lr: 0.000017 loss_cls: 2.9125 (2.7750) grad_norm: 1.6005 (1.6757) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 22:10:09 root] (utils.py 283): INFO Epoch: [5] [ 610/2502] eta: 1:32:33 lr: 0.000017 loss_cls: 2.6700 (2.7727) grad_norm: 1.5864 (1.6763) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 22:10:39 root] (utils.py 283): INFO Epoch: [5] [ 620/2502] eta: 1:32:04 lr: 0.000017 loss_cls: 2.7733 (2.7719) grad_norm: 1.6855 (1.6766) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 22:11:08 root] (utils.py 283): INFO Epoch: [5] [ 630/2502] eta: 1:31:35 lr: 0.000017 loss_cls: 2.7945 (2.7707) grad_norm: 1.6285 (1.6780) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 22:11:37 root] (utils.py 283): INFO Epoch: [5] [ 640/2502] eta: 1:31:05 lr: 0.000017 loss_cls: 2.9597 (2.7741) grad_norm: 1.5707 (1.6789) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 22:12:07 root] (utils.py 283): INFO Epoch: [5] [ 650/2502] eta: 1:30:36 lr: 0.000017 loss_cls: 2.9434 (2.7733) grad_norm: 1.6208 (1.6787) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-10 22:12:36 root] (utils.py 283): INFO Epoch: [5] [ 660/2502] eta: 1:30:07 lr: 0.000017 loss_cls: 2.7613 (2.7692) grad_norm: 1.5355 (1.6769) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 22:13:06 root] (utils.py 283): INFO Epoch: [5] [ 670/2502] eta: 1:29:38 lr: 0.000017 loss_cls: 2.8168 (2.7714) grad_norm: 1.5508 (1.6766) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-10 22:13:35 root] (utils.py 283): INFO Epoch: [5] [ 680/2502] eta: 1:29:09 lr: 0.000017 loss_cls: 2.7683 (2.7694) grad_norm: 1.6542 (1.6766) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 22:14:05 root] (utils.py 283): INFO Epoch: [5] [ 690/2502] eta: 1:28:40 lr: 0.000017 loss_cls: 2.6881 (2.7696) grad_norm: 1.5734 (1.6752) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 22:14:34 root] (utils.py 283): INFO Epoch: [5] [ 700/2502] eta: 1:28:11 lr: 0.000017 loss_cls: 2.8404 (2.7707) grad_norm: 1.6689 (1.6768) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-10 22:15:04 root] (utils.py 283): INFO Epoch: [5] [ 710/2502] eta: 1:27:41 lr: 0.000017 loss_cls: 2.9343 (2.7732) grad_norm: 1.6588 (1.6755) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 22:15:33 root] (utils.py 283): INFO Epoch: [5] [ 720/2502] eta: 1:27:12 lr: 0.000017 loss_cls: 2.9343 (2.7736) grad_norm: 1.6500 (1.6771) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 22:16:02 root] (utils.py 283): INFO Epoch: [5] [ 730/2502] eta: 1:26:43 lr: 0.000017 loss_cls: 2.7099 (2.7709) grad_norm: 1.6718 (1.6774) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 22:16:32 root] (utils.py 283): INFO Epoch: [5] [ 740/2502] eta: 1:26:14 lr: 0.000017 loss_cls: 2.6007 (2.7678) grad_norm: 1.6529 (1.6768) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-10 22:17:01 root] (utils.py 283): INFO Epoch: [5] [ 750/2502] eta: 1:25:45 lr: 0.000017 loss_cls: 2.7090 (2.7666) grad_norm: 1.6529 (1.6781) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-10 22:17:31 root] (utils.py 283): INFO Epoch: [5] [ 760/2502] eta: 1:25:15 lr: 0.000017 loss_cls: 2.7090 (2.7649) grad_norm: 1.6577 (1.6784) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 22:18:00 root] (utils.py 283): INFO Epoch: [5] [ 770/2502] eta: 1:24:46 lr: 0.000017 loss_cls: 2.5982 (2.7638) grad_norm: 1.5278 (1.6766) time: 2.9277 data: 0.0003 max mem: 29202 +[2024-12-10 22:18:29 root] (utils.py 283): INFO Epoch: [5] [ 780/2502] eta: 1:24:16 lr: 0.000017 loss_cls: 2.8874 (2.7651) grad_norm: 1.6237 (1.6776) time: 2.9291 data: 0.0003 max mem: 29202 +[2024-12-10 22:18:58 root] (utils.py 283): INFO Epoch: [5] [ 790/2502] eta: 1:23:47 lr: 0.000017 loss_cls: 3.0255 (2.7642) grad_norm: 1.5749 (1.6759) time: 2.9280 data: 0.0003 max mem: 29202 +[2024-12-10 22:19:28 root] (utils.py 283): INFO Epoch: [5] [ 800/2502] eta: 1:23:17 lr: 0.000017 loss_cls: 2.5879 (2.7588) grad_norm: 1.5340 (1.6747) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-10 22:19:57 root] (utils.py 283): INFO Epoch: [5] [ 810/2502] eta: 1:22:48 lr: 0.000017 loss_cls: 2.4796 (2.7574) grad_norm: 1.5592 (1.6734) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-10 22:20:27 root] (utils.py 283): INFO Epoch: [5] [ 820/2502] eta: 1:22:19 lr: 0.000017 loss_cls: 2.7866 (2.7577) grad_norm: 1.5960 (1.6732) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-10 22:20:56 root] (utils.py 283): INFO Epoch: [5] [ 830/2502] eta: 1:21:50 lr: 0.000017 loss_cls: 2.8010 (2.7565) grad_norm: 1.5819 (1.6722) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-10 22:21:26 root] (utils.py 283): INFO Epoch: [5] [ 840/2502] eta: 1:21:20 lr: 0.000017 loss_cls: 2.8776 (2.7571) grad_norm: 1.5655 (1.6707) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 22:21:55 root] (utils.py 283): INFO Epoch: [5] [ 850/2502] eta: 1:20:51 lr: 0.000017 loss_cls: 2.8776 (2.7581) grad_norm: 1.6163 (1.6724) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-10 22:22:24 root] (utils.py 283): INFO Epoch: [5] [ 860/2502] eta: 1:20:22 lr: 0.000017 loss_cls: 2.9692 (2.7601) grad_norm: 1.6392 (1.6724) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 22:22:54 root] (utils.py 283): INFO Epoch: [5] [ 870/2502] eta: 1:19:53 lr: 0.000017 loss_cls: 2.9121 (2.7620) grad_norm: 1.6181 (1.6732) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 22:23:23 root] (utils.py 283): INFO Epoch: [5] [ 880/2502] eta: 1:19:23 lr: 0.000017 loss_cls: 2.8108 (2.7604) grad_norm: 1.6171 (1.6728) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 22:23:53 root] (utils.py 283): INFO Epoch: [5] [ 890/2502] eta: 1:18:54 lr: 0.000017 loss_cls: 2.4414 (2.7556) grad_norm: 1.6074 (1.6724) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 22:24:22 root] (utils.py 283): INFO Epoch: [5] [ 900/2502] eta: 1:18:25 lr: 0.000017 loss_cls: 2.1977 (2.7506) grad_norm: 1.5551 (1.6709) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-10 22:24:52 root] (utils.py 283): INFO Epoch: [5] [ 910/2502] eta: 1:17:56 lr: 0.000017 loss_cls: 2.6478 (2.7508) grad_norm: 1.5672 (1.6708) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 22:25:21 root] (utils.py 283): INFO Epoch: [5] [ 920/2502] eta: 1:17:26 lr: 0.000017 loss_cls: 2.9071 (2.7524) grad_norm: 1.6104 (1.6706) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-10 22:25:51 root] (utils.py 283): INFO Epoch: [5] [ 930/2502] eta: 1:16:57 lr: 0.000017 loss_cls: 2.9815 (2.7549) grad_norm: 1.5985 (1.6711) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-10 22:26:20 root] (utils.py 283): INFO Epoch: [5] [ 940/2502] eta: 1:16:28 lr: 0.000017 loss_cls: 2.9326 (2.7556) grad_norm: 1.5645 (1.6699) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 22:26:49 root] (utils.py 283): INFO Epoch: [5] [ 950/2502] eta: 1:15:59 lr: 0.000017 loss_cls: 2.8536 (2.7549) grad_norm: 1.5796 (1.6695) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 22:27:19 root] (utils.py 283): INFO Epoch: [5] [ 960/2502] eta: 1:15:29 lr: 0.000017 loss_cls: 2.8565 (2.7563) grad_norm: 1.6827 (1.6702) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-10 22:27:48 root] (utils.py 283): INFO Epoch: [5] [ 970/2502] eta: 1:15:00 lr: 0.000017 loss_cls: 2.8565 (2.7559) grad_norm: 1.6273 (1.6697) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 22:28:18 root] (utils.py 283): INFO Epoch: [5] [ 980/2502] eta: 1:14:31 lr: 0.000017 loss_cls: 2.6727 (2.7556) grad_norm: 1.5818 (1.6692) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 22:28:47 root] (utils.py 283): INFO Epoch: [5] [ 990/2502] eta: 1:14:01 lr: 0.000017 loss_cls: 2.6506 (2.7548) grad_norm: 1.5582 (1.6680) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-10 22:29:17 root] (utils.py 283): INFO Epoch: [5] [1000/2502] eta: 1:13:32 lr: 0.000017 loss_cls: 2.7746 (2.7562) grad_norm: 1.5391 (1.6670) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 22:29:46 root] (utils.py 283): INFO Epoch: [5] [1010/2502] eta: 1:13:03 lr: 0.000017 loss_cls: 2.7291 (2.7524) grad_norm: 1.5391 (1.6663) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-10 22:30:15 root] (utils.py 283): INFO Epoch: [5] [1020/2502] eta: 1:12:33 lr: 0.000017 loss_cls: 2.6013 (2.7508) grad_norm: 1.5241 (1.6647) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-10 22:30:45 root] (utils.py 283): INFO Epoch: [5] [1030/2502] eta: 1:12:04 lr: 0.000017 loss_cls: 2.6533 (2.7506) grad_norm: 1.5223 (1.6648) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 22:31:14 root] (utils.py 283): INFO Epoch: [5] [1040/2502] eta: 1:11:35 lr: 0.000017 loss_cls: 2.8342 (2.7496) grad_norm: 1.6306 (1.6652) time: 2.9532 data: 0.0003 max mem: 29202 +[2024-12-10 22:31:44 root] (utils.py 283): INFO Epoch: [5] [1050/2502] eta: 1:11:06 lr: 0.000017 loss_cls: 2.8349 (2.7491) grad_norm: 1.5319 (1.6635) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-10 22:32:13 root] (utils.py 283): INFO Epoch: [5] [1060/2502] eta: 1:10:37 lr: 0.000017 loss_cls: 2.6648 (2.7474) grad_norm: 1.5319 (1.6628) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 22:32:43 root] (utils.py 283): INFO Epoch: [5] [1070/2502] eta: 1:10:07 lr: 0.000017 loss_cls: 2.5556 (2.7470) grad_norm: 1.5695 (1.6629) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 22:33:12 root] (utils.py 283): INFO Epoch: [5] [1080/2502] eta: 1:09:38 lr: 0.000017 loss_cls: 2.8307 (2.7471) grad_norm: 1.7380 (1.6638) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-10 22:33:42 root] (utils.py 283): INFO Epoch: [5] [1090/2502] eta: 1:09:08 lr: 0.000017 loss_cls: 2.6566 (2.7449) grad_norm: 1.6597 (1.6633) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-10 22:34:11 root] (utils.py 283): INFO Epoch: [5] [1100/2502] eta: 1:08:39 lr: 0.000017 loss_cls: 2.6743 (2.7444) grad_norm: 1.6597 (1.6643) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-10 22:34:40 root] (utils.py 283): INFO Epoch: [5] [1110/2502] eta: 1:08:10 lr: 0.000017 loss_cls: 2.9046 (2.7439) grad_norm: 1.6843 (1.6640) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-10 22:35:10 root] (utils.py 283): INFO Epoch: [5] [1120/2502] eta: 1:07:40 lr: 0.000017 loss_cls: 2.9366 (2.7459) grad_norm: 1.5208 (1.6631) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-10 22:35:39 root] (utils.py 283): INFO Epoch: [5] [1130/2502] eta: 1:07:11 lr: 0.000017 loss_cls: 2.8492 (2.7428) grad_norm: 1.5003 (1.6622) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-10 22:36:08 root] (utils.py 283): INFO Epoch: [5] [1140/2502] eta: 1:06:41 lr: 0.000017 loss_cls: 2.3444 (2.7402) grad_norm: 1.5113 (1.6621) time: 2.9348 data: 0.0002 max mem: 29202 +[2024-12-10 22:36:38 root] (utils.py 283): INFO Epoch: [5] [1150/2502] eta: 1:06:12 lr: 0.000017 loss_cls: 2.3610 (2.7391) grad_norm: 1.5552 (1.6620) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-10 22:37:07 root] (utils.py 283): INFO Epoch: [5] [1160/2502] eta: 1:05:43 lr: 0.000017 loss_cls: 2.8128 (2.7401) grad_norm: 1.5866 (1.6622) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-10 22:37:37 root] (utils.py 283): INFO Epoch: [5] [1170/2502] eta: 1:05:13 lr: 0.000017 loss_cls: 2.8852 (2.7409) grad_norm: 1.5697 (1.6613) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-10 22:38:06 root] (utils.py 283): INFO Epoch: [5] [1180/2502] eta: 1:04:44 lr: 0.000017 loss_cls: 2.8337 (2.7394) grad_norm: 1.5920 (1.6617) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-10 22:38:36 root] (utils.py 283): INFO Epoch: [5] [1190/2502] eta: 1:04:15 lr: 0.000017 loss_cls: 2.7423 (2.7398) grad_norm: 1.6654 (1.6613) time: 2.9505 data: 0.0003 max mem: 29202 +[2024-12-10 22:39:05 root] (utils.py 283): INFO Epoch: [5] [1200/2502] eta: 1:03:45 lr: 0.000017 loss_cls: 2.9521 (2.7408) grad_norm: 1.6035 (1.6616) time: 2.9502 data: 0.0003 max mem: 29202 +[2024-12-10 22:39:35 root] (utils.py 283): INFO Epoch: [5] [1210/2502] eta: 1:03:16 lr: 0.000017 loss_cls: 2.7336 (2.7404) grad_norm: 1.5699 (1.6611) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 22:40:04 root] (utils.py 283): INFO Epoch: [5] [1220/2502] eta: 1:02:47 lr: 0.000017 loss_cls: 2.6562 (2.7399) grad_norm: 1.6065 (1.6608) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 22:40:33 root] (utils.py 283): INFO Epoch: [5] [1230/2502] eta: 1:02:18 lr: 0.000017 loss_cls: 2.7853 (2.7398) grad_norm: 1.6342 (1.6611) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 22:41:03 root] (utils.py 283): INFO Epoch: [5] [1240/2502] eta: 1:01:48 lr: 0.000017 loss_cls: 2.8592 (2.7415) grad_norm: 1.6482 (1.6608) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-10 22:41:32 root] (utils.py 283): INFO Epoch: [5] [1250/2502] eta: 1:01:18 lr: 0.000017 loss_cls: 2.8592 (2.7419) grad_norm: 1.6153 (1.6625) time: 2.9234 data: 0.0003 max mem: 29202 +[2024-12-10 22:42:01 root] (utils.py 283): INFO Epoch: [5] [1260/2502] eta: 1:00:49 lr: 0.000017 loss_cls: 2.7782 (2.7422) grad_norm: 1.6153 (1.6633) time: 2.9251 data: 0.0003 max mem: 29202 +[2024-12-10 22:42:30 root] (utils.py 283): INFO Epoch: [5] [1270/2502] eta: 1:00:19 lr: 0.000017 loss_cls: 2.8937 (2.7439) grad_norm: 1.6189 (1.6636) time: 2.9232 data: 0.0003 max mem: 29202 +[2024-12-10 22:43:00 root] (utils.py 283): INFO Epoch: [5] [1280/2502] eta: 0:59:50 lr: 0.000017 loss_cls: 3.0006 (2.7435) grad_norm: 1.6079 (1.6643) time: 2.9207 data: 0.0003 max mem: 29202 +[2024-12-10 22:43:29 root] (utils.py 283): INFO Epoch: [5] [1290/2502] eta: 0:59:20 lr: 0.000017 loss_cls: 2.6566 (2.7435) grad_norm: 1.6780 (1.6646) time: 2.9217 data: 0.0003 max mem: 29202 +[2024-12-10 22:43:58 root] (utils.py 283): INFO Epoch: [5] [1300/2502] eta: 0:58:51 lr: 0.000017 loss_cls: 2.6500 (2.7418) grad_norm: 1.7078 (1.6648) time: 2.9220 data: 0.0003 max mem: 29202 +[2024-12-10 22:44:27 root] (utils.py 283): INFO Epoch: [5] [1310/2502] eta: 0:58:21 lr: 0.000017 loss_cls: 2.6199 (2.7407) grad_norm: 1.6346 (1.6647) time: 2.9225 data: 0.0003 max mem: 29202 +[2024-12-10 22:44:57 root] (utils.py 283): INFO Epoch: [5] [1320/2502] eta: 0:57:52 lr: 0.000017 loss_cls: 2.7264 (2.7405) grad_norm: 1.6346 (1.6641) time: 2.9232 data: 0.0003 max mem: 29202 +[2024-12-10 22:45:26 root] (utils.py 283): INFO Epoch: [5] [1330/2502] eta: 0:57:22 lr: 0.000017 loss_cls: 2.9023 (2.7413) grad_norm: 1.6039 (1.6638) time: 2.9261 data: 0.0003 max mem: 29202 +[2024-12-10 22:45:55 root] (utils.py 283): INFO Epoch: [5] [1340/2502] eta: 0:56:53 lr: 0.000017 loss_cls: 2.9395 (2.7409) grad_norm: 1.6007 (1.6643) time: 2.9254 data: 0.0002 max mem: 29202 +[2024-12-10 22:46:24 root] (utils.py 283): INFO Epoch: [5] [1350/2502] eta: 0:56:23 lr: 0.000017 loss_cls: 2.7941 (2.7412) grad_norm: 1.6643 (1.6642) time: 2.9234 data: 0.0003 max mem: 29202 +[2024-12-10 22:46:54 root] (utils.py 283): INFO Epoch: [5] [1360/2502] eta: 0:55:54 lr: 0.000017 loss_cls: 2.7941 (2.7417) grad_norm: 1.6686 (1.6648) time: 2.9255 data: 0.0003 max mem: 29202 +[2024-12-10 22:47:23 root] (utils.py 283): INFO Epoch: [5] [1370/2502] eta: 0:55:24 lr: 0.000017 loss_cls: 2.7413 (2.7404) grad_norm: 1.6803 (1.6650) time: 2.9306 data: 0.0003 max mem: 29202 +[2024-12-10 22:47:52 root] (utils.py 283): INFO Epoch: [5] [1380/2502] eta: 0:54:55 lr: 0.000017 loss_cls: 2.8755 (2.7412) grad_norm: 1.6887 (1.6655) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-10 22:48:22 root] (utils.py 283): INFO Epoch: [5] [1390/2502] eta: 0:54:26 lr: 0.000017 loss_cls: 2.9586 (2.7412) grad_norm: 1.6346 (1.6653) time: 2.9408 data: 0.0002 max mem: 29202 +[2024-12-10 22:48:51 root] (utils.py 283): INFO Epoch: [5] [1400/2502] eta: 0:53:56 lr: 0.000017 loss_cls: 2.8128 (2.7416) grad_norm: 1.6197 (1.6648) time: 2.9397 data: 0.0002 max mem: 29202 +[2024-12-10 22:49:21 root] (utils.py 283): INFO Epoch: [5] [1410/2502] eta: 0:53:27 lr: 0.000017 loss_cls: 2.8094 (2.7405) grad_norm: 1.5080 (1.6640) time: 2.9457 data: 0.0002 max mem: 29202 +[2024-12-10 22:49:50 root] (utils.py 283): INFO Epoch: [5] [1420/2502] eta: 0:52:58 lr: 0.000017 loss_cls: 2.8482 (2.7399) grad_norm: 1.5309 (1.6633) time: 2.9440 data: 0.0002 max mem: 29202 +[2024-12-10 22:50:19 root] (utils.py 283): INFO Epoch: [5] [1430/2502] eta: 0:52:28 lr: 0.000017 loss_cls: 2.8482 (2.7398) grad_norm: 1.6263 (1.6637) time: 2.9389 data: 0.0002 max mem: 29202 +[2024-12-10 22:50:49 root] (utils.py 283): INFO Epoch: [5] [1440/2502] eta: 0:51:59 lr: 0.000017 loss_cls: 2.7458 (2.7389) grad_norm: 1.6177 (1.6639) time: 2.9504 data: 0.0002 max mem: 29202 +[2024-12-10 22:51:18 root] (utils.py 283): INFO Epoch: [5] [1450/2502] eta: 0:51:30 lr: 0.000017 loss_cls: 2.7458 (2.7385) grad_norm: 1.5857 (1.6646) time: 2.9512 data: 0.0002 max mem: 29202 +[2024-12-10 22:51:48 root] (utils.py 283): INFO Epoch: [5] [1460/2502] eta: 0:51:01 lr: 0.000017 loss_cls: 2.7725 (2.7381) grad_norm: 1.6217 (1.6650) time: 2.9441 data: 0.0002 max mem: 29202 +[2024-12-10 22:52:17 root] (utils.py 283): INFO Epoch: [5] [1470/2502] eta: 0:50:31 lr: 0.000017 loss_cls: 2.8365 (2.7375) grad_norm: 1.5789 (1.6650) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-10 22:52:47 root] (utils.py 283): INFO Epoch: [5] [1480/2502] eta: 0:50:02 lr: 0.000017 loss_cls: 2.9233 (2.7392) grad_norm: 1.5789 (1.6643) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-10 22:53:16 root] (utils.py 283): INFO Epoch: [5] [1490/2502] eta: 0:49:33 lr: 0.000017 loss_cls: 2.9932 (2.7405) grad_norm: 1.5373 (1.6638) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 22:53:46 root] (utils.py 283): INFO Epoch: [5] [1500/2502] eta: 0:49:03 lr: 0.000017 loss_cls: 3.0096 (2.7416) grad_norm: 1.5611 (1.6644) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 22:54:15 root] (utils.py 283): INFO Epoch: [5] [1510/2502] eta: 0:48:34 lr: 0.000017 loss_cls: 2.7802 (2.7411) grad_norm: 1.5712 (1.6637) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-10 22:54:45 root] (utils.py 283): INFO Epoch: [5] [1520/2502] eta: 0:48:05 lr: 0.000017 loss_cls: 2.8008 (2.7425) grad_norm: 1.6334 (1.6648) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 22:55:14 root] (utils.py 283): INFO Epoch: [5] [1530/2502] eta: 0:47:35 lr: 0.000017 loss_cls: 2.8847 (2.7420) grad_norm: 1.6659 (1.6647) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-10 22:55:43 root] (utils.py 283): INFO Epoch: [5] [1540/2502] eta: 0:47:06 lr: 0.000017 loss_cls: 2.7603 (2.7429) grad_norm: 1.6290 (1.6644) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-10 22:56:13 root] (utils.py 283): INFO Epoch: [5] [1550/2502] eta: 0:46:36 lr: 0.000017 loss_cls: 2.7194 (2.7423) grad_norm: 1.6473 (1.6645) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-10 22:56:42 root] (utils.py 283): INFO Epoch: [5] [1560/2502] eta: 0:46:07 lr: 0.000017 loss_cls: 2.8502 (2.7441) grad_norm: 1.7083 (1.6646) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-10 22:57:12 root] (utils.py 283): INFO Epoch: [5] [1570/2502] eta: 0:45:38 lr: 0.000017 loss_cls: 2.9508 (2.7453) grad_norm: 1.7083 (1.6651) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-10 22:57:41 root] (utils.py 283): INFO Epoch: [5] [1580/2502] eta: 0:45:09 lr: 0.000017 loss_cls: 2.9449 (2.7467) grad_norm: 1.7217 (1.6672) time: 2.9535 data: 0.0003 max mem: 29202 +[2024-12-10 22:58:11 root] (utils.py 283): INFO Epoch: [5] [1590/2502] eta: 0:44:39 lr: 0.000017 loss_cls: 2.9497 (2.7472) grad_norm: 1.6239 (1.6676) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-10 22:58:40 root] (utils.py 283): INFO Epoch: [5] [1600/2502] eta: 0:44:10 lr: 0.000017 loss_cls: 2.6531 (2.7466) grad_norm: 1.6106 (1.6674) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-10 22:59:10 root] (utils.py 283): INFO Epoch: [5] [1610/2502] eta: 0:43:40 lr: 0.000017 loss_cls: 2.6156 (2.7457) grad_norm: 1.5977 (1.6674) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-10 22:59:39 root] (utils.py 283): INFO Epoch: [5] [1620/2502] eta: 0:43:11 lr: 0.000017 loss_cls: 2.8265 (2.7463) grad_norm: 1.6048 (1.6672) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 23:00:09 root] (utils.py 283): INFO Epoch: [5] [1630/2502] eta: 0:42:42 lr: 0.000017 loss_cls: 2.8678 (2.7469) grad_norm: 1.6048 (1.6682) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 23:00:38 root] (utils.py 283): INFO Epoch: [5] [1640/2502] eta: 0:42:12 lr: 0.000017 loss_cls: 3.0320 (2.7482) grad_norm: 1.6931 (1.6688) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-10 23:01:07 root] (utils.py 283): INFO Epoch: [5] [1650/2502] eta: 0:41:43 lr: 0.000017 loss_cls: 3.0320 (2.7484) grad_norm: 1.7012 (1.6695) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-10 23:01:37 root] (utils.py 283): INFO Epoch: [5] [1660/2502] eta: 0:41:14 lr: 0.000017 loss_cls: 2.7572 (2.7476) grad_norm: 1.7008 (1.6701) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 23:02:06 root] (utils.py 283): INFO Epoch: [5] [1670/2502] eta: 0:40:44 lr: 0.000017 loss_cls: 2.5526 (2.7470) grad_norm: 1.7081 (1.6715) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-10 23:02:36 root] (utils.py 283): INFO Epoch: [5] [1680/2502] eta: 0:40:15 lr: 0.000017 loss_cls: 2.9253 (2.7485) grad_norm: 1.7064 (1.6728) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 23:03:05 root] (utils.py 283): INFO Epoch: [5] [1690/2502] eta: 0:39:46 lr: 0.000017 loss_cls: 2.9571 (2.7498) grad_norm: 1.7036 (1.6730) time: 2.9527 data: 0.0003 max mem: 29202 +[2024-12-10 23:03:35 root] (utils.py 283): INFO Epoch: [5] [1700/2502] eta: 0:39:16 lr: 0.000017 loss_cls: 2.7062 (2.7474) grad_norm: 1.6920 (1.6731) time: 2.9611 data: 0.0003 max mem: 29202 +[2024-12-10 23:04:04 root] (utils.py 283): INFO Epoch: [5] [1710/2502] eta: 0:38:47 lr: 0.000017 loss_cls: 2.4367 (2.7466) grad_norm: 1.6346 (1.6731) time: 2.9571 data: 0.0003 max mem: 29202 +[2024-12-10 23:04:34 root] (utils.py 283): INFO Epoch: [5] [1720/2502] eta: 0:38:18 lr: 0.000017 loss_cls: 2.6462 (2.7459) grad_norm: 1.5293 (1.6725) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-10 23:05:03 root] (utils.py 283): INFO Epoch: [5] [1730/2502] eta: 0:37:48 lr: 0.000017 loss_cls: 2.8811 (2.7460) grad_norm: 1.5899 (1.6728) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 23:05:33 root] (utils.py 283): INFO Epoch: [5] [1740/2502] eta: 0:37:19 lr: 0.000017 loss_cls: 2.9483 (2.7467) grad_norm: 1.6424 (1.6731) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-10 23:06:02 root] (utils.py 283): INFO Epoch: [5] [1750/2502] eta: 0:36:50 lr: 0.000017 loss_cls: 2.7701 (2.7461) grad_norm: 1.6059 (1.6733) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 23:06:32 root] (utils.py 283): INFO Epoch: [5] [1760/2502] eta: 0:36:20 lr: 0.000017 loss_cls: 2.6689 (2.7456) grad_norm: 1.6830 (1.6744) time: 2.9551 data: 0.0003 max mem: 29202 +[2024-12-10 23:07:01 root] (utils.py 283): INFO Epoch: [5] [1770/2502] eta: 0:35:51 lr: 0.000017 loss_cls: 2.6560 (2.7448) grad_norm: 1.6658 (1.6752) time: 2.9562 data: 0.0003 max mem: 29202 +[2024-12-10 23:07:31 root] (utils.py 283): INFO Epoch: [5] [1780/2502] eta: 0:35:22 lr: 0.000017 loss_cls: 2.7406 (2.7456) grad_norm: 1.6835 (1.6754) time: 2.9562 data: 0.0003 max mem: 29202 +[2024-12-10 23:08:01 root] (utils.py 283): INFO Epoch: [5] [1790/2502] eta: 0:34:52 lr: 0.000017 loss_cls: 2.8916 (2.7457) grad_norm: 1.7313 (1.6751) time: 2.9607 data: 0.0003 max mem: 29202 +[2024-12-10 23:08:30 root] (utils.py 283): INFO Epoch: [5] [1800/2502] eta: 0:34:23 lr: 0.000017 loss_cls: 2.6313 (2.7445) grad_norm: 1.5973 (1.6748) time: 2.9601 data: 0.0003 max mem: 29202 +[2024-12-10 23:09:00 root] (utils.py 283): INFO Epoch: [5] [1810/2502] eta: 0:33:54 lr: 0.000017 loss_cls: 2.8325 (2.7453) grad_norm: 1.6283 (1.6751) time: 2.9595 data: 0.0003 max mem: 29202 +[2024-12-10 23:09:29 root] (utils.py 283): INFO Epoch: [5] [1820/2502] eta: 0:33:24 lr: 0.000017 loss_cls: 2.8530 (2.7456) grad_norm: 1.6283 (1.6747) time: 2.9530 data: 0.0003 max mem: 29202 +[2024-12-10 23:09:59 root] (utils.py 283): INFO Epoch: [5] [1830/2502] eta: 0:32:55 lr: 0.000017 loss_cls: 2.7035 (2.7455) grad_norm: 1.5462 (1.6740) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 23:10:28 root] (utils.py 283): INFO Epoch: [5] [1840/2502] eta: 0:32:26 lr: 0.000017 loss_cls: 2.8444 (2.7461) grad_norm: 1.5233 (1.6736) time: 2.9561 data: 0.0003 max mem: 29202 +[2024-12-10 23:10:58 root] (utils.py 283): INFO Epoch: [5] [1850/2502] eta: 0:31:56 lr: 0.000017 loss_cls: 2.8444 (2.7453) grad_norm: 1.5296 (1.6731) time: 2.9605 data: 0.0003 max mem: 29202 +[2024-12-10 23:11:27 root] (utils.py 283): INFO Epoch: [5] [1860/2502] eta: 0:31:27 lr: 0.000017 loss_cls: 2.9556 (2.7467) grad_norm: 1.6747 (1.6742) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-10 23:11:57 root] (utils.py 283): INFO Epoch: [5] [1870/2502] eta: 0:30:58 lr: 0.000017 loss_cls: 2.9953 (2.7471) grad_norm: 1.6575 (1.6738) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-10 23:12:27 root] (utils.py 283): INFO Epoch: [5] [1880/2502] eta: 0:30:28 lr: 0.000017 loss_cls: 2.7831 (2.7469) grad_norm: 1.5986 (1.6741) time: 2.9588 data: 0.0003 max mem: 29202 +[2024-12-10 23:12:56 root] (utils.py 283): INFO Epoch: [5] [1890/2502] eta: 0:29:59 lr: 0.000017 loss_cls: 2.8158 (2.7478) grad_norm: 1.6639 (1.6739) time: 2.9489 data: 0.0003 max mem: 29202 +[2024-12-10 23:13:25 root] (utils.py 283): INFO Epoch: [5] [1900/2502] eta: 0:29:30 lr: 0.000017 loss_cls: 2.8541 (2.7485) grad_norm: 1.5981 (1.6736) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-10 23:13:55 root] (utils.py 283): INFO Epoch: [5] [1910/2502] eta: 0:29:00 lr: 0.000017 loss_cls: 2.9193 (2.7480) grad_norm: 1.5520 (1.6732) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 23:14:24 root] (utils.py 283): INFO Epoch: [5] [1920/2502] eta: 0:28:31 lr: 0.000017 loss_cls: 2.9621 (2.7495) grad_norm: 1.5971 (1.6729) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-10 23:14:54 root] (utils.py 283): INFO Epoch: [5] [1930/2502] eta: 0:28:01 lr: 0.000017 loss_cls: 2.9621 (2.7491) grad_norm: 1.6122 (1.6732) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-10 23:15:23 root] (utils.py 283): INFO Epoch: [5] [1940/2502] eta: 0:27:32 lr: 0.000017 loss_cls: 2.8186 (2.7491) grad_norm: 1.6214 (1.6731) time: 2.9629 data: 0.0003 max mem: 29202 +[2024-12-10 23:15:53 root] (utils.py 283): INFO Epoch: [5] [1950/2502] eta: 0:27:03 lr: 0.000017 loss_cls: 2.8595 (2.7496) grad_norm: 1.6970 (1.6734) time: 2.9716 data: 0.0003 max mem: 29202 +[2024-12-10 23:16:23 root] (utils.py 283): INFO Epoch: [5] [1960/2502] eta: 0:26:33 lr: 0.000017 loss_cls: 2.9269 (2.7510) grad_norm: 1.7463 (1.6741) time: 2.9566 data: 0.0003 max mem: 29202 +[2024-12-10 23:16:52 root] (utils.py 283): INFO Epoch: [5] [1970/2502] eta: 0:26:04 lr: 0.000017 loss_cls: 2.8856 (2.7511) grad_norm: 1.7181 (1.6747) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 23:17:22 root] (utils.py 283): INFO Epoch: [5] [1980/2502] eta: 0:25:35 lr: 0.000017 loss_cls: 3.0004 (2.7526) grad_norm: 1.6398 (1.6747) time: 2.9483 data: 0.0003 max mem: 29202 +[2024-12-10 23:17:51 root] (utils.py 283): INFO Epoch: [5] [1990/2502] eta: 0:25:05 lr: 0.000017 loss_cls: 2.9398 (2.7524) grad_norm: 1.6453 (1.6751) time: 2.9529 data: 0.0003 max mem: 29202 +[2024-12-10 23:18:21 root] (utils.py 283): INFO Epoch: [5] [2000/2502] eta: 0:24:36 lr: 0.000017 loss_cls: 2.6646 (2.7521) grad_norm: 1.6453 (1.6747) time: 2.9515 data: 0.0003 max mem: 29202 +[2024-12-10 23:18:50 root] (utils.py 283): INFO Epoch: [5] [2010/2502] eta: 0:24:06 lr: 0.000017 loss_cls: 2.8914 (2.7531) grad_norm: 1.6171 (1.6746) time: 2.9479 data: 0.0003 max mem: 29202 +[2024-12-10 23:19:19 root] (utils.py 283): INFO Epoch: [5] [2020/2502] eta: 0:23:37 lr: 0.000017 loss_cls: 2.9072 (2.7528) grad_norm: 1.6420 (1.6749) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 23:19:49 root] (utils.py 283): INFO Epoch: [5] [2030/2502] eta: 0:23:08 lr: 0.000017 loss_cls: 2.8024 (2.7528) grad_norm: 1.6420 (1.6746) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-10 23:20:18 root] (utils.py 283): INFO Epoch: [5] [2040/2502] eta: 0:22:38 lr: 0.000017 loss_cls: 2.6836 (2.7518) grad_norm: 1.6420 (1.6748) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 23:20:48 root] (utils.py 283): INFO Epoch: [5] [2050/2502] eta: 0:22:09 lr: 0.000017 loss_cls: 2.7200 (2.7525) grad_norm: 1.6700 (1.6754) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-10 23:21:17 root] (utils.py 283): INFO Epoch: [5] [2060/2502] eta: 0:21:39 lr: 0.000017 loss_cls: 3.0079 (2.7522) grad_norm: 1.5724 (1.6752) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 23:21:47 root] (utils.py 283): INFO Epoch: [5] [2070/2502] eta: 0:21:10 lr: 0.000017 loss_cls: 2.9398 (2.7521) grad_norm: 1.5597 (1.6747) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-10 23:22:16 root] (utils.py 283): INFO Epoch: [5] [2080/2502] eta: 0:20:41 lr: 0.000017 loss_cls: 2.8476 (2.7520) grad_norm: 1.5762 (1.6745) time: 2.9511 data: 0.0003 max mem: 29202 +[2024-12-10 23:22:46 root] (utils.py 283): INFO Epoch: [5] [2090/2502] eta: 0:20:11 lr: 0.000017 loss_cls: 2.8711 (2.7524) grad_norm: 1.6125 (1.6742) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 23:23:15 root] (utils.py 283): INFO Epoch: [5] [2100/2502] eta: 0:19:42 lr: 0.000017 loss_cls: 3.0269 (2.7539) grad_norm: 1.6105 (1.6741) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-10 23:23:45 root] (utils.py 283): INFO Epoch: [5] [2110/2502] eta: 0:19:12 lr: 0.000017 loss_cls: 3.0556 (2.7547) grad_norm: 1.6105 (1.6739) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-10 23:24:14 root] (utils.py 283): INFO Epoch: [5] [2120/2502] eta: 0:18:43 lr: 0.000017 loss_cls: 2.7906 (2.7546) grad_norm: 1.5480 (1.6736) time: 2.9541 data: 0.0003 max mem: 29202 +[2024-12-10 23:24:44 root] (utils.py 283): INFO Epoch: [5] [2130/2502] eta: 0:18:14 lr: 0.000017 loss_cls: 2.7080 (2.7534) grad_norm: 1.6135 (1.6741) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-10 23:25:14 root] (utils.py 283): INFO Epoch: [5] [2140/2502] eta: 0:17:44 lr: 0.000017 loss_cls: 2.7034 (2.7534) grad_norm: 1.6890 (1.6740) time: 2.9566 data: 0.0003 max mem: 29202 +[2024-12-10 23:25:43 root] (utils.py 283): INFO Epoch: [5] [2150/2502] eta: 0:17:15 lr: 0.000017 loss_cls: 2.8901 (2.7541) grad_norm: 1.5604 (1.6738) time: 2.9588 data: 0.0003 max mem: 29202 +[2024-12-10 23:26:12 root] (utils.py 283): INFO Epoch: [5] [2160/2502] eta: 0:16:45 lr: 0.000017 loss_cls: 2.8901 (2.7533) grad_norm: 1.6078 (1.6739) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-10 23:26:42 root] (utils.py 283): INFO Epoch: [5] [2170/2502] eta: 0:16:16 lr: 0.000017 loss_cls: 2.7486 (2.7523) grad_norm: 1.6531 (1.6739) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 23:27:11 root] (utils.py 283): INFO Epoch: [5] [2180/2502] eta: 0:15:47 lr: 0.000017 loss_cls: 2.6630 (2.7518) grad_norm: 1.5886 (1.6739) time: 2.9503 data: 0.0003 max mem: 29202 +[2024-12-10 23:27:41 root] (utils.py 283): INFO Epoch: [5] [2190/2502] eta: 0:15:17 lr: 0.000017 loss_cls: 2.4853 (2.7491) grad_norm: 1.5820 (1.6742) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-10 23:28:10 root] (utils.py 283): INFO Epoch: [5] [2200/2502] eta: 0:14:48 lr: 0.000017 loss_cls: 2.4705 (2.7489) grad_norm: 1.5980 (1.6744) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 23:28:40 root] (utils.py 283): INFO Epoch: [5] [2210/2502] eta: 0:14:18 lr: 0.000017 loss_cls: 2.8280 (2.7484) grad_norm: 1.5980 (1.6741) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-10 23:29:09 root] (utils.py 283): INFO Epoch: [5] [2220/2502] eta: 0:13:49 lr: 0.000017 loss_cls: 2.7617 (2.7483) grad_norm: 1.6414 (1.6745) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-10 23:29:39 root] (utils.py 283): INFO Epoch: [5] [2230/2502] eta: 0:13:20 lr: 0.000017 loss_cls: 2.8633 (2.7485) grad_norm: 1.6692 (1.6746) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-10 23:30:08 root] (utils.py 283): INFO Epoch: [5] [2240/2502] eta: 0:12:50 lr: 0.000017 loss_cls: 2.8576 (2.7485) grad_norm: 1.5361 (1.6742) time: 2.9548 data: 0.0003 max mem: 29202 +[2024-12-10 23:30:38 root] (utils.py 283): INFO Epoch: [5] [2250/2502] eta: 0:12:21 lr: 0.000017 loss_cls: 2.6799 (2.7477) grad_norm: 1.6313 (1.6742) time: 2.9531 data: 0.0003 max mem: 29202 +[2024-12-10 23:31:07 root] (utils.py 283): INFO Epoch: [5] [2260/2502] eta: 0:11:51 lr: 0.000017 loss_cls: 2.4287 (2.7471) grad_norm: 1.5642 (1.6738) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-10 23:31:37 root] (utils.py 283): INFO Epoch: [5] [2270/2502] eta: 0:11:22 lr: 0.000017 loss_cls: 2.4287 (2.7458) grad_norm: 1.4932 (1.6731) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-10 23:32:06 root] (utils.py 283): INFO Epoch: [5] [2280/2502] eta: 0:10:53 lr: 0.000017 loss_cls: 2.8553 (2.7471) grad_norm: 1.5161 (1.6730) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-10 23:32:36 root] (utils.py 283): INFO Epoch: [5] [2290/2502] eta: 0:10:23 lr: 0.000017 loss_cls: 2.9763 (2.7477) grad_norm: 1.5404 (1.6726) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 23:33:05 root] (utils.py 283): INFO Epoch: [5] [2300/2502] eta: 0:09:54 lr: 0.000017 loss_cls: 2.8408 (2.7478) grad_norm: 1.5092 (1.6722) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-10 23:33:35 root] (utils.py 283): INFO Epoch: [5] [2310/2502] eta: 0:09:24 lr: 0.000017 loss_cls: 2.6450 (2.7470) grad_norm: 1.5773 (1.6722) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-10 23:34:04 root] (utils.py 283): INFO Epoch: [5] [2320/2502] eta: 0:08:55 lr: 0.000017 loss_cls: 2.5798 (2.7466) grad_norm: 1.6706 (1.6723) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 23:34:34 root] (utils.py 283): INFO Epoch: [5] [2330/2502] eta: 0:08:26 lr: 0.000017 loss_cls: 2.8908 (2.7468) grad_norm: 1.6865 (1.6729) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-10 23:35:03 root] (utils.py 283): INFO Epoch: [5] [2340/2502] eta: 0:07:56 lr: 0.000017 loss_cls: 2.9465 (2.7473) grad_norm: 1.6768 (1.6727) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 23:35:32 root] (utils.py 283): INFO Epoch: [5] [2350/2502] eta: 0:07:27 lr: 0.000017 loss_cls: 2.8896 (2.7471) grad_norm: 1.6204 (1.6727) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-10 23:36:02 root] (utils.py 283): INFO Epoch: [5] [2360/2502] eta: 0:06:57 lr: 0.000017 loss_cls: 2.8009 (2.7473) grad_norm: 1.6211 (1.6729) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-10 23:36:31 root] (utils.py 283): INFO Epoch: [5] [2370/2502] eta: 0:06:28 lr: 0.000017 loss_cls: 2.8009 (2.7470) grad_norm: 1.6035 (1.6725) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-10 23:37:01 root] (utils.py 283): INFO Epoch: [5] [2380/2502] eta: 0:05:58 lr: 0.000017 loss_cls: 2.8444 (2.7469) grad_norm: 1.5156 (1.6721) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-10 23:37:30 root] (utils.py 283): INFO Epoch: [5] [2390/2502] eta: 0:05:29 lr: 0.000017 loss_cls: 2.9431 (2.7470) grad_norm: 1.5156 (1.6718) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-10 23:38:00 root] (utils.py 283): INFO Epoch: [5] [2400/2502] eta: 0:05:00 lr: 0.000017 loss_cls: 2.9431 (2.7474) grad_norm: 1.6019 (1.6720) time: 2.9519 data: 0.0003 max mem: 29202 +[2024-12-10 23:38:29 root] (utils.py 283): INFO Epoch: [5] [2410/2502] eta: 0:04:30 lr: 0.000017 loss_cls: 2.8615 (2.7479) grad_norm: 1.5839 (1.6729) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-10 23:38:59 root] (utils.py 283): INFO Epoch: [5] [2420/2502] eta: 0:04:01 lr: 0.000017 loss_cls: 2.8611 (2.7470) grad_norm: 1.5915 (1.6729) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 23:39:28 root] (utils.py 283): INFO Epoch: [5] [2430/2502] eta: 0:03:31 lr: 0.000017 loss_cls: 2.8883 (2.7474) grad_norm: 1.5953 (1.6729) time: 2.9507 data: 0.0003 max mem: 29202 +[2024-12-10 23:39:58 root] (utils.py 283): INFO Epoch: [5] [2440/2502] eta: 0:03:02 lr: 0.000017 loss_cls: 2.8834 (2.7474) grad_norm: 1.5948 (1.6727) time: 2.9511 data: 0.0003 max mem: 29202 +[2024-12-10 23:40:27 root] (utils.py 283): INFO Epoch: [5] [2450/2502] eta: 0:02:32 lr: 0.000017 loss_cls: 2.8642 (2.7477) grad_norm: 1.5948 (1.6724) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-10 23:40:57 root] (utils.py 283): INFO Epoch: [5] [2460/2502] eta: 0:02:03 lr: 0.000017 loss_cls: 2.8028 (2.7474) grad_norm: 1.5865 (1.6725) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 23:41:26 root] (utils.py 283): INFO Epoch: [5] [2470/2502] eta: 0:01:34 lr: 0.000017 loss_cls: 2.6844 (2.7473) grad_norm: 1.5773 (1.6721) time: 2.9608 data: 0.0003 max mem: 29202 +[2024-12-10 23:41:56 root] (utils.py 283): INFO Epoch: [5] [2480/2502] eta: 0:01:04 lr: 0.000017 loss_cls: 2.8744 (2.7475) grad_norm: 1.5773 (1.6730) time: 2.9623 data: 0.0003 max mem: 29202 +[2024-12-10 23:42:26 root] (utils.py 283): INFO Epoch: [5] [2490/2502] eta: 0:00:35 lr: 0.000017 loss_cls: 2.9158 (2.7479) grad_norm: 1.6463 (1.6731) time: 2.9777 data: 0.0240 max mem: 29202 +[2024-12-10 23:42:56 root] (utils.py 283): INFO Epoch: [5] [2500/2502] eta: 0:00:05 lr: 0.000017 loss_cls: 2.9158 (2.7483) grad_norm: 1.6375 (1.6733) time: 2.9895 data: 0.0241 max mem: 29202 +[2024-12-10 23:42:59 root] (utils.py 283): INFO Epoch: [5] [2501/2502] eta: 0:00:02 lr: 0.000017 loss_cls: 2.8961 (2.7483) grad_norm: 1.6463 (1.6733) time: 2.9895 data: 0.0241 max mem: 29202 +[2024-12-10 23:42:59 root] (utils.py 297): INFO Epoch: [5] Total time: 2:02:42 (2.9428 s / it) +[2024-12-10 23:42:59 root] (engine.py 179): INFO Averaged stats:lr: 0.000017 loss_cls: 2.8961 (2.7431) grad_norm: 1.6463 (1.6733) +[2024-12-10 23:43:03 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2947 (0.2947) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5696 data: 0.0004 max mem: 29202 +[2024-12-10 23:43:08 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5667 (0.5846) acc1: 86.7188 (87.0739) acc3: 97.6562 (96.8040) acc5: 99.2188 (98.1534) time: 0.5530 data: 0.0003 max mem: 29202 +[2024-12-10 23:43:14 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5667 (0.6355) acc1: 85.1562 (85.7887) acc3: 96.8750 (95.9449) acc5: 97.6562 (97.8051) time: 0.5520 data: 0.0004 max mem: 29202 +[2024-12-10 23:43:19 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6817 (0.6697) acc1: 85.1562 (85.1310) acc3: 95.3125 (95.6653) acc5: 97.6562 (97.5806) time: 0.5527 data: 0.0004 max mem: 29202 +[2024-12-10 23:43:25 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7089 (0.6787) acc1: 85.9375 (85.2515) acc3: 95.3125 (95.6364) acc5: 97.6562 (97.4657) time: 0.5525 data: 0.0004 max mem: 29202 +[2024-12-10 23:43:30 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8744 (0.7646) acc1: 79.6875 (83.5018) acc3: 92.1875 (94.5466) acc5: 93.7500 (96.5380) time: 0.5525 data: 0.0004 max mem: 29202 +[2024-12-10 23:43:36 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:21 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9477 (0.7939) acc1: 77.3438 (82.9406) acc3: 90.6250 (93.9165) acc5: 92.9688 (96.0938) time: 0.5531 data: 0.0004 max mem: 29202 +[2024-12-10 23:43:42 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9185 (0.8248) acc1: 78.1250 (82.0533) acc3: 90.6250 (93.5739) acc5: 93.7500 (95.8297) time: 0.5531 data: 0.0004 max mem: 29202 +[2024-12-10 23:43:47 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0365 (0.8516) acc1: 78.1250 (81.5779) acc3: 90.6250 (93.1231) acc5: 93.7500 (95.5054) time: 0.5531 data: 0.0006 max mem: 29202 +[2024-12-10 23:43:53 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0632 (0.8818) acc1: 76.5625 (80.7864) acc3: 89.8438 (92.8228) acc5: 92.9688 (95.2782) time: 0.5535 data: 0.0006 max mem: 29202 +[2024-12-10 23:43:56 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9882 (0.8796) acc1: 76.5625 (80.7520) acc3: 90.6250 (92.8720) acc5: 93.7500 (95.3440) time: 0.5442 data: 0.0006 max mem: 29202 +[2024-12-10 23:43:56 root] (utils.py 297): INFO Test: Total time: 0:00:54 (0.5512 s / it) +[2024-12-10 23:43:56 root] (engine.py 264): INFO * Acc@1 80.880 Acc@3 92.808 Acc@5 95.194 loss 0.879 flops 13.207 layer_flops 13.109 +[2024-12-10 23:43:56 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 80.9% +[2024-12-10 23:43:59 root] (main.py 576): INFO Max accuracy: 80.88% +[2024-12-10 23:44:01 root] (utils.py 283): INFO Epoch: [6] [ 0/2502] eta: 2:00:09 lr: 0.000015 loss_cls: 3.3900 (3.3900) grad_norm: 1.9023 (1.9023) time: 2.8815 data: 0.0004 max mem: 29202 +[2024-12-10 23:44:31 root] (utils.py 283): INFO Epoch: [6] [ 10/2502] eta: 2:01:47 lr: 0.000015 loss_cls: 2.8336 (2.6270) grad_norm: 1.7134 (1.8454) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-10 23:45:00 root] (utils.py 283): INFO Epoch: [6] [ 20/2502] eta: 2:01:32 lr: 0.000015 loss_cls: 2.8336 (2.7065) grad_norm: 1.6512 (1.7436) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-10 23:45:30 root] (utils.py 283): INFO Epoch: [6] [ 30/2502] eta: 2:01:08 lr: 0.000015 loss_cls: 2.9440 (2.7454) grad_norm: 1.5724 (1.6880) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-10 23:45:59 root] (utils.py 283): INFO Epoch: [6] [ 40/2502] eta: 2:00:41 lr: 0.000015 loss_cls: 2.7831 (2.7088) grad_norm: 1.6645 (1.6984) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-10 23:46:29 root] (utils.py 283): INFO Epoch: [6] [ 50/2502] eta: 2:00:16 lr: 0.000015 loss_cls: 2.6553 (2.6910) grad_norm: 1.6215 (1.6784) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-10 23:46:58 root] (utils.py 283): INFO Epoch: [6] [ 60/2502] eta: 1:59:46 lr: 0.000015 loss_cls: 2.6553 (2.6608) grad_norm: 1.6136 (1.6859) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-10 23:47:28 root] (utils.py 283): INFO Epoch: [6] [ 70/2502] eta: 1:59:17 lr: 0.000015 loss_cls: 2.7908 (2.6889) grad_norm: 1.6220 (1.6795) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-10 23:47:57 root] (utils.py 283): INFO Epoch: [6] [ 80/2502] eta: 1:58:47 lr: 0.000015 loss_cls: 2.8624 (2.7073) grad_norm: 1.6183 (1.6763) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-10 23:48:26 root] (utils.py 283): INFO Epoch: [6] [ 90/2502] eta: 1:58:21 lr: 0.000015 loss_cls: 2.8624 (2.7126) grad_norm: 1.6247 (1.6719) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-10 23:48:56 root] (utils.py 283): INFO Epoch: [6] [ 100/2502] eta: 1:57:57 lr: 0.000015 loss_cls: 2.8570 (2.7148) grad_norm: 1.6247 (1.6628) time: 2.9607 data: 0.0003 max mem: 29202 +[2024-12-10 23:49:26 root] (utils.py 283): INFO Epoch: [6] [ 110/2502] eta: 1:57:32 lr: 0.000015 loss_cls: 2.7919 (2.7151) grad_norm: 1.5215 (1.6529) time: 2.9678 data: 0.0003 max mem: 29202 +[2024-12-10 23:49:56 root] (utils.py 283): INFO Epoch: [6] [ 120/2502] eta: 1:57:07 lr: 0.000015 loss_cls: 2.6512 (2.7023) grad_norm: 1.4954 (1.6428) time: 2.9692 data: 0.0003 max mem: 29202 +[2024-12-10 23:50:25 root] (utils.py 283): INFO Epoch: [6] [ 130/2502] eta: 1:56:40 lr: 0.000015 loss_cls: 2.6997 (2.7089) grad_norm: 1.5365 (1.6440) time: 2.9678 data: 0.0003 max mem: 29202 +[2024-12-10 23:50:55 root] (utils.py 283): INFO Epoch: [6] [ 140/2502] eta: 1:56:14 lr: 0.000015 loss_cls: 2.8668 (2.7331) grad_norm: 1.6742 (1.6439) time: 2.9683 data: 0.0003 max mem: 29202 +[2024-12-10 23:51:25 root] (utils.py 283): INFO Epoch: [6] [ 150/2502] eta: 1:55:47 lr: 0.000015 loss_cls: 2.8252 (2.7248) grad_norm: 1.5757 (1.6390) time: 2.9711 data: 0.0003 max mem: 29202 +[2024-12-10 23:51:54 root] (utils.py 283): INFO Epoch: [6] [ 160/2502] eta: 1:55:18 lr: 0.000015 loss_cls: 2.8299 (2.7292) grad_norm: 1.5757 (1.6353) time: 2.9626 data: 0.0003 max mem: 29202 +[2024-12-10 23:52:24 root] (utils.py 283): INFO Epoch: [6] [ 170/2502] eta: 1:54:47 lr: 0.000015 loss_cls: 2.8857 (2.7365) grad_norm: 1.5769 (1.6324) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-10 23:52:53 root] (utils.py 283): INFO Epoch: [6] [ 180/2502] eta: 1:54:16 lr: 0.000015 loss_cls: 2.8299 (2.7292) grad_norm: 1.6921 (1.6470) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-10 23:53:23 root] (utils.py 283): INFO Epoch: [6] [ 190/2502] eta: 1:53:46 lr: 0.000015 loss_cls: 2.7941 (2.7268) grad_norm: 1.6230 (1.6437) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-10 23:53:52 root] (utils.py 283): INFO Epoch: [6] [ 200/2502] eta: 1:53:17 lr: 0.000015 loss_cls: 2.8296 (2.7338) grad_norm: 1.5675 (1.6431) time: 2.9527 data: 0.0003 max mem: 29202 +[2024-12-10 23:54:22 root] (utils.py 283): INFO Epoch: [6] [ 210/2502] eta: 1:52:47 lr: 0.000015 loss_cls: 2.9245 (2.7372) grad_norm: 1.6921 (1.6511) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-10 23:54:51 root] (utils.py 283): INFO Epoch: [6] [ 220/2502] eta: 1:52:17 lr: 0.000015 loss_cls: 2.8941 (2.7368) grad_norm: 1.7626 (1.6557) time: 2.9497 data: 0.0003 max mem: 29202 +[2024-12-10 23:55:21 root] (utils.py 283): INFO Epoch: [6] [ 230/2502] eta: 1:51:48 lr: 0.000015 loss_cls: 2.7248 (2.7305) grad_norm: 1.4576 (1.6482) time: 2.9542 data: 0.0003 max mem: 29202 +[2024-12-10 23:55:50 root] (utils.py 283): INFO Epoch: [6] [ 240/2502] eta: 1:51:19 lr: 0.000015 loss_cls: 2.8526 (2.7346) grad_norm: 1.5770 (1.6572) time: 2.9595 data: 0.0003 max mem: 29202 +[2024-12-10 23:56:20 root] (utils.py 283): INFO Epoch: [6] [ 250/2502] eta: 1:50:49 lr: 0.000015 loss_cls: 2.8526 (2.7322) grad_norm: 1.6482 (1.6572) time: 2.9527 data: 0.0003 max mem: 29202 +[2024-12-10 23:56:49 root] (utils.py 283): INFO Epoch: [6] [ 260/2502] eta: 1:50:18 lr: 0.000015 loss_cls: 2.7052 (2.7352) grad_norm: 1.5747 (1.6603) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-10 23:57:19 root] (utils.py 283): INFO Epoch: [6] [ 270/2502] eta: 1:49:48 lr: 0.000015 loss_cls: 2.8937 (2.7443) grad_norm: 1.5934 (1.6621) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-10 23:57:48 root] (utils.py 283): INFO Epoch: [6] [ 280/2502] eta: 1:49:18 lr: 0.000015 loss_cls: 2.9579 (2.7449) grad_norm: 1.7210 (1.6668) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-10 23:58:17 root] (utils.py 283): INFO Epoch: [6] [ 290/2502] eta: 1:48:48 lr: 0.000015 loss_cls: 2.8124 (2.7436) grad_norm: 1.6017 (1.6636) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-10 23:58:47 root] (utils.py 283): INFO Epoch: [6] [ 300/2502] eta: 1:48:17 lr: 0.000015 loss_cls: 2.8648 (2.7427) grad_norm: 1.5836 (1.6658) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-10 23:59:16 root] (utils.py 283): INFO Epoch: [6] [ 310/2502] eta: 1:47:48 lr: 0.000015 loss_cls: 2.9534 (2.7366) grad_norm: 1.5836 (1.6635) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-10 23:59:46 root] (utils.py 283): INFO Epoch: [6] [ 320/2502] eta: 1:47:20 lr: 0.000015 loss_cls: 2.6741 (2.7327) grad_norm: 1.5250 (1.6637) time: 2.9603 data: 0.0003 max mem: 29202 +[2024-12-11 00:00:15 root] (utils.py 283): INFO Epoch: [6] [ 330/2502] eta: 1:46:50 lr: 0.000015 loss_cls: 2.6706 (2.7324) grad_norm: 1.5250 (1.6593) time: 2.9562 data: 0.0003 max mem: 29202 +[2024-12-11 00:00:45 root] (utils.py 283): INFO Epoch: [6] [ 340/2502] eta: 1:46:20 lr: 0.000015 loss_cls: 2.7005 (2.7336) grad_norm: 1.5523 (1.6613) time: 2.9438 data: 0.0004 max mem: 29202 +[2024-12-11 00:01:14 root] (utils.py 283): INFO Epoch: [6] [ 350/2502] eta: 1:45:50 lr: 0.000015 loss_cls: 2.8666 (2.7373) grad_norm: 1.6671 (1.6606) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 00:01:44 root] (utils.py 283): INFO Epoch: [6] [ 360/2502] eta: 1:45:20 lr: 0.000015 loss_cls: 2.8705 (2.7422) grad_norm: 1.6552 (1.6617) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 00:02:13 root] (utils.py 283): INFO Epoch: [6] [ 370/2502] eta: 1:44:50 lr: 0.000015 loss_cls: 2.9233 (2.7450) grad_norm: 1.6864 (1.6637) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 00:02:43 root] (utils.py 283): INFO Epoch: [6] [ 380/2502] eta: 1:44:20 lr: 0.000015 loss_cls: 2.6710 (2.7352) grad_norm: 1.5244 (1.6610) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 00:03:12 root] (utils.py 283): INFO Epoch: [6] [ 390/2502] eta: 1:43:50 lr: 0.000015 loss_cls: 2.4660 (2.7361) grad_norm: 1.5023 (1.6594) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 00:03:42 root] (utils.py 283): INFO Epoch: [6] [ 400/2502] eta: 1:43:21 lr: 0.000015 loss_cls: 2.8438 (2.7373) grad_norm: 1.5807 (1.6592) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 00:04:11 root] (utils.py 283): INFO Epoch: [6] [ 410/2502] eta: 1:42:51 lr: 0.000015 loss_cls: 2.8461 (2.7360) grad_norm: 1.6467 (1.6585) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-11 00:04:40 root] (utils.py 283): INFO Epoch: [6] [ 420/2502] eta: 1:42:21 lr: 0.000015 loss_cls: 2.7051 (2.7311) grad_norm: 1.5816 (1.6568) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-11 00:05:10 root] (utils.py 283): INFO Epoch: [6] [ 430/2502] eta: 1:41:51 lr: 0.000015 loss_cls: 2.7644 (2.7339) grad_norm: 1.6174 (1.6574) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 00:05:39 root] (utils.py 283): INFO Epoch: [6] [ 440/2502] eta: 1:41:21 lr: 0.000015 loss_cls: 2.9533 (2.7341) grad_norm: 1.6518 (1.6596) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 00:06:09 root] (utils.py 283): INFO Epoch: [6] [ 450/2502] eta: 1:40:52 lr: 0.000015 loss_cls: 2.8425 (2.7325) grad_norm: 1.6681 (1.6607) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-11 00:06:38 root] (utils.py 283): INFO Epoch: [6] [ 460/2502] eta: 1:40:23 lr: 0.000015 loss_cls: 2.6384 (2.7267) grad_norm: 1.6082 (1.6587) time: 2.9533 data: 0.0003 max mem: 29202 +[2024-12-11 00:07:08 root] (utils.py 283): INFO Epoch: [6] [ 470/2502] eta: 1:39:54 lr: 0.000015 loss_cls: 2.6568 (2.7280) grad_norm: 1.5524 (1.6588) time: 2.9650 data: 0.0003 max mem: 29202 +[2024-12-11 00:07:38 root] (utils.py 283): INFO Epoch: [6] [ 480/2502] eta: 1:39:26 lr: 0.000015 loss_cls: 2.9935 (2.7331) grad_norm: 1.6318 (1.6597) time: 2.9728 data: 0.0003 max mem: 29202 +[2024-12-11 00:08:08 root] (utils.py 283): INFO Epoch: [6] [ 490/2502] eta: 1:38:57 lr: 0.000015 loss_cls: 2.9554 (2.7336) grad_norm: 1.5324 (1.6566) time: 2.9685 data: 0.0003 max mem: 29202 +[2024-12-11 00:08:37 root] (utils.py 283): INFO Epoch: [6] [ 500/2502] eta: 1:38:27 lr: 0.000015 loss_cls: 2.8344 (2.7340) grad_norm: 1.5258 (1.6575) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-11 00:09:06 root] (utils.py 283): INFO Epoch: [6] [ 510/2502] eta: 1:37:57 lr: 0.000015 loss_cls: 2.8344 (2.7373) grad_norm: 1.6242 (1.6563) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 00:09:36 root] (utils.py 283): INFO Epoch: [6] [ 520/2502] eta: 1:37:27 lr: 0.000015 loss_cls: 2.7967 (2.7365) grad_norm: 1.6561 (1.6570) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 00:10:05 root] (utils.py 283): INFO Epoch: [6] [ 530/2502] eta: 1:36:58 lr: 0.000015 loss_cls: 2.8829 (2.7373) grad_norm: 1.6716 (1.6600) time: 2.9531 data: 0.0003 max mem: 29202 +[2024-12-11 00:10:35 root] (utils.py 283): INFO Epoch: [6] [ 540/2502] eta: 1:36:28 lr: 0.000015 loss_cls: 2.8249 (2.7393) grad_norm: 1.6281 (1.6606) time: 2.9508 data: 0.0003 max mem: 29202 +[2024-12-11 00:11:04 root] (utils.py 283): INFO Epoch: [6] [ 550/2502] eta: 1:35:58 lr: 0.000015 loss_cls: 2.8274 (2.7412) grad_norm: 1.5934 (1.6612) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 00:11:34 root] (utils.py 283): INFO Epoch: [6] [ 560/2502] eta: 1:35:29 lr: 0.000015 loss_cls: 2.9762 (2.7446) grad_norm: 1.6382 (1.6606) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 00:12:03 root] (utils.py 283): INFO Epoch: [6] [ 570/2502] eta: 1:34:59 lr: 0.000015 loss_cls: 2.9305 (2.7439) grad_norm: 1.5935 (1.6610) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 00:12:33 root] (utils.py 283): INFO Epoch: [6] [ 580/2502] eta: 1:34:29 lr: 0.000015 loss_cls: 2.5745 (2.7435) grad_norm: 1.5935 (1.6610) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-11 00:13:02 root] (utils.py 283): INFO Epoch: [6] [ 590/2502] eta: 1:34:00 lr: 0.000015 loss_cls: 2.7316 (2.7409) grad_norm: 1.5746 (1.6596) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-11 00:13:31 root] (utils.py 283): INFO Epoch: [6] [ 600/2502] eta: 1:33:30 lr: 0.000015 loss_cls: 2.8302 (2.7430) grad_norm: 1.5899 (1.6630) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 00:14:01 root] (utils.py 283): INFO Epoch: [6] [ 610/2502] eta: 1:33:00 lr: 0.000015 loss_cls: 2.8878 (2.7456) grad_norm: 1.5899 (1.6626) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 00:14:30 root] (utils.py 283): INFO Epoch: [6] [ 620/2502] eta: 1:32:30 lr: 0.000015 loss_cls: 2.7065 (2.7465) grad_norm: 1.5826 (1.6618) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 00:15:00 root] (utils.py 283): INFO Epoch: [6] [ 630/2502] eta: 1:32:01 lr: 0.000015 loss_cls: 2.7065 (2.7444) grad_norm: 1.6257 (1.6609) time: 2.9532 data: 0.0003 max mem: 29202 +[2024-12-11 00:15:29 root] (utils.py 283): INFO Epoch: [6] [ 640/2502] eta: 1:31:32 lr: 0.000015 loss_cls: 2.7802 (2.7417) grad_norm: 1.5912 (1.6602) time: 2.9591 data: 0.0004 max mem: 29202 +[2024-12-11 00:15:59 root] (utils.py 283): INFO Epoch: [6] [ 650/2502] eta: 1:31:02 lr: 0.000015 loss_cls: 2.8267 (2.7437) grad_norm: 1.5912 (1.6601) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-11 00:16:28 root] (utils.py 283): INFO Epoch: [6] [ 660/2502] eta: 1:30:33 lr: 0.000015 loss_cls: 2.9086 (2.7440) grad_norm: 1.5431 (1.6605) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 00:16:58 root] (utils.py 283): INFO Epoch: [6] [ 670/2502] eta: 1:30:03 lr: 0.000015 loss_cls: 2.8843 (2.7437) grad_norm: 1.6747 (1.6638) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-11 00:17:27 root] (utils.py 283): INFO Epoch: [6] [ 680/2502] eta: 1:29:33 lr: 0.000015 loss_cls: 2.6932 (2.7422) grad_norm: 1.6067 (1.6627) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-11 00:17:57 root] (utils.py 283): INFO Epoch: [6] [ 690/2502] eta: 1:29:04 lr: 0.000015 loss_cls: 2.7273 (2.7430) grad_norm: 1.6056 (1.6637) time: 2.9541 data: 0.0003 max mem: 29202 +[2024-12-11 00:18:26 root] (utils.py 283): INFO Epoch: [6] [ 700/2502] eta: 1:28:35 lr: 0.000015 loss_cls: 2.8544 (2.7434) grad_norm: 1.6333 (1.6625) time: 2.9521 data: 0.0003 max mem: 29202 +[2024-12-11 00:18:56 root] (utils.py 283): INFO Epoch: [6] [ 710/2502] eta: 1:28:05 lr: 0.000015 loss_cls: 2.7313 (2.7434) grad_norm: 1.5459 (1.6634) time: 2.9476 data: 0.0003 max mem: 29202 +[2024-12-11 00:19:25 root] (utils.py 283): INFO Epoch: [6] [ 720/2502] eta: 1:27:35 lr: 0.000015 loss_cls: 2.7922 (2.7430) grad_norm: 1.6184 (1.6643) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-11 00:19:55 root] (utils.py 283): INFO Epoch: [6] [ 730/2502] eta: 1:27:06 lr: 0.000015 loss_cls: 2.8747 (2.7427) grad_norm: 1.6828 (1.6648) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 00:20:24 root] (utils.py 283): INFO Epoch: [6] [ 740/2502] eta: 1:26:36 lr: 0.000015 loss_cls: 2.9506 (2.7441) grad_norm: 1.5807 (1.6655) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 00:20:53 root] (utils.py 283): INFO Epoch: [6] [ 750/2502] eta: 1:26:06 lr: 0.000015 loss_cls: 2.8715 (2.7446) grad_norm: 1.5807 (1.6655) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 00:21:23 root] (utils.py 283): INFO Epoch: [6] [ 760/2502] eta: 1:25:37 lr: 0.000015 loss_cls: 2.8223 (2.7430) grad_norm: 1.6521 (1.6655) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 00:21:52 root] (utils.py 283): INFO Epoch: [6] [ 770/2502] eta: 1:25:07 lr: 0.000015 loss_cls: 2.6881 (2.7412) grad_norm: 1.6190 (1.6644) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 00:22:22 root] (utils.py 283): INFO Epoch: [6] [ 780/2502] eta: 1:24:37 lr: 0.000015 loss_cls: 2.6915 (2.7416) grad_norm: 1.6265 (1.6654) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-11 00:22:51 root] (utils.py 283): INFO Epoch: [6] [ 790/2502] eta: 1:24:08 lr: 0.000015 loss_cls: 2.9799 (2.7422) grad_norm: 1.6378 (1.6655) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 00:23:21 root] (utils.py 283): INFO Epoch: [6] [ 800/2502] eta: 1:23:38 lr: 0.000015 loss_cls: 2.6945 (2.7404) grad_norm: 1.6013 (1.6655) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 00:23:50 root] (utils.py 283): INFO Epoch: [6] [ 810/2502] eta: 1:23:09 lr: 0.000015 loss_cls: 2.6945 (2.7419) grad_norm: 1.5994 (1.6669) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 00:24:19 root] (utils.py 283): INFO Epoch: [6] [ 820/2502] eta: 1:22:39 lr: 0.000015 loss_cls: 2.9853 (2.7421) grad_norm: 1.5818 (1.6656) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-11 00:24:49 root] (utils.py 283): INFO Epoch: [6] [ 830/2502] eta: 1:22:09 lr: 0.000015 loss_cls: 2.8356 (2.7432) grad_norm: 1.5870 (1.6649) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 00:25:18 root] (utils.py 283): INFO Epoch: [6] [ 840/2502] eta: 1:21:40 lr: 0.000015 loss_cls: 2.8017 (2.7409) grad_norm: 1.6233 (1.6645) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 00:25:48 root] (utils.py 283): INFO Epoch: [6] [ 850/2502] eta: 1:21:10 lr: 0.000015 loss_cls: 2.8136 (2.7425) grad_norm: 1.5646 (1.6636) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 00:26:17 root] (utils.py 283): INFO Epoch: [6] [ 860/2502] eta: 1:20:40 lr: 0.000015 loss_cls: 2.8199 (2.7420) grad_norm: 1.5332 (1.6616) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 00:26:46 root] (utils.py 283): INFO Epoch: [6] [ 870/2502] eta: 1:20:11 lr: 0.000015 loss_cls: 2.9484 (2.7458) grad_norm: 1.5332 (1.6624) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 00:27:16 root] (utils.py 283): INFO Epoch: [6] [ 880/2502] eta: 1:19:41 lr: 0.000015 loss_cls: 3.0456 (2.7473) grad_norm: 1.5671 (1.6621) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 00:27:45 root] (utils.py 283): INFO Epoch: [6] [ 890/2502] eta: 1:19:12 lr: 0.000015 loss_cls: 2.7325 (2.7448) grad_norm: 1.6020 (1.6629) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 00:28:15 root] (utils.py 283): INFO Epoch: [6] [ 900/2502] eta: 1:18:42 lr: 0.000015 loss_cls: 2.5868 (2.7443) grad_norm: 1.6557 (1.6632) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-11 00:28:44 root] (utils.py 283): INFO Epoch: [6] [ 910/2502] eta: 1:18:13 lr: 0.000015 loss_cls: 2.7214 (2.7431) grad_norm: 1.6557 (1.6643) time: 2.9480 data: 0.0003 max mem: 29202 +[2024-12-11 00:29:14 root] (utils.py 283): INFO Epoch: [6] [ 920/2502] eta: 1:17:43 lr: 0.000015 loss_cls: 2.3303 (2.7384) grad_norm: 1.5959 (1.6637) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 00:29:43 root] (utils.py 283): INFO Epoch: [6] [ 930/2502] eta: 1:17:14 lr: 0.000015 loss_cls: 2.6657 (2.7390) grad_norm: 1.5896 (1.6629) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 00:30:13 root] (utils.py 283): INFO Epoch: [6] [ 940/2502] eta: 1:16:44 lr: 0.000015 loss_cls: 2.7717 (2.7377) grad_norm: 1.6037 (1.6624) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 00:30:42 root] (utils.py 283): INFO Epoch: [6] [ 950/2502] eta: 1:16:14 lr: 0.000015 loss_cls: 2.7198 (2.7372) grad_norm: 1.5669 (1.6619) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 00:31:11 root] (utils.py 283): INFO Epoch: [6] [ 960/2502] eta: 1:15:45 lr: 0.000015 loss_cls: 2.8571 (2.7364) grad_norm: 1.6117 (1.6617) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 00:31:41 root] (utils.py 283): INFO Epoch: [6] [ 970/2502] eta: 1:15:15 lr: 0.000015 loss_cls: 2.6775 (2.7348) grad_norm: 1.6209 (1.6613) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 00:32:10 root] (utils.py 283): INFO Epoch: [6] [ 980/2502] eta: 1:14:46 lr: 0.000015 loss_cls: 2.5901 (2.7334) grad_norm: 1.5482 (1.6618) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 00:32:40 root] (utils.py 283): INFO Epoch: [6] [ 990/2502] eta: 1:14:16 lr: 0.000015 loss_cls: 2.7687 (2.7347) grad_norm: 1.5408 (1.6616) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 00:33:09 root] (utils.py 283): INFO Epoch: [6] [1000/2502] eta: 1:13:47 lr: 0.000015 loss_cls: 2.8447 (2.7337) grad_norm: 1.4993 (1.6613) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 00:33:39 root] (utils.py 283): INFO Epoch: [6] [1010/2502] eta: 1:13:17 lr: 0.000015 loss_cls: 2.7897 (2.7342) grad_norm: 1.6098 (1.6620) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 00:34:08 root] (utils.py 283): INFO Epoch: [6] [1020/2502] eta: 1:12:48 lr: 0.000015 loss_cls: 2.7487 (2.7334) grad_norm: 1.6856 (1.6623) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 00:34:37 root] (utils.py 283): INFO Epoch: [6] [1030/2502] eta: 1:12:18 lr: 0.000015 loss_cls: 2.8446 (2.7337) grad_norm: 1.6856 (1.6643) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 00:35:07 root] (utils.py 283): INFO Epoch: [6] [1040/2502] eta: 1:11:49 lr: 0.000015 loss_cls: 2.7994 (2.7327) grad_norm: 1.7422 (1.6647) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-11 00:35:36 root] (utils.py 283): INFO Epoch: [6] [1050/2502] eta: 1:11:19 lr: 0.000015 loss_cls: 2.7165 (2.7308) grad_norm: 1.5974 (1.6639) time: 2.9542 data: 0.0003 max mem: 29202 +[2024-12-11 00:36:06 root] (utils.py 283): INFO Epoch: [6] [1060/2502] eta: 1:10:50 lr: 0.000015 loss_cls: 2.7165 (2.7303) grad_norm: 1.5411 (1.6627) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 00:36:35 root] (utils.py 283): INFO Epoch: [6] [1070/2502] eta: 1:10:20 lr: 0.000015 loss_cls: 2.9473 (2.7330) grad_norm: 1.5325 (1.6625) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-11 00:37:05 root] (utils.py 283): INFO Epoch: [6] [1080/2502] eta: 1:09:51 lr: 0.000015 loss_cls: 2.9670 (2.7319) grad_norm: 1.5325 (1.6613) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-11 00:37:34 root] (utils.py 283): INFO Epoch: [6] [1090/2502] eta: 1:09:21 lr: 0.000015 loss_cls: 2.7394 (2.7300) grad_norm: 1.4977 (1.6605) time: 2.9513 data: 0.0003 max mem: 29202 +[2024-12-11 00:38:04 root] (utils.py 283): INFO Epoch: [6] [1100/2502] eta: 1:08:52 lr: 0.000015 loss_cls: 2.6670 (2.7305) grad_norm: 1.5367 (1.6591) time: 2.9653 data: 0.0003 max mem: 29202 +[2024-12-11 00:38:34 root] (utils.py 283): INFO Epoch: [6] [1110/2502] eta: 1:08:23 lr: 0.000015 loss_cls: 2.6462 (2.7307) grad_norm: 1.5391 (1.6592) time: 2.9542 data: 0.0003 max mem: 29202 +[2024-12-11 00:39:03 root] (utils.py 283): INFO Epoch: [6] [1120/2502] eta: 1:07:53 lr: 0.000015 loss_cls: 2.6610 (2.7307) grad_norm: 1.5773 (1.6595) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 00:39:32 root] (utils.py 283): INFO Epoch: [6] [1130/2502] eta: 1:07:23 lr: 0.000015 loss_cls: 2.9548 (2.7319) grad_norm: 1.5183 (1.6588) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 00:40:02 root] (utils.py 283): INFO Epoch: [6] [1140/2502] eta: 1:06:54 lr: 0.000015 loss_cls: 2.9548 (2.7334) grad_norm: 1.6231 (1.6597) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 00:40:31 root] (utils.py 283): INFO Epoch: [6] [1150/2502] eta: 1:06:24 lr: 0.000015 loss_cls: 2.8276 (2.7338) grad_norm: 1.6482 (1.6601) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 00:41:01 root] (utils.py 283): INFO Epoch: [6] [1160/2502] eta: 1:05:55 lr: 0.000015 loss_cls: 2.8451 (2.7337) grad_norm: 1.6466 (1.6603) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 00:41:30 root] (utils.py 283): INFO Epoch: [6] [1170/2502] eta: 1:05:25 lr: 0.000015 loss_cls: 2.7689 (2.7320) grad_norm: 1.6607 (1.6617) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 00:41:59 root] (utils.py 283): INFO Epoch: [6] [1180/2502] eta: 1:04:56 lr: 0.000015 loss_cls: 2.6619 (2.7328) grad_norm: 1.6198 (1.6626) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 00:42:29 root] (utils.py 283): INFO Epoch: [6] [1190/2502] eta: 1:04:26 lr: 0.000015 loss_cls: 2.9067 (2.7343) grad_norm: 1.5598 (1.6627) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 00:42:58 root] (utils.py 283): INFO Epoch: [6] [1200/2502] eta: 1:03:57 lr: 0.000015 loss_cls: 2.9067 (2.7350) grad_norm: 1.6477 (1.6631) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 00:43:28 root] (utils.py 283): INFO Epoch: [6] [1210/2502] eta: 1:03:27 lr: 0.000015 loss_cls: 2.7925 (2.7340) grad_norm: 1.7644 (1.6648) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 00:43:57 root] (utils.py 283): INFO Epoch: [6] [1220/2502] eta: 1:02:58 lr: 0.000015 loss_cls: 2.7513 (2.7350) grad_norm: 1.6232 (1.6640) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 00:44:27 root] (utils.py 283): INFO Epoch: [6] [1230/2502] eta: 1:02:28 lr: 0.000015 loss_cls: 2.7035 (2.7326) grad_norm: 1.5577 (1.6639) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 00:44:56 root] (utils.py 283): INFO Epoch: [6] [1240/2502] eta: 1:01:59 lr: 0.000015 loss_cls: 2.4927 (2.7325) grad_norm: 1.5945 (1.6655) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-11 00:45:25 root] (utils.py 283): INFO Epoch: [6] [1250/2502] eta: 1:01:29 lr: 0.000015 loss_cls: 2.7199 (2.7329) grad_norm: 1.5945 (1.6650) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-11 00:45:55 root] (utils.py 283): INFO Epoch: [6] [1260/2502] eta: 1:01:00 lr: 0.000015 loss_cls: 2.6227 (2.7313) grad_norm: 1.5472 (1.6646) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 00:46:24 root] (utils.py 283): INFO Epoch: [6] [1270/2502] eta: 1:00:30 lr: 0.000015 loss_cls: 2.6227 (2.7311) grad_norm: 1.5586 (1.6640) time: 2.9479 data: 0.0003 max mem: 29202 +[2024-12-11 00:46:54 root] (utils.py 283): INFO Epoch: [6] [1280/2502] eta: 1:00:01 lr: 0.000015 loss_cls: 2.7311 (2.7311) grad_norm: 1.6122 (1.6639) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 00:47:24 root] (utils.py 283): INFO Epoch: [6] [1290/2502] eta: 0:59:32 lr: 0.000015 loss_cls: 2.6517 (2.7304) grad_norm: 1.5992 (1.6632) time: 2.9691 data: 0.0003 max mem: 29202 +[2024-12-11 00:47:53 root] (utils.py 283): INFO Epoch: [6] [1300/2502] eta: 0:59:02 lr: 0.000015 loss_cls: 2.7516 (2.7311) grad_norm: 1.5441 (1.6622) time: 2.9712 data: 0.0003 max mem: 29202 +[2024-12-11 00:48:23 root] (utils.py 283): INFO Epoch: [6] [1310/2502] eta: 0:58:33 lr: 0.000015 loss_cls: 2.8205 (2.7314) grad_norm: 1.5441 (1.6624) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 00:48:52 root] (utils.py 283): INFO Epoch: [6] [1320/2502] eta: 0:58:03 lr: 0.000015 loss_cls: 2.8838 (2.7317) grad_norm: 1.5868 (1.6632) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 00:49:22 root] (utils.py 283): INFO Epoch: [6] [1330/2502] eta: 0:57:34 lr: 0.000015 loss_cls: 2.8282 (2.7308) grad_norm: 1.5868 (1.6624) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-11 00:49:51 root] (utils.py 283): INFO Epoch: [6] [1340/2502] eta: 0:57:04 lr: 0.000015 loss_cls: 2.6779 (2.7292) grad_norm: 1.5323 (1.6622) time: 2.9455 data: 0.0002 max mem: 29202 +[2024-12-11 00:50:21 root] (utils.py 283): INFO Epoch: [6] [1350/2502] eta: 0:56:35 lr: 0.000015 loss_cls: 2.6779 (2.7283) grad_norm: 1.6810 (1.6636) time: 2.9458 data: 0.0002 max mem: 29202 +[2024-12-11 00:50:50 root] (utils.py 283): INFO Epoch: [6] [1360/2502] eta: 0:56:05 lr: 0.000015 loss_cls: 2.7441 (2.7271) grad_norm: 1.7132 (1.6635) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-11 00:51:19 root] (utils.py 283): INFO Epoch: [6] [1370/2502] eta: 0:55:36 lr: 0.000015 loss_cls: 2.6143 (2.7265) grad_norm: 1.6057 (1.6633) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 00:51:49 root] (utils.py 283): INFO Epoch: [6] [1380/2502] eta: 0:55:06 lr: 0.000015 loss_cls: 2.7999 (2.7265) grad_norm: 1.6547 (1.6631) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 00:52:18 root] (utils.py 283): INFO Epoch: [6] [1390/2502] eta: 0:54:37 lr: 0.000015 loss_cls: 2.7999 (2.7266) grad_norm: 1.5535 (1.6622) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 00:52:48 root] (utils.py 283): INFO Epoch: [6] [1400/2502] eta: 0:54:07 lr: 0.000015 loss_cls: 2.8458 (2.7274) grad_norm: 1.5866 (1.6623) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 00:53:17 root] (utils.py 283): INFO Epoch: [6] [1410/2502] eta: 0:53:38 lr: 0.000015 loss_cls: 2.7103 (2.7264) grad_norm: 1.5866 (1.6615) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 00:53:47 root] (utils.py 283): INFO Epoch: [6] [1420/2502] eta: 0:53:08 lr: 0.000015 loss_cls: 2.6705 (2.7261) grad_norm: 1.4799 (1.6600) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-11 00:54:16 root] (utils.py 283): INFO Epoch: [6] [1430/2502] eta: 0:52:39 lr: 0.000015 loss_cls: 2.7689 (2.7280) grad_norm: 1.5727 (1.6602) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 00:54:46 root] (utils.py 283): INFO Epoch: [6] [1440/2502] eta: 0:52:09 lr: 0.000015 loss_cls: 2.8036 (2.7273) grad_norm: 1.6052 (1.6596) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-11 00:55:15 root] (utils.py 283): INFO Epoch: [6] [1450/2502] eta: 0:51:40 lr: 0.000015 loss_cls: 2.7013 (2.7258) grad_norm: 1.6116 (1.6600) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-11 00:55:45 root] (utils.py 283): INFO Epoch: [6] [1460/2502] eta: 0:51:11 lr: 0.000015 loss_cls: 2.7056 (2.7261) grad_norm: 1.6244 (1.6604) time: 2.9596 data: 0.0003 max mem: 29202 +[2024-12-11 00:56:14 root] (utils.py 283): INFO Epoch: [6] [1470/2502] eta: 0:50:41 lr: 0.000015 loss_cls: 2.6646 (2.7254) grad_norm: 1.5713 (1.6591) time: 2.9540 data: 0.0003 max mem: 29202 +[2024-12-11 00:56:44 root] (utils.py 283): INFO Epoch: [6] [1480/2502] eta: 0:50:12 lr: 0.000015 loss_cls: 2.5752 (2.7254) grad_norm: 1.5330 (1.6589) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 00:57:13 root] (utils.py 283): INFO Epoch: [6] [1490/2502] eta: 0:49:42 lr: 0.000015 loss_cls: 2.9216 (2.7268) grad_norm: 1.6168 (1.6591) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 00:57:43 root] (utils.py 283): INFO Epoch: [6] [1500/2502] eta: 0:49:13 lr: 0.000015 loss_cls: 2.9670 (2.7281) grad_norm: 1.6168 (1.6591) time: 2.9557 data: 0.0003 max mem: 29202 +[2024-12-11 00:58:12 root] (utils.py 283): INFO Epoch: [6] [1510/2502] eta: 0:48:43 lr: 0.000015 loss_cls: 2.8795 (2.7277) grad_norm: 1.6062 (1.6587) time: 2.9555 data: 0.0003 max mem: 29202 +[2024-12-11 00:58:42 root] (utils.py 283): INFO Epoch: [6] [1520/2502] eta: 0:48:14 lr: 0.000015 loss_cls: 2.7126 (2.7279) grad_norm: 1.6196 (1.6600) time: 2.9521 data: 0.0003 max mem: 29202 +[2024-12-11 00:59:12 root] (utils.py 283): INFO Epoch: [6] [1530/2502] eta: 0:47:45 lr: 0.000015 loss_cls: 2.7126 (2.7269) grad_norm: 1.5957 (1.6593) time: 2.9653 data: 0.0003 max mem: 29202 +[2024-12-11 00:59:41 root] (utils.py 283): INFO Epoch: [6] [1540/2502] eta: 0:47:15 lr: 0.000015 loss_cls: 2.7038 (2.7258) grad_norm: 1.5157 (1.6587) time: 2.9693 data: 0.0003 max mem: 29202 +[2024-12-11 01:00:11 root] (utils.py 283): INFO Epoch: [6] [1550/2502] eta: 0:46:46 lr: 0.000015 loss_cls: 2.6456 (2.7251) grad_norm: 1.5252 (1.6590) time: 2.9528 data: 0.0003 max mem: 29202 +[2024-12-11 01:00:40 root] (utils.py 283): INFO Epoch: [6] [1560/2502] eta: 0:46:16 lr: 0.000015 loss_cls: 2.7390 (2.7254) grad_norm: 1.6086 (1.6588) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 01:01:10 root] (utils.py 283): INFO Epoch: [6] [1570/2502] eta: 0:45:47 lr: 0.000015 loss_cls: 2.8071 (2.7257) grad_norm: 1.4582 (1.6577) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-11 01:01:39 root] (utils.py 283): INFO Epoch: [6] [1580/2502] eta: 0:45:17 lr: 0.000015 loss_cls: 2.9412 (2.7268) grad_norm: 1.5808 (1.6598) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-11 01:02:08 root] (utils.py 283): INFO Epoch: [6] [1590/2502] eta: 0:44:48 lr: 0.000015 loss_cls: 2.7322 (2.7260) grad_norm: 1.7908 (1.6601) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 01:02:38 root] (utils.py 283): INFO Epoch: [6] [1600/2502] eta: 0:44:18 lr: 0.000015 loss_cls: 2.6242 (2.7262) grad_norm: 1.6373 (1.6598) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 01:03:07 root] (utils.py 283): INFO Epoch: [6] [1610/2502] eta: 0:43:49 lr: 0.000015 loss_cls: 2.6662 (2.7259) grad_norm: 1.5993 (1.6598) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 01:03:37 root] (utils.py 283): INFO Epoch: [6] [1620/2502] eta: 0:43:19 lr: 0.000015 loss_cls: 2.8277 (2.7270) grad_norm: 1.6458 (1.6612) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 01:04:06 root] (utils.py 283): INFO Epoch: [6] [1630/2502] eta: 0:42:50 lr: 0.000015 loss_cls: 2.8510 (2.7268) grad_norm: 1.6467 (1.6615) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 01:04:37 root] (utils.py 283): INFO Epoch: [6] [1640/2502] eta: 0:42:21 lr: 0.000015 loss_cls: 2.8128 (2.7261) grad_norm: 1.6477 (1.6618) time: 3.0034 data: 0.0004 max mem: 29202 +[2024-12-11 01:05:06 root] (utils.py 283): INFO Epoch: [6] [1650/2502] eta: 0:41:51 lr: 0.000015 loss_cls: 2.8837 (2.7267) grad_norm: 1.5944 (1.6618) time: 3.0106 data: 0.0005 max mem: 29202 +[2024-12-11 01:05:37 root] (utils.py 283): INFO Epoch: [6] [1660/2502] eta: 0:41:22 lr: 0.000015 loss_cls: 2.8215 (2.7265) grad_norm: 1.5846 (1.6615) time: 2.9929 data: 0.0013 max mem: 29202 +[2024-12-11 01:06:06 root] (utils.py 283): INFO Epoch: [6] [1670/2502] eta: 0:40:53 lr: 0.000015 loss_cls: 2.7795 (2.7261) grad_norm: 1.5833 (1.6615) time: 2.9844 data: 0.0012 max mem: 29202 +[2024-12-11 01:06:35 root] (utils.py 283): INFO Epoch: [6] [1680/2502] eta: 0:40:23 lr: 0.000015 loss_cls: 2.8219 (2.7262) grad_norm: 1.7022 (1.6619) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 01:07:05 root] (utils.py 283): INFO Epoch: [6] [1690/2502] eta: 0:39:54 lr: 0.000015 loss_cls: 2.8878 (2.7275) grad_norm: 1.6833 (1.6618) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 01:07:34 root] (utils.py 283): INFO Epoch: [6] [1700/2502] eta: 0:39:24 lr: 0.000015 loss_cls: 2.9271 (2.7280) grad_norm: 1.6466 (1.6626) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 01:08:04 root] (utils.py 283): INFO Epoch: [6] [1710/2502] eta: 0:38:55 lr: 0.000015 loss_cls: 2.6959 (2.7267) grad_norm: 1.6466 (1.6631) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 01:08:33 root] (utils.py 283): INFO Epoch: [6] [1720/2502] eta: 0:38:25 lr: 0.000015 loss_cls: 2.6041 (2.7258) grad_norm: 1.6079 (1.6626) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 01:09:02 root] (utils.py 283): INFO Epoch: [6] [1730/2502] eta: 0:37:56 lr: 0.000015 loss_cls: 2.8560 (2.7267) grad_norm: 1.5842 (1.6625) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 01:09:32 root] (utils.py 283): INFO Epoch: [6] [1740/2502] eta: 0:37:26 lr: 0.000015 loss_cls: 3.0259 (2.7276) grad_norm: 1.5658 (1.6633) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 01:10:01 root] (utils.py 283): INFO Epoch: [6] [1750/2502] eta: 0:36:57 lr: 0.000015 loss_cls: 3.0215 (2.7287) grad_norm: 1.5951 (1.6629) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 01:10:31 root] (utils.py 283): INFO Epoch: [6] [1760/2502] eta: 0:36:27 lr: 0.000015 loss_cls: 3.0215 (2.7306) grad_norm: 1.6221 (1.6635) time: 2.9544 data: 0.0003 max mem: 29202 +[2024-12-11 01:11:00 root] (utils.py 283): INFO Epoch: [6] [1770/2502] eta: 0:35:58 lr: 0.000015 loss_cls: 2.9520 (2.7305) grad_norm: 1.6244 (1.6638) time: 2.9515 data: 0.0003 max mem: 29202 +[2024-12-11 01:11:30 root] (utils.py 283): INFO Epoch: [6] [1780/2502] eta: 0:35:28 lr: 0.000015 loss_cls: 2.6321 (2.7288) grad_norm: 1.5332 (1.6631) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 01:11:59 root] (utils.py 283): INFO Epoch: [6] [1790/2502] eta: 0:34:59 lr: 0.000015 loss_cls: 2.6321 (2.7282) grad_norm: 1.5385 (1.6633) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-11 01:12:29 root] (utils.py 283): INFO Epoch: [6] [1800/2502] eta: 0:34:29 lr: 0.000015 loss_cls: 2.7966 (2.7277) grad_norm: 1.5684 (1.6637) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 01:12:58 root] (utils.py 283): INFO Epoch: [6] [1810/2502] eta: 0:34:00 lr: 0.000015 loss_cls: 2.7714 (2.7276) grad_norm: 1.6206 (1.6654) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-11 01:13:28 root] (utils.py 283): INFO Epoch: [6] [1820/2502] eta: 0:33:30 lr: 0.000015 loss_cls: 2.8174 (2.7281) grad_norm: 1.6044 (1.6654) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-11 01:13:57 root] (utils.py 283): INFO Epoch: [6] [1830/2502] eta: 0:33:01 lr: 0.000015 loss_cls: 2.8771 (2.7289) grad_norm: 1.5780 (1.6654) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 01:14:26 root] (utils.py 283): INFO Epoch: [6] [1840/2502] eta: 0:32:31 lr: 0.000015 loss_cls: 2.8392 (2.7296) grad_norm: 1.5803 (1.6654) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 01:14:56 root] (utils.py 283): INFO Epoch: [6] [1850/2502] eta: 0:32:02 lr: 0.000015 loss_cls: 2.8123 (2.7288) grad_norm: 1.6020 (1.6655) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 01:15:25 root] (utils.py 283): INFO Epoch: [6] [1860/2502] eta: 0:31:32 lr: 0.000015 loss_cls: 2.8395 (2.7295) grad_norm: 1.7279 (1.6662) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-11 01:15:55 root] (utils.py 283): INFO Epoch: [6] [1870/2502] eta: 0:31:03 lr: 0.000015 loss_cls: 2.8607 (2.7300) grad_norm: 1.7053 (1.6661) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 01:16:24 root] (utils.py 283): INFO Epoch: [6] [1880/2502] eta: 0:30:33 lr: 0.000015 loss_cls: 2.8564 (2.7294) grad_norm: 1.6027 (1.6660) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 01:16:53 root] (utils.py 283): INFO Epoch: [6] [1890/2502] eta: 0:30:04 lr: 0.000015 loss_cls: 2.6524 (2.7283) grad_norm: 1.5193 (1.6658) time: 2.9394 data: 0.0002 max mem: 29202 +[2024-12-11 01:17:23 root] (utils.py 283): INFO Epoch: [6] [1900/2502] eta: 0:29:34 lr: 0.000015 loss_cls: 2.4907 (2.7273) grad_norm: 1.5150 (1.6670) time: 2.9470 data: 0.0002 max mem: 29202 +[2024-12-11 01:17:52 root] (utils.py 283): INFO Epoch: [6] [1910/2502] eta: 0:29:05 lr: 0.000015 loss_cls: 2.8920 (2.7282) grad_norm: 1.5825 (1.6667) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 01:18:22 root] (utils.py 283): INFO Epoch: [6] [1920/2502] eta: 0:28:35 lr: 0.000015 loss_cls: 2.7993 (2.7277) grad_norm: 1.6225 (1.6667) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 01:18:51 root] (utils.py 283): INFO Epoch: [6] [1930/2502] eta: 0:28:06 lr: 0.000015 loss_cls: 2.7319 (2.7271) grad_norm: 1.6096 (1.6662) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 01:19:21 root] (utils.py 283): INFO Epoch: [6] [1940/2502] eta: 0:27:36 lr: 0.000015 loss_cls: 2.8362 (2.7284) grad_norm: 1.6492 (1.6667) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 01:19:50 root] (utils.py 283): INFO Epoch: [6] [1950/2502] eta: 0:27:07 lr: 0.000015 loss_cls: 2.9243 (2.7278) grad_norm: 1.6925 (1.6668) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 01:20:19 root] (utils.py 283): INFO Epoch: [6] [1960/2502] eta: 0:26:37 lr: 0.000015 loss_cls: 2.7097 (2.7272) grad_norm: 1.5941 (1.6662) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 01:20:49 root] (utils.py 283): INFO Epoch: [6] [1970/2502] eta: 0:26:08 lr: 0.000015 loss_cls: 2.7888 (2.7272) grad_norm: 1.5678 (1.6665) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 01:21:18 root] (utils.py 283): INFO Epoch: [6] [1980/2502] eta: 0:25:38 lr: 0.000015 loss_cls: 2.8642 (2.7269) grad_norm: 1.5187 (1.6660) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 01:21:48 root] (utils.py 283): INFO Epoch: [6] [1990/2502] eta: 0:25:09 lr: 0.000015 loss_cls: 2.8038 (2.7279) grad_norm: 1.5965 (1.6662) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 01:22:17 root] (utils.py 283): INFO Epoch: [6] [2000/2502] eta: 0:24:39 lr: 0.000015 loss_cls: 2.8664 (2.7279) grad_norm: 1.6441 (1.6660) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 01:22:46 root] (utils.py 283): INFO Epoch: [6] [2010/2502] eta: 0:24:10 lr: 0.000015 loss_cls: 2.6168 (2.7264) grad_norm: 1.5415 (1.6660) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 01:23:16 root] (utils.py 283): INFO Epoch: [6] [2020/2502] eta: 0:23:40 lr: 0.000015 loss_cls: 2.4762 (2.7259) grad_norm: 1.6330 (1.6660) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-11 01:23:45 root] (utils.py 283): INFO Epoch: [6] [2030/2502] eta: 0:23:11 lr: 0.000015 loss_cls: 2.6718 (2.7255) grad_norm: 1.6843 (1.6668) time: 2.9458 data: 0.0002 max mem: 29202 +[2024-12-11 01:24:15 root] (utils.py 283): INFO Epoch: [6] [2040/2502] eta: 0:22:41 lr: 0.000015 loss_cls: 2.7252 (2.7248) grad_norm: 1.7101 (1.6674) time: 2.9379 data: 0.0002 max mem: 29202 +[2024-12-11 01:24:44 root] (utils.py 283): INFO Epoch: [6] [2050/2502] eta: 0:22:12 lr: 0.000015 loss_cls: 2.7941 (2.7250) grad_norm: 1.7101 (1.6688) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 01:25:13 root] (utils.py 283): INFO Epoch: [6] [2060/2502] eta: 0:21:42 lr: 0.000015 loss_cls: 2.7802 (2.7245) grad_norm: 1.6403 (1.6684) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 01:25:43 root] (utils.py 283): INFO Epoch: [6] [2070/2502] eta: 0:21:13 lr: 0.000015 loss_cls: 2.8174 (2.7253) grad_norm: 1.5396 (1.6681) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 01:26:12 root] (utils.py 283): INFO Epoch: [6] [2080/2502] eta: 0:20:43 lr: 0.000015 loss_cls: 2.8174 (2.7252) grad_norm: 1.5590 (1.6689) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 01:26:42 root] (utils.py 283): INFO Epoch: [6] [2090/2502] eta: 0:20:14 lr: 0.000015 loss_cls: 2.8173 (2.7259) grad_norm: 1.5458 (1.6681) time: 2.9374 data: 0.0002 max mem: 29202 +[2024-12-11 01:27:11 root] (utils.py 283): INFO Epoch: [6] [2100/2502] eta: 0:19:44 lr: 0.000015 loss_cls: 2.8173 (2.7257) grad_norm: 1.4838 (1.6681) time: 2.9431 data: 0.0002 max mem: 29202 +[2024-12-11 01:27:40 root] (utils.py 283): INFO Epoch: [6] [2110/2502] eta: 0:19:15 lr: 0.000015 loss_cls: 2.9054 (2.7267) grad_norm: 1.5830 (1.6678) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 01:28:10 root] (utils.py 283): INFO Epoch: [6] [2120/2502] eta: 0:18:45 lr: 0.000015 loss_cls: 2.9289 (2.7265) grad_norm: 1.5826 (1.6673) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 01:28:39 root] (utils.py 283): INFO Epoch: [6] [2130/2502] eta: 0:18:16 lr: 0.000015 loss_cls: 2.9322 (2.7267) grad_norm: 1.5469 (1.6668) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-11 01:29:09 root] (utils.py 283): INFO Epoch: [6] [2140/2502] eta: 0:17:46 lr: 0.000015 loss_cls: 2.8632 (2.7269) grad_norm: 1.6029 (1.6671) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 01:29:38 root] (utils.py 283): INFO Epoch: [6] [2150/2502] eta: 0:17:17 lr: 0.000015 loss_cls: 2.9510 (2.7275) grad_norm: 1.6595 (1.6674) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 01:30:07 root] (utils.py 283): INFO Epoch: [6] [2160/2502] eta: 0:16:47 lr: 0.000015 loss_cls: 2.8716 (2.7275) grad_norm: 1.5913 (1.6669) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 01:30:37 root] (utils.py 283): INFO Epoch: [6] [2170/2502] eta: 0:16:18 lr: 0.000015 loss_cls: 2.6750 (2.7265) grad_norm: 1.5913 (1.6670) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 01:31:06 root] (utils.py 283): INFO Epoch: [6] [2180/2502] eta: 0:15:48 lr: 0.000015 loss_cls: 2.4285 (2.7253) grad_norm: 1.5998 (1.6672) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 01:31:36 root] (utils.py 283): INFO Epoch: [6] [2190/2502] eta: 0:15:19 lr: 0.000015 loss_cls: 2.7911 (2.7260) grad_norm: 1.6636 (1.6679) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 01:32:05 root] (utils.py 283): INFO Epoch: [6] [2200/2502] eta: 0:14:49 lr: 0.000015 loss_cls: 2.8989 (2.7264) grad_norm: 1.6377 (1.6676) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 01:32:35 root] (utils.py 283): INFO Epoch: [6] [2210/2502] eta: 0:14:20 lr: 0.000015 loss_cls: 2.7693 (2.7258) grad_norm: 1.5967 (1.6677) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-11 01:33:04 root] (utils.py 283): INFO Epoch: [6] [2220/2502] eta: 0:13:51 lr: 0.000015 loss_cls: 2.6752 (2.7257) grad_norm: 1.5395 (1.6677) time: 2.9553 data: 0.0002 max mem: 29202 +[2024-12-11 01:33:34 root] (utils.py 283): INFO Epoch: [6] [2230/2502] eta: 0:13:21 lr: 0.000015 loss_cls: 2.8523 (2.7258) grad_norm: 1.5521 (1.6685) time: 2.9495 data: 0.0002 max mem: 29202 +[2024-12-11 01:34:03 root] (utils.py 283): INFO Epoch: [6] [2240/2502] eta: 0:12:52 lr: 0.000015 loss_cls: 2.8523 (2.7258) grad_norm: 1.7112 (1.6686) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 01:34:32 root] (utils.py 283): INFO Epoch: [6] [2250/2502] eta: 0:12:22 lr: 0.000015 loss_cls: 2.8564 (2.7262) grad_norm: 1.6678 (1.6689) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 01:35:02 root] (utils.py 283): INFO Epoch: [6] [2260/2502] eta: 0:11:53 lr: 0.000015 loss_cls: 2.7382 (2.7256) grad_norm: 1.6075 (1.6684) time: 2.9478 data: 0.0002 max mem: 29202 +[2024-12-11 01:35:31 root] (utils.py 283): INFO Epoch: [6] [2270/2502] eta: 0:11:23 lr: 0.000015 loss_cls: 2.7382 (2.7256) grad_norm: 1.5480 (1.6688) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-11 01:36:01 root] (utils.py 283): INFO Epoch: [6] [2280/2502] eta: 0:10:54 lr: 0.000015 loss_cls: 2.8812 (2.7263) grad_norm: 1.6172 (1.6692) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 01:36:30 root] (utils.py 283): INFO Epoch: [6] [2290/2502] eta: 0:10:24 lr: 0.000015 loss_cls: 2.7841 (2.7263) grad_norm: 1.6218 (1.6691) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 01:37:00 root] (utils.py 283): INFO Epoch: [6] [2300/2502] eta: 0:09:55 lr: 0.000015 loss_cls: 2.8120 (2.7269) grad_norm: 1.6065 (1.6689) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 01:37:29 root] (utils.py 283): INFO Epoch: [6] [2310/2502] eta: 0:09:25 lr: 0.000015 loss_cls: 2.8395 (2.7268) grad_norm: 1.5228 (1.6686) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 01:37:58 root] (utils.py 283): INFO Epoch: [6] [2320/2502] eta: 0:08:56 lr: 0.000015 loss_cls: 2.8746 (2.7275) grad_norm: 1.5232 (1.6687) time: 2.9319 data: 0.0003 max mem: 29202 +[2024-12-11 01:38:28 root] (utils.py 283): INFO Epoch: [6] [2330/2502] eta: 0:08:26 lr: 0.000015 loss_cls: 2.8187 (2.7268) grad_norm: 1.5997 (1.6684) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-11 01:38:57 root] (utils.py 283): INFO Epoch: [6] [2340/2502] eta: 0:07:57 lr: 0.000015 loss_cls: 2.7574 (2.7271) grad_norm: 1.5372 (1.6681) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-11 01:39:26 root] (utils.py 283): INFO Epoch: [6] [2350/2502] eta: 0:07:27 lr: 0.000015 loss_cls: 2.8015 (2.7263) grad_norm: 1.5997 (1.6681) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 01:39:56 root] (utils.py 283): INFO Epoch: [6] [2360/2502] eta: 0:06:58 lr: 0.000015 loss_cls: 2.6608 (2.7261) grad_norm: 1.5662 (1.6676) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 01:40:25 root] (utils.py 283): INFO Epoch: [6] [2370/2502] eta: 0:06:28 lr: 0.000015 loss_cls: 2.6608 (2.7250) grad_norm: 1.5959 (1.6680) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 01:40:55 root] (utils.py 283): INFO Epoch: [6] [2380/2502] eta: 0:05:59 lr: 0.000015 loss_cls: 2.7140 (2.7253) grad_norm: 1.6532 (1.6682) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 01:41:24 root] (utils.py 283): INFO Epoch: [6] [2390/2502] eta: 0:05:30 lr: 0.000015 loss_cls: 2.8836 (2.7259) grad_norm: 1.6219 (1.6679) time: 2.9483 data: 0.0003 max mem: 29202 +[2024-12-11 01:41:54 root] (utils.py 283): INFO Epoch: [6] [2400/2502] eta: 0:05:00 lr: 0.000015 loss_cls: 2.8472 (2.7259) grad_norm: 1.6087 (1.6684) time: 2.9483 data: 0.0003 max mem: 29202 +[2024-12-11 01:42:23 root] (utils.py 283): INFO Epoch: [6] [2410/2502] eta: 0:04:31 lr: 0.000015 loss_cls: 2.9085 (2.7272) grad_norm: 1.6159 (1.6682) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 01:42:52 root] (utils.py 283): INFO Epoch: [6] [2420/2502] eta: 0:04:01 lr: 0.000015 loss_cls: 2.9553 (2.7271) grad_norm: 1.5612 (1.6677) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 01:43:22 root] (utils.py 283): INFO Epoch: [6] [2430/2502] eta: 0:03:32 lr: 0.000015 loss_cls: 2.6855 (2.7269) grad_norm: 1.5499 (1.6676) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 01:43:51 root] (utils.py 283): INFO Epoch: [6] [2440/2502] eta: 0:03:02 lr: 0.000015 loss_cls: 2.6289 (2.7260) grad_norm: 1.7037 (1.6683) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 01:44:21 root] (utils.py 283): INFO Epoch: [6] [2450/2502] eta: 0:02:33 lr: 0.000015 loss_cls: 2.6172 (2.7254) grad_norm: 1.6731 (1.6685) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 01:44:50 root] (utils.py 283): INFO Epoch: [6] [2460/2502] eta: 0:02:03 lr: 0.000015 loss_cls: 2.8328 (2.7257) grad_norm: 1.5681 (1.6683) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 01:45:20 root] (utils.py 283): INFO Epoch: [6] [2470/2502] eta: 0:01:34 lr: 0.000015 loss_cls: 2.8700 (2.7256) grad_norm: 1.5531 (1.6679) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-11 01:45:49 root] (utils.py 283): INFO Epoch: [6] [2480/2502] eta: 0:01:04 lr: 0.000015 loss_cls: 2.9413 (2.7258) grad_norm: 1.5559 (1.6677) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 01:46:19 root] (utils.py 283): INFO Epoch: [6] [2490/2502] eta: 0:00:35 lr: 0.000015 loss_cls: 2.7369 (2.7248) grad_norm: 1.6526 (1.6682) time: 2.9601 data: 0.0268 max mem: 29202 +[2024-12-11 01:46:48 root] (utils.py 283): INFO Epoch: [6] [2500/2502] eta: 0:00:05 lr: 0.000015 loss_cls: 2.6446 (2.7252) grad_norm: 1.6526 (1.6681) time: 2.9638 data: 0.0268 max mem: 29202 +[2024-12-11 01:46:51 root] (utils.py 283): INFO Epoch: [6] [2501/2502] eta: 0:00:02 lr: 0.000015 loss_cls: 2.6473 (2.7253) grad_norm: 1.6526 (1.6681) time: 2.9644 data: 0.0268 max mem: 29202 +[2024-12-11 01:46:51 root] (utils.py 297): INFO Epoch: [6] Total time: 2:02:52 (2.9467 s / it) +[2024-12-11 01:46:51 root] (engine.py 179): INFO Averaged stats:lr: 0.000015 loss_cls: 2.6473 (2.7345) grad_norm: 1.6526 (1.6681) +[2024-12-11 01:46:55 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3014 (0.3014) acc1: 92.9688 (92.9688) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5614 data: 0.0003 max mem: 29202 +[2024-12-11 01:47:01 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6099 (0.5938) acc1: 86.7188 (86.2926) acc3: 97.6562 (97.0881) acc5: 98.4375 (98.0114) time: 0.5514 data: 0.0004 max mem: 29202 +[2024-12-11 01:47:06 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6099 (0.6327) acc1: 86.7188 (85.4539) acc3: 96.0938 (96.2054) acc5: 97.6562 (97.5818) time: 0.5513 data: 0.0004 max mem: 29202 +[2024-12-11 01:47:12 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6461 (0.6643) acc1: 85.9375 (85.0302) acc3: 95.3125 (95.8921) acc5: 96.8750 (97.3034) time: 0.5520 data: 0.0005 max mem: 29202 +[2024-12-11 01:47:17 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6715 (0.6715) acc1: 85.9375 (85.0610) acc3: 96.0938 (95.8460) acc5: 96.8750 (97.3895) time: 0.5516 data: 0.0005 max mem: 29202 +[2024-12-11 01:47:23 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8571 (0.7568) acc1: 78.9062 (83.3640) acc3: 92.9688 (94.7763) acc5: 95.3125 (96.5993) time: 0.5518 data: 0.0004 max mem: 29202 +[2024-12-11 01:47:28 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9823 (0.7886) acc1: 76.5625 (82.7997) acc3: 89.8438 (94.1214) acc5: 93.7500 (96.1194) time: 0.5527 data: 0.0004 max mem: 29202 +[2024-12-11 01:47:34 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9823 (0.8218) acc1: 78.1250 (82.0643) acc3: 90.6250 (93.7280) acc5: 93.7500 (95.8957) time: 0.5526 data: 0.0004 max mem: 29202 +[2024-12-11 01:47:39 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9630 (0.8483) acc1: 78.1250 (81.5972) acc3: 90.6250 (93.2677) acc5: 93.7500 (95.5440) time: 0.5522 data: 0.0006 max mem: 29202 +[2024-12-11 01:47:45 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9998 (0.8765) acc1: 75.7812 (80.7950) acc3: 90.6250 (92.9859) acc5: 93.7500 (95.3726) time: 0.5525 data: 0.0006 max mem: 29202 +[2024-12-11 01:47:49 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9478 (0.8747) acc1: 75.7812 (80.7760) acc3: 91.4062 (93.0400) acc5: 94.5312 (95.4400) time: 0.5432 data: 0.0006 max mem: 29202 +[2024-12-11 01:47:49 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5504 s / it) +[2024-12-11 01:47:49 root] (engine.py 264): INFO * Acc@1 80.882 Acc@3 92.904 Acc@5 95.310 loss 0.869 flops 13.207 layer_flops 13.109 +[2024-12-11 01:47:49 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 80.9% +[2024-12-11 01:47:51 root] (main.py 576): INFO Max accuracy: 80.88% +[2024-12-11 01:47:54 root] (utils.py 283): INFO Epoch: [7] [ 0/2502] eta: 2:00:11 lr: 0.000013 loss_cls: 3.0210 (3.0210) grad_norm: 1.3583 (1.3583) time: 2.8823 data: 0.0003 max mem: 29202 +[2024-12-11 01:48:24 root] (utils.py 283): INFO Epoch: [7] [ 10/2502] eta: 2:01:45 lr: 0.000013 loss_cls: 3.0210 (2.9056) grad_norm: 1.6706 (1.6910) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-11 01:48:53 root] (utils.py 283): INFO Epoch: [7] [ 20/2502] eta: 2:01:23 lr: 0.000013 loss_cls: 2.9519 (2.7747) grad_norm: 1.6337 (1.6320) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 01:49:22 root] (utils.py 283): INFO Epoch: [7] [ 30/2502] eta: 2:01:09 lr: 0.000013 loss_cls: 2.9519 (2.8017) grad_norm: 1.5657 (1.6378) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-11 01:49:52 root] (utils.py 283): INFO Epoch: [7] [ 40/2502] eta: 2:00:45 lr: 0.000013 loss_cls: 2.8067 (2.7513) grad_norm: 1.5268 (1.6125) time: 2.9514 data: 0.0003 max mem: 29202 +[2024-12-11 01:50:21 root] (utils.py 283): INFO Epoch: [7] [ 50/2502] eta: 2:00:09 lr: 0.000013 loss_cls: 2.8026 (2.7402) grad_norm: 1.5268 (1.5931) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-11 01:50:51 root] (utils.py 283): INFO Epoch: [7] [ 60/2502] eta: 1:59:38 lr: 0.000013 loss_cls: 2.9548 (2.7681) grad_norm: 1.5306 (1.5931) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 01:51:20 root] (utils.py 283): INFO Epoch: [7] [ 70/2502] eta: 1:59:15 lr: 0.000013 loss_cls: 2.8736 (2.7544) grad_norm: 1.6091 (1.6057) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-11 01:51:49 root] (utils.py 283): INFO Epoch: [7] [ 80/2502] eta: 1:58:42 lr: 0.000013 loss_cls: 2.8367 (2.7600) grad_norm: 1.5973 (1.6034) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 01:52:19 root] (utils.py 283): INFO Epoch: [7] [ 90/2502] eta: 1:58:11 lr: 0.000013 loss_cls: 2.8636 (2.7669) grad_norm: 1.5611 (1.6083) time: 2.9323 data: 0.0003 max mem: 29202 +[2024-12-11 01:52:48 root] (utils.py 283): INFO Epoch: [7] [ 100/2502] eta: 1:57:40 lr: 0.000013 loss_cls: 2.8425 (2.7680) grad_norm: 1.4631 (1.5942) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 01:53:18 root] (utils.py 283): INFO Epoch: [7] [ 110/2502] eta: 1:57:11 lr: 0.000013 loss_cls: 2.9120 (2.7714) grad_norm: 1.4759 (1.5980) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 01:53:47 root] (utils.py 283): INFO Epoch: [7] [ 120/2502] eta: 1:56:42 lr: 0.000013 loss_cls: 2.7842 (2.7517) grad_norm: 1.5253 (1.5936) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 01:54:16 root] (utils.py 283): INFO Epoch: [7] [ 130/2502] eta: 1:56:12 lr: 0.000013 loss_cls: 2.7489 (2.7562) grad_norm: 1.5915 (1.5995) time: 2.9407 data: 0.0002 max mem: 29202 +[2024-12-11 01:54:46 root] (utils.py 283): INFO Epoch: [7] [ 140/2502] eta: 1:55:44 lr: 0.000013 loss_cls: 2.9143 (2.7577) grad_norm: 1.6323 (1.6020) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-11 01:55:15 root] (utils.py 283): INFO Epoch: [7] [ 150/2502] eta: 1:55:14 lr: 0.000013 loss_cls: 2.7597 (2.7532) grad_norm: 1.6136 (1.5981) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 01:55:45 root] (utils.py 283): INFO Epoch: [7] [ 160/2502] eta: 1:54:44 lr: 0.000013 loss_cls: 2.8181 (2.7621) grad_norm: 1.6275 (1.6033) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 01:56:14 root] (utils.py 283): INFO Epoch: [7] [ 170/2502] eta: 1:54:14 lr: 0.000013 loss_cls: 2.8181 (2.7426) grad_norm: 1.6313 (1.6020) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 01:56:43 root] (utils.py 283): INFO Epoch: [7] [ 180/2502] eta: 1:53:44 lr: 0.000013 loss_cls: 2.0638 (2.7167) grad_norm: 1.5337 (1.5992) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 01:57:13 root] (utils.py 283): INFO Epoch: [7] [ 190/2502] eta: 1:53:15 lr: 0.000013 loss_cls: 2.7458 (2.7267) grad_norm: 1.5268 (1.6019) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-11 01:57:42 root] (utils.py 283): INFO Epoch: [7] [ 200/2502] eta: 1:52:45 lr: 0.000013 loss_cls: 2.9085 (2.7240) grad_norm: 1.6548 (1.6159) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 01:58:11 root] (utils.py 283): INFO Epoch: [7] [ 210/2502] eta: 1:52:15 lr: 0.000013 loss_cls: 2.7137 (2.7188) grad_norm: 1.6548 (1.6275) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 01:58:41 root] (utils.py 283): INFO Epoch: [7] [ 220/2502] eta: 1:51:45 lr: 0.000013 loss_cls: 2.6424 (2.7108) grad_norm: 1.5152 (1.6247) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-11 01:59:10 root] (utils.py 283): INFO Epoch: [7] [ 230/2502] eta: 1:51:16 lr: 0.000013 loss_cls: 2.7474 (2.7163) grad_norm: 1.5571 (1.6286) time: 2.9338 data: 0.0003 max mem: 29202 +[2024-12-11 01:59:39 root] (utils.py 283): INFO Epoch: [7] [ 240/2502] eta: 1:50:46 lr: 0.000013 loss_cls: 2.9162 (2.7259) grad_norm: 1.5873 (1.6332) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 02:00:09 root] (utils.py 283): INFO Epoch: [7] [ 250/2502] eta: 1:50:18 lr: 0.000013 loss_cls: 2.9417 (2.7270) grad_norm: 1.5873 (1.6330) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 02:00:38 root] (utils.py 283): INFO Epoch: [7] [ 260/2502] eta: 1:49:49 lr: 0.000013 loss_cls: 2.8499 (2.7244) grad_norm: 1.6278 (1.6297) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 02:01:08 root] (utils.py 283): INFO Epoch: [7] [ 270/2502] eta: 1:49:20 lr: 0.000013 loss_cls: 2.7072 (2.7196) grad_norm: 1.5194 (1.6273) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 02:01:37 root] (utils.py 283): INFO Epoch: [7] [ 280/2502] eta: 1:48:50 lr: 0.000013 loss_cls: 2.8696 (2.7203) grad_norm: 1.5194 (1.6248) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 02:02:07 root] (utils.py 283): INFO Epoch: [7] [ 290/2502] eta: 1:48:21 lr: 0.000013 loss_cls: 2.9206 (2.7236) grad_norm: 1.5526 (1.6263) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 02:02:36 root] (utils.py 283): INFO Epoch: [7] [ 300/2502] eta: 1:47:51 lr: 0.000013 loss_cls: 2.9311 (2.7241) grad_norm: 1.5526 (1.6269) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 02:03:05 root] (utils.py 283): INFO Epoch: [7] [ 310/2502] eta: 1:47:22 lr: 0.000013 loss_cls: 2.7736 (2.7165) grad_norm: 1.5475 (1.6260) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 02:03:35 root] (utils.py 283): INFO Epoch: [7] [ 320/2502] eta: 1:46:54 lr: 0.000013 loss_cls: 2.4913 (2.7159) grad_norm: 1.5993 (1.6282) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 02:04:04 root] (utils.py 283): INFO Epoch: [7] [ 330/2502] eta: 1:46:24 lr: 0.000013 loss_cls: 2.7413 (2.7164) grad_norm: 1.5993 (1.6279) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 02:04:34 root] (utils.py 283): INFO Epoch: [7] [ 340/2502] eta: 1:45:55 lr: 0.000013 loss_cls: 2.7413 (2.7144) grad_norm: 1.5599 (1.6244) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 02:05:03 root] (utils.py 283): INFO Epoch: [7] [ 350/2502] eta: 1:45:25 lr: 0.000013 loss_cls: 2.7665 (2.7138) grad_norm: 1.5599 (1.6277) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 02:05:32 root] (utils.py 283): INFO Epoch: [7] [ 360/2502] eta: 1:44:55 lr: 0.000013 loss_cls: 2.9712 (2.7185) grad_norm: 1.6672 (1.6305) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 02:06:02 root] (utils.py 283): INFO Epoch: [7] [ 370/2502] eta: 1:44:26 lr: 0.000013 loss_cls: 2.8791 (2.7130) grad_norm: 1.6411 (1.6339) time: 2.9336 data: 0.0003 max mem: 29202 +[2024-12-11 02:06:31 root] (utils.py 283): INFO Epoch: [7] [ 380/2502] eta: 1:43:57 lr: 0.000013 loss_cls: 2.7612 (2.7152) grad_norm: 1.5769 (1.6344) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 02:07:01 root] (utils.py 283): INFO Epoch: [7] [ 390/2502] eta: 1:43:28 lr: 0.000013 loss_cls: 3.0091 (2.7188) grad_norm: 1.5531 (1.6382) time: 2.9450 data: 0.0002 max mem: 29202 +[2024-12-11 02:07:30 root] (utils.py 283): INFO Epoch: [7] [ 400/2502] eta: 1:42:58 lr: 0.000013 loss_cls: 2.9582 (2.7233) grad_norm: 1.5679 (1.6372) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 02:07:59 root] (utils.py 283): INFO Epoch: [7] [ 410/2502] eta: 1:42:28 lr: 0.000013 loss_cls: 2.9734 (2.7254) grad_norm: 1.6118 (1.6390) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 02:08:29 root] (utils.py 283): INFO Epoch: [7] [ 420/2502] eta: 1:41:59 lr: 0.000013 loss_cls: 2.5849 (2.7212) grad_norm: 1.5282 (1.6391) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 02:08:58 root] (utils.py 283): INFO Epoch: [7] [ 430/2502] eta: 1:41:30 lr: 0.000013 loss_cls: 2.5849 (2.7225) grad_norm: 1.5282 (1.6408) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 02:09:28 root] (utils.py 283): INFO Epoch: [7] [ 440/2502] eta: 1:41:01 lr: 0.000013 loss_cls: 2.6554 (2.7178) grad_norm: 1.6216 (1.6428) time: 2.9414 data: 0.0002 max mem: 29202 +[2024-12-11 02:09:57 root] (utils.py 283): INFO Epoch: [7] [ 450/2502] eta: 1:40:31 lr: 0.000013 loss_cls: 2.6015 (2.7172) grad_norm: 1.6269 (1.6434) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 02:10:26 root] (utils.py 283): INFO Epoch: [7] [ 460/2502] eta: 1:40:02 lr: 0.000013 loss_cls: 2.8878 (2.7192) grad_norm: 1.5637 (1.6415) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 02:10:56 root] (utils.py 283): INFO Epoch: [7] [ 470/2502] eta: 1:39:33 lr: 0.000013 loss_cls: 2.8232 (2.7153) grad_norm: 1.5572 (1.6457) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 02:11:25 root] (utils.py 283): INFO Epoch: [7] [ 480/2502] eta: 1:39:03 lr: 0.000013 loss_cls: 2.6416 (2.7121) grad_norm: 1.6021 (1.6468) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 02:11:55 root] (utils.py 283): INFO Epoch: [7] [ 490/2502] eta: 1:38:34 lr: 0.000013 loss_cls: 2.8595 (2.7137) grad_norm: 1.6098 (1.6473) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 02:12:24 root] (utils.py 283): INFO Epoch: [7] [ 500/2502] eta: 1:38:05 lr: 0.000013 loss_cls: 2.8900 (2.7141) grad_norm: 1.6098 (1.6475) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 02:12:53 root] (utils.py 283): INFO Epoch: [7] [ 510/2502] eta: 1:37:35 lr: 0.000013 loss_cls: 2.7392 (2.7162) grad_norm: 1.5955 (1.6463) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 02:13:23 root] (utils.py 283): INFO Epoch: [7] [ 520/2502] eta: 1:37:06 lr: 0.000013 loss_cls: 2.7392 (2.7150) grad_norm: 1.6772 (1.6498) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 02:13:52 root] (utils.py 283): INFO Epoch: [7] [ 530/2502] eta: 1:36:37 lr: 0.000013 loss_cls: 2.7573 (2.7156) grad_norm: 1.7147 (1.6534) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 02:14:22 root] (utils.py 283): INFO Epoch: [7] [ 540/2502] eta: 1:36:07 lr: 0.000013 loss_cls: 2.8789 (2.7129) grad_norm: 1.7036 (1.6552) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-11 02:14:51 root] (utils.py 283): INFO Epoch: [7] [ 550/2502] eta: 1:35:38 lr: 0.000013 loss_cls: 2.6857 (2.7127) grad_norm: 1.6018 (1.6535) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-11 02:15:21 root] (utils.py 283): INFO Epoch: [7] [ 560/2502] eta: 1:35:10 lr: 0.000013 loss_cls: 2.8144 (2.7162) grad_norm: 1.5388 (1.6543) time: 2.9557 data: 0.0003 max mem: 29202 +[2024-12-11 02:15:50 root] (utils.py 283): INFO Epoch: [7] [ 570/2502] eta: 1:34:40 lr: 0.000013 loss_cls: 2.9492 (2.7167) grad_norm: 1.6459 (1.6542) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-11 02:16:20 root] (utils.py 283): INFO Epoch: [7] [ 580/2502] eta: 1:34:11 lr: 0.000013 loss_cls: 2.9301 (2.7201) grad_norm: 1.6101 (1.6547) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 02:16:49 root] (utils.py 283): INFO Epoch: [7] [ 590/2502] eta: 1:33:41 lr: 0.000013 loss_cls: 3.0439 (2.7242) grad_norm: 1.5894 (1.6539) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 02:17:18 root] (utils.py 283): INFO Epoch: [7] [ 600/2502] eta: 1:33:12 lr: 0.000013 loss_cls: 2.9681 (2.7245) grad_norm: 1.5297 (1.6526) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 02:17:48 root] (utils.py 283): INFO Epoch: [7] [ 610/2502] eta: 1:32:43 lr: 0.000013 loss_cls: 2.7601 (2.7238) grad_norm: 1.5656 (1.6520) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 02:18:17 root] (utils.py 283): INFO Epoch: [7] [ 620/2502] eta: 1:32:13 lr: 0.000013 loss_cls: 2.6812 (2.7229) grad_norm: 1.6596 (1.6530) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 02:18:47 root] (utils.py 283): INFO Epoch: [7] [ 630/2502] eta: 1:31:44 lr: 0.000013 loss_cls: 2.7909 (2.7245) grad_norm: 1.6188 (1.6513) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-11 02:19:16 root] (utils.py 283): INFO Epoch: [7] [ 640/2502] eta: 1:31:15 lr: 0.000013 loss_cls: 2.8747 (2.7245) grad_norm: 1.5612 (1.6510) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-11 02:19:46 root] (utils.py 283): INFO Epoch: [7] [ 650/2502] eta: 1:30:45 lr: 0.000013 loss_cls: 2.6507 (2.7226) grad_norm: 1.5992 (1.6504) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 02:20:15 root] (utils.py 283): INFO Epoch: [7] [ 660/2502] eta: 1:30:16 lr: 0.000013 loss_cls: 2.9034 (2.7269) grad_norm: 1.4796 (1.6507) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 02:20:44 root] (utils.py 283): INFO Epoch: [7] [ 670/2502] eta: 1:29:47 lr: 0.000013 loss_cls: 2.9749 (2.7275) grad_norm: 1.5947 (1.6514) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 02:21:14 root] (utils.py 283): INFO Epoch: [7] [ 680/2502] eta: 1:29:17 lr: 0.000013 loss_cls: 2.8006 (2.7287) grad_norm: 1.6538 (1.6504) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 02:21:43 root] (utils.py 283): INFO Epoch: [7] [ 690/2502] eta: 1:28:48 lr: 0.000013 loss_cls: 2.8006 (2.7291) grad_norm: 1.6812 (1.6542) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-11 02:22:13 root] (utils.py 283): INFO Epoch: [7] [ 700/2502] eta: 1:28:19 lr: 0.000013 loss_cls: 2.7382 (2.7301) grad_norm: 1.6664 (1.6540) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-11 02:22:42 root] (utils.py 283): INFO Epoch: [7] [ 710/2502] eta: 1:27:49 lr: 0.000013 loss_cls: 2.7587 (2.7302) grad_norm: 1.5753 (1.6545) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 02:23:12 root] (utils.py 283): INFO Epoch: [7] [ 720/2502] eta: 1:27:20 lr: 0.000013 loss_cls: 2.9101 (2.7313) grad_norm: 1.5592 (1.6535) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 02:23:41 root] (utils.py 283): INFO Epoch: [7] [ 730/2502] eta: 1:26:51 lr: 0.000013 loss_cls: 2.9353 (2.7320) grad_norm: 1.5542 (1.6571) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 02:24:10 root] (utils.py 283): INFO Epoch: [7] [ 740/2502] eta: 1:26:21 lr: 0.000013 loss_cls: 2.9280 (2.7336) grad_norm: 1.6659 (1.6579) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 02:24:40 root] (utils.py 283): INFO Epoch: [7] [ 750/2502] eta: 1:25:52 lr: 0.000013 loss_cls: 2.8896 (2.7359) grad_norm: 1.6659 (1.6578) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-11 02:25:09 root] (utils.py 283): INFO Epoch: [7] [ 760/2502] eta: 1:25:22 lr: 0.000013 loss_cls: 2.9025 (2.7382) grad_norm: 1.6117 (1.6571) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 02:25:39 root] (utils.py 283): INFO Epoch: [7] [ 770/2502] eta: 1:24:53 lr: 0.000013 loss_cls: 2.9246 (2.7400) grad_norm: 1.5828 (1.6572) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 02:26:08 root] (utils.py 283): INFO Epoch: [7] [ 780/2502] eta: 1:24:23 lr: 0.000013 loss_cls: 2.8289 (2.7384) grad_norm: 1.6271 (1.6579) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 02:26:37 root] (utils.py 283): INFO Epoch: [7] [ 790/2502] eta: 1:23:54 lr: 0.000013 loss_cls: 2.8130 (2.7407) grad_norm: 1.7049 (1.6588) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 02:27:07 root] (utils.py 283): INFO Epoch: [7] [ 800/2502] eta: 1:23:25 lr: 0.000013 loss_cls: 2.8130 (2.7396) grad_norm: 1.6123 (1.6580) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 02:27:36 root] (utils.py 283): INFO Epoch: [7] [ 810/2502] eta: 1:22:55 lr: 0.000013 loss_cls: 2.8922 (2.7416) grad_norm: 1.6054 (1.6594) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 02:28:06 root] (utils.py 283): INFO Epoch: [7] [ 820/2502] eta: 1:22:26 lr: 0.000013 loss_cls: 2.8964 (2.7406) grad_norm: 1.6015 (1.6587) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 02:28:35 root] (utils.py 283): INFO Epoch: [7] [ 830/2502] eta: 1:21:57 lr: 0.000013 loss_cls: 2.6124 (2.7392) grad_norm: 1.6061 (1.6590) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-11 02:29:05 root] (utils.py 283): INFO Epoch: [7] [ 840/2502] eta: 1:21:28 lr: 0.000013 loss_cls: 2.5845 (2.7366) grad_norm: 1.6061 (1.6587) time: 2.9530 data: 0.0003 max mem: 29202 +[2024-12-11 02:29:34 root] (utils.py 283): INFO Epoch: [7] [ 850/2502] eta: 1:20:58 lr: 0.000013 loss_cls: 2.6334 (2.7365) grad_norm: 1.5044 (1.6570) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-11 02:30:04 root] (utils.py 283): INFO Epoch: [7] [ 860/2502] eta: 1:20:29 lr: 0.000013 loss_cls: 2.7714 (2.7370) grad_norm: 1.5626 (1.6570) time: 2.9405 data: 0.0002 max mem: 29202 +[2024-12-11 02:30:33 root] (utils.py 283): INFO Epoch: [7] [ 870/2502] eta: 1:19:59 lr: 0.000013 loss_cls: 2.9328 (2.7389) grad_norm: 1.6118 (1.6578) time: 2.9406 data: 0.0002 max mem: 29202 +[2024-12-11 02:31:02 root] (utils.py 283): INFO Epoch: [7] [ 880/2502] eta: 1:19:30 lr: 0.000013 loss_cls: 3.0114 (2.7383) grad_norm: 1.5717 (1.6581) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 02:31:32 root] (utils.py 283): INFO Epoch: [7] [ 890/2502] eta: 1:19:00 lr: 0.000013 loss_cls: 2.8275 (2.7393) grad_norm: 1.6546 (1.6599) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 02:32:01 root] (utils.py 283): INFO Epoch: [7] [ 900/2502] eta: 1:18:31 lr: 0.000013 loss_cls: 2.8218 (2.7406) grad_norm: 1.4929 (1.6587) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 02:32:31 root] (utils.py 283): INFO Epoch: [7] [ 910/2502] eta: 1:18:02 lr: 0.000013 loss_cls: 2.8092 (2.7404) grad_norm: 1.5243 (1.6583) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 02:33:00 root] (utils.py 283): INFO Epoch: [7] [ 920/2502] eta: 1:17:32 lr: 0.000013 loss_cls: 2.7625 (2.7395) grad_norm: 1.6320 (1.6616) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 02:33:29 root] (utils.py 283): INFO Epoch: [7] [ 930/2502] eta: 1:17:03 lr: 0.000013 loss_cls: 2.5244 (2.7368) grad_norm: 1.5881 (1.6604) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 02:33:59 root] (utils.py 283): INFO Epoch: [7] [ 940/2502] eta: 1:16:33 lr: 0.000013 loss_cls: 2.5244 (2.7343) grad_norm: 1.6058 (1.6611) time: 2.9309 data: 0.0003 max mem: 29202 +[2024-12-11 02:34:28 root] (utils.py 283): INFO Epoch: [7] [ 950/2502] eta: 1:16:03 lr: 0.000013 loss_cls: 2.6288 (2.7328) grad_norm: 1.6790 (1.6611) time: 2.9316 data: 0.0003 max mem: 29202 +[2024-12-11 02:34:57 root] (utils.py 283): INFO Epoch: [7] [ 960/2502] eta: 1:15:34 lr: 0.000013 loss_cls: 2.6525 (2.7340) grad_norm: 1.5580 (1.6611) time: 2.9304 data: 0.0003 max mem: 29202 +[2024-12-11 02:35:27 root] (utils.py 283): INFO Epoch: [7] [ 970/2502] eta: 1:15:04 lr: 0.000013 loss_cls: 2.7539 (2.7341) grad_norm: 1.5884 (1.6611) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 02:35:56 root] (utils.py 283): INFO Epoch: [7] [ 980/2502] eta: 1:14:35 lr: 0.000013 loss_cls: 2.7941 (2.7349) grad_norm: 1.5878 (1.6609) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 02:36:25 root] (utils.py 283): INFO Epoch: [7] [ 990/2502] eta: 1:14:05 lr: 0.000013 loss_cls: 2.7968 (2.7362) grad_norm: 1.5855 (1.6609) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-11 02:36:55 root] (utils.py 283): INFO Epoch: [7] [1000/2502] eta: 1:13:36 lr: 0.000013 loss_cls: 2.7968 (2.7362) grad_norm: 1.5289 (1.6605) time: 2.9269 data: 0.0003 max mem: 29202 +[2024-12-11 02:37:24 root] (utils.py 283): INFO Epoch: [7] [1010/2502] eta: 1:13:06 lr: 0.000013 loss_cls: 2.7563 (2.7367) grad_norm: 1.5742 (1.6610) time: 2.9291 data: 0.0003 max mem: 29202 +[2024-12-11 02:37:53 root] (utils.py 283): INFO Epoch: [7] [1020/2502] eta: 1:12:37 lr: 0.000013 loss_cls: 2.8363 (2.7360) grad_norm: 1.7268 (1.6619) time: 2.9280 data: 0.0003 max mem: 29202 +[2024-12-11 02:38:22 root] (utils.py 283): INFO Epoch: [7] [1030/2502] eta: 1:12:07 lr: 0.000013 loss_cls: 2.4682 (2.7319) grad_norm: 1.6072 (1.6617) time: 2.9272 data: 0.0003 max mem: 29202 +[2024-12-11 02:38:52 root] (utils.py 283): INFO Epoch: [7] [1040/2502] eta: 1:11:38 lr: 0.000013 loss_cls: 2.6718 (2.7336) grad_norm: 1.6027 (1.6627) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 02:39:21 root] (utils.py 283): INFO Epoch: [7] [1050/2502] eta: 1:11:08 lr: 0.000013 loss_cls: 2.8016 (2.7325) grad_norm: 1.5996 (1.6620) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 02:39:50 root] (utils.py 283): INFO Epoch: [7] [1060/2502] eta: 1:10:39 lr: 0.000013 loss_cls: 2.6479 (2.7324) grad_norm: 1.6450 (1.6626) time: 2.9256 data: 0.0003 max mem: 29202 +[2024-12-11 02:40:20 root] (utils.py 283): INFO Epoch: [7] [1070/2502] eta: 1:10:09 lr: 0.000013 loss_cls: 2.7226 (2.7329) grad_norm: 1.5445 (1.6618) time: 2.9238 data: 0.0002 max mem: 29202 +[2024-12-11 02:40:49 root] (utils.py 283): INFO Epoch: [7] [1080/2502] eta: 1:09:39 lr: 0.000013 loss_cls: 2.7047 (2.7319) grad_norm: 1.4934 (1.6607) time: 2.9244 data: 0.0003 max mem: 29202 +[2024-12-11 02:41:18 root] (utils.py 283): INFO Epoch: [7] [1090/2502] eta: 1:09:10 lr: 0.000013 loss_cls: 2.8081 (2.7326) grad_norm: 1.5179 (1.6600) time: 2.9301 data: 0.0002 max mem: 29202 +[2024-12-11 02:41:48 root] (utils.py 283): INFO Epoch: [7] [1100/2502] eta: 1:08:40 lr: 0.000013 loss_cls: 2.8272 (2.7317) grad_norm: 1.5513 (1.6593) time: 2.9300 data: 0.0002 max mem: 29202 +[2024-12-11 02:42:17 root] (utils.py 283): INFO Epoch: [7] [1110/2502] eta: 1:08:11 lr: 0.000013 loss_cls: 2.7536 (2.7300) grad_norm: 1.5695 (1.6580) time: 2.9267 data: 0.0003 max mem: 29202 +[2024-12-11 02:42:46 root] (utils.py 283): INFO Epoch: [7] [1120/2502] eta: 1:07:41 lr: 0.000013 loss_cls: 2.4725 (2.7286) grad_norm: 1.5490 (1.6585) time: 2.9287 data: 0.0003 max mem: 29202 +[2024-12-11 02:43:15 root] (utils.py 283): INFO Epoch: [7] [1130/2502] eta: 1:07:12 lr: 0.000013 loss_cls: 2.7374 (2.7287) grad_norm: 1.6548 (1.6587) time: 2.9282 data: 0.0003 max mem: 29202 +[2024-12-11 02:43:45 root] (utils.py 283): INFO Epoch: [7] [1140/2502] eta: 1:06:42 lr: 0.000013 loss_cls: 2.7527 (2.7282) grad_norm: 1.5597 (1.6580) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-11 02:44:14 root] (utils.py 283): INFO Epoch: [7] [1150/2502] eta: 1:06:13 lr: 0.000013 loss_cls: 2.7033 (2.7279) grad_norm: 1.6327 (1.6597) time: 2.9278 data: 0.0003 max mem: 29202 +[2024-12-11 02:44:43 root] (utils.py 283): INFO Epoch: [7] [1160/2502] eta: 1:05:43 lr: 0.000013 loss_cls: 2.7033 (2.7276) grad_norm: 1.6399 (1.6594) time: 2.9297 data: 0.0003 max mem: 29202 +[2024-12-11 02:45:13 root] (utils.py 283): INFO Epoch: [7] [1170/2502] eta: 1:05:14 lr: 0.000013 loss_cls: 2.8338 (2.7270) grad_norm: 1.6351 (1.6600) time: 2.9295 data: 0.0003 max mem: 29202 +[2024-12-11 02:45:42 root] (utils.py 283): INFO Epoch: [7] [1180/2502] eta: 1:04:44 lr: 0.000013 loss_cls: 2.8660 (2.7287) grad_norm: 1.7797 (1.6609) time: 2.9305 data: 0.0003 max mem: 29202 +[2024-12-11 02:46:11 root] (utils.py 283): INFO Epoch: [7] [1190/2502] eta: 1:04:15 lr: 0.000013 loss_cls: 2.8184 (2.7275) grad_norm: 1.6939 (1.6605) time: 2.9294 data: 0.0003 max mem: 29202 +[2024-12-11 02:46:40 root] (utils.py 283): INFO Epoch: [7] [1200/2502] eta: 1:03:45 lr: 0.000013 loss_cls: 2.4491 (2.7255) grad_norm: 1.5379 (1.6592) time: 2.9277 data: 0.0003 max mem: 29202 +[2024-12-11 02:47:10 root] (utils.py 283): INFO Epoch: [7] [1210/2502] eta: 1:03:16 lr: 0.000013 loss_cls: 2.4996 (2.7255) grad_norm: 1.5544 (1.6619) time: 2.9278 data: 0.0003 max mem: 29202 +[2024-12-11 02:47:39 root] (utils.py 283): INFO Epoch: [7] [1220/2502] eta: 1:02:46 lr: 0.000013 loss_cls: 2.6789 (2.7233) grad_norm: 1.6521 (1.6613) time: 2.9265 data: 0.0003 max mem: 29202 +[2024-12-11 02:48:08 root] (utils.py 283): INFO Epoch: [7] [1230/2502] eta: 1:02:17 lr: 0.000013 loss_cls: 2.6789 (2.7233) grad_norm: 1.5240 (1.6611) time: 2.9259 data: 0.0003 max mem: 29202 +[2024-12-11 02:48:37 root] (utils.py 283): INFO Epoch: [7] [1240/2502] eta: 1:01:47 lr: 0.000013 loss_cls: 2.7148 (2.7225) grad_norm: 1.5192 (1.6603) time: 2.9257 data: 0.0003 max mem: 29202 +[2024-12-11 02:49:07 root] (utils.py 283): INFO Epoch: [7] [1250/2502] eta: 1:01:18 lr: 0.000013 loss_cls: 2.7148 (2.7219) grad_norm: 1.5141 (1.6605) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-11 02:49:36 root] (utils.py 283): INFO Epoch: [7] [1260/2502] eta: 1:00:48 lr: 0.000013 loss_cls: 2.8275 (2.7238) grad_norm: 1.6079 (1.6614) time: 2.9261 data: 0.0003 max mem: 29202 +[2024-12-11 02:50:05 root] (utils.py 283): INFO Epoch: [7] [1270/2502] eta: 1:00:19 lr: 0.000013 loss_cls: 2.9272 (2.7251) grad_norm: 1.6079 (1.6608) time: 2.9261 data: 0.0003 max mem: 29202 +[2024-12-11 02:50:35 root] (utils.py 283): INFO Epoch: [7] [1280/2502] eta: 0:59:49 lr: 0.000013 loss_cls: 2.8047 (2.7247) grad_norm: 1.5390 (1.6596) time: 2.9289 data: 0.0003 max mem: 29202 +[2024-12-11 02:51:04 root] (utils.py 283): INFO Epoch: [7] [1290/2502] eta: 0:59:20 lr: 0.000013 loss_cls: 2.8047 (2.7254) grad_norm: 1.5319 (1.6588) time: 2.9290 data: 0.0003 max mem: 29202 +[2024-12-11 02:51:33 root] (utils.py 283): INFO Epoch: [7] [1300/2502] eta: 0:58:50 lr: 0.000013 loss_cls: 2.8215 (2.7260) grad_norm: 1.5555 (1.6583) time: 2.9271 data: 0.0003 max mem: 29202 +[2024-12-11 02:52:02 root] (utils.py 283): INFO Epoch: [7] [1310/2502] eta: 0:58:21 lr: 0.000013 loss_cls: 2.7235 (2.7256) grad_norm: 1.5586 (1.6583) time: 2.9260 data: 0.0003 max mem: 29202 +[2024-12-11 02:52:32 root] (utils.py 283): INFO Epoch: [7] [1320/2502] eta: 0:57:51 lr: 0.000013 loss_cls: 2.7235 (2.7252) grad_norm: 1.7229 (1.6593) time: 2.9274 data: 0.0003 max mem: 29202 +[2024-12-11 02:53:01 root] (utils.py 283): INFO Epoch: [7] [1330/2502] eta: 0:57:22 lr: 0.000013 loss_cls: 2.7315 (2.7234) grad_norm: 1.5984 (1.6585) time: 2.9286 data: 0.0003 max mem: 29202 +[2024-12-11 02:53:30 root] (utils.py 283): INFO Epoch: [7] [1340/2502] eta: 0:56:53 lr: 0.000013 loss_cls: 2.5747 (2.7224) grad_norm: 1.5274 (1.6585) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-11 02:54:00 root] (utils.py 283): INFO Epoch: [7] [1350/2502] eta: 0:56:23 lr: 0.000013 loss_cls: 2.8502 (2.7240) grad_norm: 1.6334 (1.6588) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 02:54:29 root] (utils.py 283): INFO Epoch: [7] [1360/2502] eta: 0:55:54 lr: 0.000013 loss_cls: 2.8838 (2.7236) grad_norm: 1.6334 (1.6585) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 02:54:59 root] (utils.py 283): INFO Epoch: [7] [1370/2502] eta: 0:55:25 lr: 0.000013 loss_cls: 2.8984 (2.7244) grad_norm: 1.6772 (1.6590) time: 2.9564 data: 0.0004 max mem: 29202 +[2024-12-11 02:55:28 root] (utils.py 283): INFO Epoch: [7] [1380/2502] eta: 0:54:55 lr: 0.000013 loss_cls: 2.9029 (2.7261) grad_norm: 1.7200 (1.6599) time: 2.9504 data: 0.0004 max mem: 29202 +[2024-12-11 02:55:58 root] (utils.py 283): INFO Epoch: [7] [1390/2502] eta: 0:54:26 lr: 0.000013 loss_cls: 2.8206 (2.7265) grad_norm: 1.6834 (1.6599) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 02:56:27 root] (utils.py 283): INFO Epoch: [7] [1400/2502] eta: 0:53:57 lr: 0.000013 loss_cls: 2.7279 (2.7254) grad_norm: 1.6465 (1.6600) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 02:56:57 root] (utils.py 283): INFO Epoch: [7] [1410/2502] eta: 0:53:27 lr: 0.000013 loss_cls: 2.6988 (2.7257) grad_norm: 1.5977 (1.6598) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 02:57:26 root] (utils.py 283): INFO Epoch: [7] [1420/2502] eta: 0:52:58 lr: 0.000013 loss_cls: 2.7722 (2.7252) grad_norm: 1.6251 (1.6597) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 02:57:55 root] (utils.py 283): INFO Epoch: [7] [1430/2502] eta: 0:52:29 lr: 0.000013 loss_cls: 2.7109 (2.7243) grad_norm: 1.6352 (1.6595) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-11 02:58:25 root] (utils.py 283): INFO Epoch: [7] [1440/2502] eta: 0:51:59 lr: 0.000013 loss_cls: 2.6989 (2.7231) grad_norm: 1.4728 (1.6589) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-11 02:58:54 root] (utils.py 283): INFO Epoch: [7] [1450/2502] eta: 0:51:30 lr: 0.000013 loss_cls: 2.7201 (2.7225) grad_norm: 1.5478 (1.6592) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 02:59:24 root] (utils.py 283): INFO Epoch: [7] [1460/2502] eta: 0:51:01 lr: 0.000013 loss_cls: 2.8026 (2.7243) grad_norm: 1.6936 (1.6594) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 02:59:53 root] (utils.py 283): INFO Epoch: [7] [1470/2502] eta: 0:50:31 lr: 0.000013 loss_cls: 3.0581 (2.7242) grad_norm: 1.6172 (1.6588) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 03:00:23 root] (utils.py 283): INFO Epoch: [7] [1480/2502] eta: 0:50:02 lr: 0.000013 loss_cls: 2.7656 (2.7232) grad_norm: 1.5068 (1.6610) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 03:00:52 root] (utils.py 283): INFO Epoch: [7] [1490/2502] eta: 0:49:33 lr: 0.000013 loss_cls: 2.7677 (2.7231) grad_norm: 1.5424 (1.6611) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 03:01:21 root] (utils.py 283): INFO Epoch: [7] [1500/2502] eta: 0:49:03 lr: 0.000013 loss_cls: 2.8336 (2.7230) grad_norm: 1.6208 (1.6616) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 03:01:51 root] (utils.py 283): INFO Epoch: [7] [1510/2502] eta: 0:48:34 lr: 0.000013 loss_cls: 2.8336 (2.7233) grad_norm: 1.6687 (1.6618) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 03:02:20 root] (utils.py 283): INFO Epoch: [7] [1520/2502] eta: 0:48:05 lr: 0.000013 loss_cls: 2.6283 (2.7218) grad_norm: 1.5954 (1.6615) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 03:02:50 root] (utils.py 283): INFO Epoch: [7] [1530/2502] eta: 0:47:35 lr: 0.000013 loss_cls: 2.4856 (2.7201) grad_norm: 1.5706 (1.6615) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 03:03:19 root] (utils.py 283): INFO Epoch: [7] [1540/2502] eta: 0:47:06 lr: 0.000013 loss_cls: 2.8819 (2.7212) grad_norm: 1.6088 (1.6615) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 03:03:48 root] (utils.py 283): INFO Epoch: [7] [1550/2502] eta: 0:46:37 lr: 0.000013 loss_cls: 2.9138 (2.7218) grad_norm: 1.5602 (1.6616) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 03:04:18 root] (utils.py 283): INFO Epoch: [7] [1560/2502] eta: 0:46:07 lr: 0.000013 loss_cls: 2.6998 (2.7219) grad_norm: 1.5602 (1.6613) time: 2.9483 data: 0.0003 max mem: 29202 +[2024-12-11 03:04:48 root] (utils.py 283): INFO Epoch: [7] [1570/2502] eta: 0:45:38 lr: 0.000013 loss_cls: 2.8864 (2.7231) grad_norm: 1.5533 (1.6610) time: 2.9622 data: 0.0003 max mem: 29202 +[2024-12-11 03:05:17 root] (utils.py 283): INFO Epoch: [7] [1580/2502] eta: 0:45:09 lr: 0.000013 loss_cls: 2.7831 (2.7220) grad_norm: 1.5737 (1.6618) time: 2.9650 data: 0.0003 max mem: 29202 +[2024-12-11 03:05:47 root] (utils.py 283): INFO Epoch: [7] [1590/2502] eta: 0:44:40 lr: 0.000013 loss_cls: 2.7719 (2.7235) grad_norm: 1.6040 (1.6616) time: 2.9610 data: 0.0003 max mem: 29202 +[2024-12-11 03:06:16 root] (utils.py 283): INFO Epoch: [7] [1600/2502] eta: 0:44:10 lr: 0.000013 loss_cls: 2.8915 (2.7246) grad_norm: 1.6040 (1.6626) time: 2.9483 data: 0.0003 max mem: 29202 +[2024-12-11 03:06:46 root] (utils.py 283): INFO Epoch: [7] [1610/2502] eta: 0:43:41 lr: 0.000013 loss_cls: 2.8269 (2.7240) grad_norm: 1.7674 (1.6628) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 03:07:15 root] (utils.py 283): INFO Epoch: [7] [1620/2502] eta: 0:43:11 lr: 0.000013 loss_cls: 2.5673 (2.7222) grad_norm: 1.5238 (1.6619) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 03:07:44 root] (utils.py 283): INFO Epoch: [7] [1630/2502] eta: 0:42:42 lr: 0.000013 loss_cls: 2.7126 (2.7221) grad_norm: 1.5238 (1.6620) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 03:08:14 root] (utils.py 283): INFO Epoch: [7] [1640/2502] eta: 0:42:13 lr: 0.000013 loss_cls: 2.8794 (2.7223) grad_norm: 1.5544 (1.6621) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-11 03:08:43 root] (utils.py 283): INFO Epoch: [7] [1650/2502] eta: 0:41:43 lr: 0.000013 loss_cls: 2.9546 (2.7217) grad_norm: 1.5499 (1.6617) time: 2.9503 data: 0.0003 max mem: 29202 +[2024-12-11 03:09:13 root] (utils.py 283): INFO Epoch: [7] [1660/2502] eta: 0:41:14 lr: 0.000013 loss_cls: 2.9765 (2.7232) grad_norm: 1.5785 (1.6630) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 03:09:42 root] (utils.py 283): INFO Epoch: [7] [1670/2502] eta: 0:40:45 lr: 0.000013 loss_cls: 3.0844 (2.7242) grad_norm: 1.6400 (1.6626) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 03:10:12 root] (utils.py 283): INFO Epoch: [7] [1680/2502] eta: 0:40:15 lr: 0.000013 loss_cls: 2.8815 (2.7240) grad_norm: 1.6251 (1.6621) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 03:10:41 root] (utils.py 283): INFO Epoch: [7] [1690/2502] eta: 0:39:46 lr: 0.000013 loss_cls: 2.8815 (2.7242) grad_norm: 1.6352 (1.6620) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 03:11:10 root] (utils.py 283): INFO Epoch: [7] [1700/2502] eta: 0:39:16 lr: 0.000013 loss_cls: 2.9070 (2.7252) grad_norm: 1.6357 (1.6617) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 03:11:40 root] (utils.py 283): INFO Epoch: [7] [1710/2502] eta: 0:38:47 lr: 0.000013 loss_cls: 2.9530 (2.7263) grad_norm: 1.6160 (1.6639) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-11 03:12:09 root] (utils.py 283): INFO Epoch: [7] [1720/2502] eta: 0:38:18 lr: 0.000013 loss_cls: 2.9214 (2.7274) grad_norm: 1.6177 (1.6641) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 03:12:38 root] (utils.py 283): INFO Epoch: [7] [1730/2502] eta: 0:37:48 lr: 0.000013 loss_cls: 2.8841 (2.7272) grad_norm: 1.6293 (1.6641) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-11 03:13:08 root] (utils.py 283): INFO Epoch: [7] [1740/2502] eta: 0:37:19 lr: 0.000013 loss_cls: 2.8273 (2.7262) grad_norm: 1.6308 (1.6657) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 03:13:37 root] (utils.py 283): INFO Epoch: [7] [1750/2502] eta: 0:36:49 lr: 0.000013 loss_cls: 2.6491 (2.7259) grad_norm: 1.6157 (1.6663) time: 2.9337 data: 0.0003 max mem: 29202 +[2024-12-11 03:14:07 root] (utils.py 283): INFO Epoch: [7] [1760/2502] eta: 0:36:20 lr: 0.000013 loss_cls: 2.7547 (2.7252) grad_norm: 1.6401 (1.6669) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 03:14:36 root] (utils.py 283): INFO Epoch: [7] [1770/2502] eta: 0:35:51 lr: 0.000013 loss_cls: 2.4483 (2.7241) grad_norm: 1.6849 (1.6675) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-11 03:15:06 root] (utils.py 283): INFO Epoch: [7] [1780/2502] eta: 0:35:21 lr: 0.000013 loss_cls: 2.9118 (2.7247) grad_norm: 1.6721 (1.6675) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 03:15:35 root] (utils.py 283): INFO Epoch: [7] [1790/2502] eta: 0:34:52 lr: 0.000013 loss_cls: 2.8148 (2.7249) grad_norm: 1.6721 (1.6681) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-11 03:16:04 root] (utils.py 283): INFO Epoch: [7] [1800/2502] eta: 0:34:23 lr: 0.000013 loss_cls: 2.7648 (2.7250) grad_norm: 1.6705 (1.6689) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 03:16:34 root] (utils.py 283): INFO Epoch: [7] [1810/2502] eta: 0:33:53 lr: 0.000013 loss_cls: 2.7624 (2.7244) grad_norm: 1.5969 (1.6704) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-11 03:17:03 root] (utils.py 283): INFO Epoch: [7] [1820/2502] eta: 0:33:24 lr: 0.000013 loss_cls: 2.6900 (2.7236) grad_norm: 1.6074 (1.6706) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 03:17:33 root] (utils.py 283): INFO Epoch: [7] [1830/2502] eta: 0:32:54 lr: 0.000013 loss_cls: 2.7533 (2.7248) grad_norm: 1.5864 (1.6707) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 03:18:02 root] (utils.py 283): INFO Epoch: [7] [1840/2502] eta: 0:32:25 lr: 0.000013 loss_cls: 2.9231 (2.7249) grad_norm: 1.5998 (1.6711) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 03:18:32 root] (utils.py 283): INFO Epoch: [7] [1850/2502] eta: 0:31:56 lr: 0.000013 loss_cls: 2.7926 (2.7252) grad_norm: 1.6252 (1.6710) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 03:19:01 root] (utils.py 283): INFO Epoch: [7] [1860/2502] eta: 0:31:26 lr: 0.000013 loss_cls: 2.7441 (2.7241) grad_norm: 1.6444 (1.6707) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 03:19:30 root] (utils.py 283): INFO Epoch: [7] [1870/2502] eta: 0:30:57 lr: 0.000013 loss_cls: 2.6122 (2.7238) grad_norm: 1.6453 (1.6707) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 03:20:00 root] (utils.py 283): INFO Epoch: [7] [1880/2502] eta: 0:30:28 lr: 0.000013 loss_cls: 2.5473 (2.7220) grad_norm: 1.5726 (1.6697) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 03:20:29 root] (utils.py 283): INFO Epoch: [7] [1890/2502] eta: 0:29:58 lr: 0.000013 loss_cls: 2.6566 (2.7222) grad_norm: 1.5757 (1.6703) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 03:20:59 root] (utils.py 283): INFO Epoch: [7] [1900/2502] eta: 0:29:29 lr: 0.000013 loss_cls: 2.8119 (2.7229) grad_norm: 1.6110 (1.6700) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 03:21:28 root] (utils.py 283): INFO Epoch: [7] [1910/2502] eta: 0:28:59 lr: 0.000013 loss_cls: 2.9624 (2.7244) grad_norm: 1.6110 (1.6700) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 03:21:57 root] (utils.py 283): INFO Epoch: [7] [1920/2502] eta: 0:28:30 lr: 0.000013 loss_cls: 2.9027 (2.7244) grad_norm: 1.6304 (1.6702) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 03:22:27 root] (utils.py 283): INFO Epoch: [7] [1930/2502] eta: 0:28:01 lr: 0.000013 loss_cls: 2.9123 (2.7251) grad_norm: 1.6118 (1.6702) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 03:22:56 root] (utils.py 283): INFO Epoch: [7] [1940/2502] eta: 0:27:31 lr: 0.000013 loss_cls: 2.9784 (2.7258) grad_norm: 1.6009 (1.6703) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-11 03:23:26 root] (utils.py 283): INFO Epoch: [7] [1950/2502] eta: 0:27:02 lr: 0.000013 loss_cls: 2.8842 (2.7259) grad_norm: 1.6485 (1.6703) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-11 03:23:55 root] (utils.py 283): INFO Epoch: [7] [1960/2502] eta: 0:26:32 lr: 0.000013 loss_cls: 2.8013 (2.7260) grad_norm: 1.6566 (1.6708) time: 2.9308 data: 0.0003 max mem: 29202 +[2024-12-11 03:24:24 root] (utils.py 283): INFO Epoch: [7] [1970/2502] eta: 0:26:03 lr: 0.000013 loss_cls: 2.8684 (2.7272) grad_norm: 1.6252 (1.6704) time: 2.9235 data: 0.0003 max mem: 29202 +[2024-12-11 03:24:53 root] (utils.py 283): INFO Epoch: [7] [1980/2502] eta: 0:25:34 lr: 0.000013 loss_cls: 2.9136 (2.7276) grad_norm: 1.5674 (1.6706) time: 2.9205 data: 0.0003 max mem: 29202 +[2024-12-11 03:25:23 root] (utils.py 283): INFO Epoch: [7] [1990/2502] eta: 0:25:04 lr: 0.000013 loss_cls: 2.6654 (2.7273) grad_norm: 1.5389 (1.6700) time: 2.9222 data: 0.0003 max mem: 29202 +[2024-12-11 03:25:52 root] (utils.py 283): INFO Epoch: [7] [2000/2502] eta: 0:24:35 lr: 0.000013 loss_cls: 2.6654 (2.7270) grad_norm: 1.5389 (1.6693) time: 2.9236 data: 0.0003 max mem: 29202 +[2024-12-11 03:26:21 root] (utils.py 283): INFO Epoch: [7] [2010/2502] eta: 0:24:05 lr: 0.000013 loss_cls: 2.7891 (2.7272) grad_norm: 1.5662 (1.6709) time: 2.9241 data: 0.0003 max mem: 29202 +[2024-12-11 03:26:50 root] (utils.py 283): INFO Epoch: [7] [2020/2502] eta: 0:23:36 lr: 0.000013 loss_cls: 2.8456 (2.7273) grad_norm: 1.6177 (1.6710) time: 2.9255 data: 0.0003 max mem: 29202 +[2024-12-11 03:27:20 root] (utils.py 283): INFO Epoch: [7] [2030/2502] eta: 0:23:06 lr: 0.000013 loss_cls: 2.9387 (2.7278) grad_norm: 1.6125 (1.6719) time: 2.9234 data: 0.0003 max mem: 29202 +[2024-12-11 03:27:49 root] (utils.py 283): INFO Epoch: [7] [2040/2502] eta: 0:22:37 lr: 0.000013 loss_cls: 2.8706 (2.7276) grad_norm: 1.6119 (1.6718) time: 2.9227 data: 0.0003 max mem: 29202 +[2024-12-11 03:28:18 root] (utils.py 283): INFO Epoch: [7] [2050/2502] eta: 0:22:08 lr: 0.000013 loss_cls: 2.8175 (2.7280) grad_norm: 1.6254 (1.6724) time: 2.9241 data: 0.0003 max mem: 29202 +[2024-12-11 03:28:48 root] (utils.py 283): INFO Epoch: [7] [2060/2502] eta: 0:21:38 lr: 0.000013 loss_cls: 2.8609 (2.7293) grad_norm: 1.5973 (1.6719) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 03:29:17 root] (utils.py 283): INFO Epoch: [7] [2070/2502] eta: 0:21:09 lr: 0.000013 loss_cls: 2.8911 (2.7291) grad_norm: 1.5325 (1.6718) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 03:29:46 root] (utils.py 283): INFO Epoch: [7] [2080/2502] eta: 0:20:39 lr: 0.000013 loss_cls: 2.8068 (2.7293) grad_norm: 1.6498 (1.6723) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 03:30:16 root] (utils.py 283): INFO Epoch: [7] [2090/2502] eta: 0:20:10 lr: 0.000013 loss_cls: 2.8306 (2.7292) grad_norm: 1.7077 (1.6741) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 03:30:45 root] (utils.py 283): INFO Epoch: [7] [2100/2502] eta: 0:19:41 lr: 0.000013 loss_cls: 2.8306 (2.7295) grad_norm: 1.6216 (1.6743) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 03:31:14 root] (utils.py 283): INFO Epoch: [7] [2110/2502] eta: 0:19:11 lr: 0.000013 loss_cls: 2.7491 (2.7295) grad_norm: 1.5801 (1.6740) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 03:31:44 root] (utils.py 283): INFO Epoch: [7] [2120/2502] eta: 0:18:42 lr: 0.000013 loss_cls: 2.8102 (2.7297) grad_norm: 1.5801 (1.6739) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 03:32:13 root] (utils.py 283): INFO Epoch: [7] [2130/2502] eta: 0:18:13 lr: 0.000013 loss_cls: 2.8102 (2.7293) grad_norm: 1.5811 (1.6738) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 03:32:43 root] (utils.py 283): INFO Epoch: [7] [2140/2502] eta: 0:17:43 lr: 0.000013 loss_cls: 2.5896 (2.7282) grad_norm: 1.5410 (1.6733) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 03:33:12 root] (utils.py 283): INFO Epoch: [7] [2150/2502] eta: 0:17:14 lr: 0.000013 loss_cls: 2.5446 (2.7275) grad_norm: 1.5746 (1.6734) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-11 03:33:41 root] (utils.py 283): INFO Epoch: [7] [2160/2502] eta: 0:16:44 lr: 0.000013 loss_cls: 2.6934 (2.7273) grad_norm: 1.5746 (1.6729) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 03:34:11 root] (utils.py 283): INFO Epoch: [7] [2170/2502] eta: 0:16:15 lr: 0.000013 loss_cls: 2.6934 (2.7271) grad_norm: 1.6685 (1.6733) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 03:34:40 root] (utils.py 283): INFO Epoch: [7] [2180/2502] eta: 0:15:46 lr: 0.000013 loss_cls: 2.8590 (2.7269) grad_norm: 1.6697 (1.6736) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 03:35:10 root] (utils.py 283): INFO Epoch: [7] [2190/2502] eta: 0:15:16 lr: 0.000013 loss_cls: 2.9309 (2.7277) grad_norm: 1.5265 (1.6725) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-11 03:35:39 root] (utils.py 283): INFO Epoch: [7] [2200/2502] eta: 0:14:47 lr: 0.000013 loss_cls: 2.8435 (2.7267) grad_norm: 1.4498 (1.6723) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 03:36:08 root] (utils.py 283): INFO Epoch: [7] [2210/2502] eta: 0:14:18 lr: 0.000013 loss_cls: 2.5556 (2.7273) grad_norm: 1.6333 (1.6724) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 03:36:38 root] (utils.py 283): INFO Epoch: [7] [2220/2502] eta: 0:13:48 lr: 0.000013 loss_cls: 2.9489 (2.7278) grad_norm: 1.6349 (1.6724) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 03:37:07 root] (utils.py 283): INFO Epoch: [7] [2230/2502] eta: 0:13:19 lr: 0.000013 loss_cls: 2.9489 (2.7277) grad_norm: 1.6349 (1.6720) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 03:37:37 root] (utils.py 283): INFO Epoch: [7] [2240/2502] eta: 0:12:49 lr: 0.000013 loss_cls: 2.8236 (2.7273) grad_norm: 1.5247 (1.6721) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 03:38:06 root] (utils.py 283): INFO Epoch: [7] [2250/2502] eta: 0:12:20 lr: 0.000013 loss_cls: 2.8325 (2.7279) grad_norm: 1.5594 (1.6721) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 03:38:35 root] (utils.py 283): INFO Epoch: [7] [2260/2502] eta: 0:11:51 lr: 0.000013 loss_cls: 2.8979 (2.7273) grad_norm: 1.5594 (1.6719) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 03:39:05 root] (utils.py 283): INFO Epoch: [7] [2270/2502] eta: 0:11:21 lr: 0.000013 loss_cls: 2.6215 (2.7270) grad_norm: 1.6334 (1.6716) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 03:39:34 root] (utils.py 283): INFO Epoch: [7] [2280/2502] eta: 0:10:52 lr: 0.000013 loss_cls: 2.6215 (2.7266) grad_norm: 1.5985 (1.6713) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 03:40:04 root] (utils.py 283): INFO Epoch: [7] [2290/2502] eta: 0:10:22 lr: 0.000013 loss_cls: 2.8043 (2.7269) grad_norm: 1.5741 (1.6708) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 03:40:33 root] (utils.py 283): INFO Epoch: [7] [2300/2502] eta: 0:09:53 lr: 0.000013 loss_cls: 2.7427 (2.7258) grad_norm: 1.5086 (1.6699) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 03:41:02 root] (utils.py 283): INFO Epoch: [7] [2310/2502] eta: 0:09:24 lr: 0.000013 loss_cls: 2.6030 (2.7254) grad_norm: 1.5845 (1.6701) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 03:41:32 root] (utils.py 283): INFO Epoch: [7] [2320/2502] eta: 0:08:54 lr: 0.000013 loss_cls: 2.8378 (2.7252) grad_norm: 1.5938 (1.6701) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 03:42:01 root] (utils.py 283): INFO Epoch: [7] [2330/2502] eta: 0:08:25 lr: 0.000013 loss_cls: 2.8489 (2.7257) grad_norm: 1.5973 (1.6700) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 03:42:31 root] (utils.py 283): INFO Epoch: [7] [2340/2502] eta: 0:07:56 lr: 0.000013 loss_cls: 2.6870 (2.7247) grad_norm: 1.5973 (1.6696) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 03:43:00 root] (utils.py 283): INFO Epoch: [7] [2350/2502] eta: 0:07:26 lr: 0.000013 loss_cls: 2.7169 (2.7251) grad_norm: 1.5483 (1.6695) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 03:43:29 root] (utils.py 283): INFO Epoch: [7] [2360/2502] eta: 0:06:57 lr: 0.000013 loss_cls: 2.8768 (2.7257) grad_norm: 1.5493 (1.6697) time: 2.9349 data: 0.0002 max mem: 29202 +[2024-12-11 03:43:59 root] (utils.py 283): INFO Epoch: [7] [2370/2502] eta: 0:06:27 lr: 0.000013 loss_cls: 2.8735 (2.7259) grad_norm: 1.5851 (1.6706) time: 2.9378 data: 0.0002 max mem: 29202 +[2024-12-11 03:44:28 root] (utils.py 283): INFO Epoch: [7] [2380/2502] eta: 0:05:58 lr: 0.000013 loss_cls: 2.8920 (2.7264) grad_norm: 1.6812 (1.6713) time: 2.9400 data: 0.0002 max mem: 29202 +[2024-12-11 03:44:57 root] (utils.py 283): INFO Epoch: [7] [2390/2502] eta: 0:05:29 lr: 0.000013 loss_cls: 2.8536 (2.7268) grad_norm: 1.6513 (1.6712) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 03:45:27 root] (utils.py 283): INFO Epoch: [7] [2400/2502] eta: 0:04:59 lr: 0.000013 loss_cls: 2.8056 (2.7260) grad_norm: 1.6281 (1.6706) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 03:45:56 root] (utils.py 283): INFO Epoch: [7] [2410/2502] eta: 0:04:30 lr: 0.000013 loss_cls: 2.7549 (2.7264) grad_norm: 1.6316 (1.6709) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 03:46:26 root] (utils.py 283): INFO Epoch: [7] [2420/2502] eta: 0:04:00 lr: 0.000013 loss_cls: 2.7720 (2.7266) grad_norm: 1.6316 (1.6710) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 03:46:55 root] (utils.py 283): INFO Epoch: [7] [2430/2502] eta: 0:03:31 lr: 0.000013 loss_cls: 2.7720 (2.7263) grad_norm: 1.5477 (1.6703) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 03:47:24 root] (utils.py 283): INFO Epoch: [7] [2440/2502] eta: 0:03:02 lr: 0.000013 loss_cls: 2.8447 (2.7268) grad_norm: 1.4954 (1.6701) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 03:47:54 root] (utils.py 283): INFO Epoch: [7] [2450/2502] eta: 0:02:32 lr: 0.000013 loss_cls: 2.8867 (2.7267) grad_norm: 1.6083 (1.6699) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 03:48:23 root] (utils.py 283): INFO Epoch: [7] [2460/2502] eta: 0:02:03 lr: 0.000013 loss_cls: 2.7392 (2.7262) grad_norm: 1.5677 (1.6697) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 03:48:53 root] (utils.py 283): INFO Epoch: [7] [2470/2502] eta: 0:01:34 lr: 0.000013 loss_cls: 2.6106 (2.7257) grad_norm: 1.6855 (1.6698) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 03:49:22 root] (utils.py 283): INFO Epoch: [7] [2480/2502] eta: 0:01:04 lr: 0.000013 loss_cls: 2.6019 (2.7256) grad_norm: 1.4927 (1.6689) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-11 03:49:52 root] (utils.py 283): INFO Epoch: [7] [2490/2502] eta: 0:00:35 lr: 0.000013 loss_cls: 2.7026 (2.7258) grad_norm: 1.4425 (1.6682) time: 2.9642 data: 0.0275 max mem: 29202 +[2024-12-11 03:50:21 root] (utils.py 283): INFO Epoch: [7] [2500/2502] eta: 0:00:05 lr: 0.000013 loss_cls: 2.7026 (2.7253) grad_norm: 1.5746 (1.6684) time: 2.9626 data: 0.0275 max mem: 29202 +[2024-12-11 03:50:24 root] (utils.py 283): INFO Epoch: [7] [2501/2502] eta: 0:00:02 lr: 0.000013 loss_cls: 2.7119 (2.7254) grad_norm: 1.5746 (1.6683) time: 2.9628 data: 0.0275 max mem: 29202 +[2024-12-11 03:50:24 root] (utils.py 297): INFO Epoch: [7] Total time: 2:02:33 (2.9389 s / it) +[2024-12-11 03:50:24 root] (engine.py 179): INFO Averaged stats:lr: 0.000013 loss_cls: 2.7119 (2.7207) grad_norm: 1.5746 (1.6683) +[2024-12-11 03:50:29 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.3062 (0.3062) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5624 data: 0.0003 max mem: 29202 +[2024-12-11 03:50:34 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5889 (0.5758) acc1: 85.1562 (86.4347) acc3: 97.6562 (96.5199) acc5: 98.4375 (97.9403) time: 0.5510 data: 0.0004 max mem: 29202 +[2024-12-11 03:50:40 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5889 (0.6238) acc1: 85.1562 (85.6027) acc3: 96.0938 (95.7589) acc5: 97.6562 (97.5446) time: 0.5507 data: 0.0004 max mem: 29202 +[2024-12-11 03:50:45 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6187 (0.6544) acc1: 85.9375 (84.9546) acc3: 94.5312 (95.5645) acc5: 96.8750 (97.3286) time: 0.5512 data: 0.0004 max mem: 29202 +[2024-12-11 03:50:51 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6963 (0.6613) acc1: 85.1562 (85.1753) acc3: 96.0938 (95.6364) acc5: 96.8750 (97.2942) time: 0.5513 data: 0.0004 max mem: 29202 +[2024-12-11 03:50:56 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8248 (0.7468) acc1: 80.4688 (83.3640) acc3: 92.9688 (94.5159) acc5: 95.3125 (96.4614) time: 0.5518 data: 0.0004 max mem: 29202 +[2024-12-11 03:51:02 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9696 (0.7774) acc1: 78.1250 (82.9150) acc3: 89.8438 (93.8653) acc5: 92.9688 (96.0041) time: 0.5523 data: 0.0004 max mem: 29202 +[2024-12-11 03:51:07 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9742 (0.8093) acc1: 79.6875 (82.1413) acc3: 90.6250 (93.6400) acc5: 93.7500 (95.7857) time: 0.5523 data: 0.0004 max mem: 29202 +[2024-12-11 03:51:13 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9447 (0.8337) acc1: 78.1250 (81.6551) acc3: 90.6250 (93.2388) acc5: 93.7500 (95.4572) time: 0.5522 data: 0.0005 max mem: 29202 +[2024-12-11 03:51:18 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9832 (0.8629) acc1: 75.7812 (80.9323) acc3: 91.4062 (92.9430) acc5: 93.7500 (95.3039) time: 0.5528 data: 0.0005 max mem: 29202 +[2024-12-11 03:51:22 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9736 (0.8601) acc1: 75.0000 (80.8800) acc3: 91.4062 (92.9920) acc5: 94.5312 (95.4080) time: 0.5439 data: 0.0005 max mem: 29202 +[2024-12-11 03:51:22 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5502 s / it) +[2024-12-11 03:51:22 root] (engine.py 264): INFO * Acc@1 80.990 Acc@3 92.948 Acc@5 95.280 loss 0.854 flops 13.207 layer_flops 13.109 +[2024-12-11 03:51:22 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.0% +[2024-12-11 03:51:24 root] (main.py 576): INFO Max accuracy: 80.99% +[2024-12-11 03:51:27 root] (utils.py 283): INFO Epoch: [8] [ 0/2502] eta: 2:00:03 lr: 0.000011 loss_cls: 2.7326 (2.7326) grad_norm: 1.6642 (1.6642) time: 2.8792 data: 0.0003 max mem: 29202 +[2024-12-11 03:51:57 root] (utils.py 283): INFO Epoch: [8] [ 10/2502] eta: 2:01:40 lr: 0.000011 loss_cls: 2.9784 (2.7578) grad_norm: 1.6065 (1.6910) time: 2.9296 data: 0.0002 max mem: 29202 +[2024-12-11 03:52:26 root] (utils.py 283): INFO Epoch: [8] [ 20/2502] eta: 2:01:23 lr: 0.000011 loss_cls: 3.0128 (2.8565) grad_norm: 1.5534 (1.6450) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 03:52:55 root] (utils.py 283): INFO Epoch: [8] [ 30/2502] eta: 2:01:00 lr: 0.000011 loss_cls: 3.0128 (2.8526) grad_norm: 1.5534 (1.6223) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 03:53:25 root] (utils.py 283): INFO Epoch: [8] [ 40/2502] eta: 2:00:31 lr: 0.000011 loss_cls: 2.9543 (2.8306) grad_norm: 1.5973 (1.6235) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 03:53:54 root] (utils.py 283): INFO Epoch: [8] [ 50/2502] eta: 2:00:05 lr: 0.000011 loss_cls: 2.9433 (2.8290) grad_norm: 1.6430 (1.6420) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 03:54:24 root] (utils.py 283): INFO Epoch: [8] [ 60/2502] eta: 1:59:38 lr: 0.000011 loss_cls: 2.8669 (2.8389) grad_norm: 1.6547 (1.6403) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 03:54:53 root] (utils.py 283): INFO Epoch: [8] [ 70/2502] eta: 1:59:08 lr: 0.000011 loss_cls: 2.8294 (2.8280) grad_norm: 1.5684 (1.6191) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 03:55:22 root] (utils.py 283): INFO Epoch: [8] [ 80/2502] eta: 1:58:39 lr: 0.000011 loss_cls: 2.8294 (2.8410) grad_norm: 1.5435 (1.6299) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 03:55:52 root] (utils.py 283): INFO Epoch: [8] [ 90/2502] eta: 1:58:09 lr: 0.000011 loss_cls: 2.8143 (2.8256) grad_norm: 1.6577 (1.6452) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 03:56:21 root] (utils.py 283): INFO Epoch: [8] [ 100/2502] eta: 1:57:40 lr: 0.000011 loss_cls: 2.6332 (2.7963) grad_norm: 1.6591 (1.6466) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 03:56:51 root] (utils.py 283): INFO Epoch: [8] [ 110/2502] eta: 1:57:11 lr: 0.000011 loss_cls: 2.6922 (2.7782) grad_norm: 1.6487 (1.6418) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 03:57:20 root] (utils.py 283): INFO Epoch: [8] [ 120/2502] eta: 1:56:42 lr: 0.000011 loss_cls: 2.7417 (2.7667) grad_norm: 1.5664 (1.6359) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 03:57:49 root] (utils.py 283): INFO Epoch: [8] [ 130/2502] eta: 1:56:12 lr: 0.000011 loss_cls: 2.7417 (2.7641) grad_norm: 1.4959 (1.6372) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 03:58:19 root] (utils.py 283): INFO Epoch: [8] [ 140/2502] eta: 1:55:43 lr: 0.000011 loss_cls: 2.8232 (2.7691) grad_norm: 1.5227 (1.6308) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 03:58:48 root] (utils.py 283): INFO Epoch: [8] [ 150/2502] eta: 1:55:13 lr: 0.000011 loss_cls: 2.7642 (2.7611) grad_norm: 1.5146 (1.6301) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 03:59:18 root] (utils.py 283): INFO Epoch: [8] [ 160/2502] eta: 1:54:44 lr: 0.000011 loss_cls: 2.6422 (2.7544) grad_norm: 1.5278 (1.6313) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 03:59:47 root] (utils.py 283): INFO Epoch: [8] [ 170/2502] eta: 1:54:15 lr: 0.000011 loss_cls: 2.4406 (2.7356) grad_norm: 1.5604 (1.6281) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 04:00:17 root] (utils.py 283): INFO Epoch: [8] [ 180/2502] eta: 1:53:47 lr: 0.000011 loss_cls: 2.4966 (2.7307) grad_norm: 1.5344 (1.6266) time: 2.9445 data: 0.0002 max mem: 29202 +[2024-12-11 04:00:46 root] (utils.py 283): INFO Epoch: [8] [ 190/2502] eta: 1:53:17 lr: 0.000011 loss_cls: 2.7306 (2.7261) grad_norm: 1.5316 (1.6321) time: 2.9421 data: 0.0002 max mem: 29202 +[2024-12-11 04:01:15 root] (utils.py 283): INFO Epoch: [8] [ 200/2502] eta: 1:52:49 lr: 0.000011 loss_cls: 2.7946 (2.7204) grad_norm: 1.5903 (1.6343) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-11 04:01:45 root] (utils.py 283): INFO Epoch: [8] [ 210/2502] eta: 1:52:21 lr: 0.000011 loss_cls: 2.8480 (2.7235) grad_norm: 1.6548 (1.6390) time: 2.9526 data: 0.0003 max mem: 29202 +[2024-12-11 04:02:14 root] (utils.py 283): INFO Epoch: [8] [ 220/2502] eta: 1:51:51 lr: 0.000011 loss_cls: 2.8279 (2.7220) grad_norm: 1.6793 (1.6416) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-11 04:02:44 root] (utils.py 283): INFO Epoch: [8] [ 230/2502] eta: 1:51:21 lr: 0.000011 loss_cls: 2.8793 (2.7275) grad_norm: 1.5890 (1.6399) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 04:03:13 root] (utils.py 283): INFO Epoch: [8] [ 240/2502] eta: 1:50:52 lr: 0.000011 loss_cls: 2.7184 (2.7127) grad_norm: 1.5664 (1.6365) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 04:03:43 root] (utils.py 283): INFO Epoch: [8] [ 250/2502] eta: 1:50:22 lr: 0.000011 loss_cls: 2.5871 (2.7106) grad_norm: 1.5056 (1.6368) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 04:04:12 root] (utils.py 283): INFO Epoch: [8] [ 260/2502] eta: 1:49:52 lr: 0.000011 loss_cls: 2.8173 (2.7093) grad_norm: 1.4932 (1.6357) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 04:04:41 root] (utils.py 283): INFO Epoch: [8] [ 270/2502] eta: 1:49:22 lr: 0.000011 loss_cls: 2.8897 (2.7179) grad_norm: 1.5261 (1.6337) time: 2.9317 data: 0.0003 max mem: 29202 +[2024-12-11 04:05:11 root] (utils.py 283): INFO Epoch: [8] [ 280/2502] eta: 1:48:53 lr: 0.000011 loss_cls: 2.9700 (2.7242) grad_norm: 1.5102 (1.6287) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 04:05:40 root] (utils.py 283): INFO Epoch: [8] [ 290/2502] eta: 1:48:23 lr: 0.000011 loss_cls: 2.8293 (2.7123) grad_norm: 1.4634 (1.6246) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 04:06:09 root] (utils.py 283): INFO Epoch: [8] [ 300/2502] eta: 1:47:55 lr: 0.000011 loss_cls: 2.6811 (2.7156) grad_norm: 1.4998 (1.6211) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 04:06:39 root] (utils.py 283): INFO Epoch: [8] [ 310/2502] eta: 1:47:25 lr: 0.000011 loss_cls: 2.7491 (2.7149) grad_norm: 1.4998 (1.6173) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-11 04:07:08 root] (utils.py 283): INFO Epoch: [8] [ 320/2502] eta: 1:46:57 lr: 0.000011 loss_cls: 2.7803 (2.7233) grad_norm: 1.4961 (1.6150) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-11 04:07:38 root] (utils.py 283): INFO Epoch: [8] [ 330/2502] eta: 1:46:27 lr: 0.000011 loss_cls: 2.7307 (2.7152) grad_norm: 1.5769 (1.6199) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-11 04:08:07 root] (utils.py 283): INFO Epoch: [8] [ 340/2502] eta: 1:45:57 lr: 0.000011 loss_cls: 2.4876 (2.7125) grad_norm: 1.5729 (1.6206) time: 2.9337 data: 0.0003 max mem: 29202 +[2024-12-11 04:08:36 root] (utils.py 283): INFO Epoch: [8] [ 350/2502] eta: 1:45:26 lr: 0.000011 loss_cls: 2.5320 (2.7113) grad_norm: 1.5522 (1.6235) time: 2.9259 data: 0.0003 max mem: 29202 +[2024-12-11 04:09:06 root] (utils.py 283): INFO Epoch: [8] [ 360/2502] eta: 1:44:56 lr: 0.000011 loss_cls: 2.8359 (2.7156) grad_norm: 1.5896 (1.6219) time: 2.9224 data: 0.0003 max mem: 29202 +[2024-12-11 04:09:35 root] (utils.py 283): INFO Epoch: [8] [ 370/2502] eta: 1:44:26 lr: 0.000011 loss_cls: 2.7821 (2.7120) grad_norm: 1.4195 (1.6166) time: 2.9221 data: 0.0003 max mem: 29202 +[2024-12-11 04:10:04 root] (utils.py 283): INFO Epoch: [8] [ 380/2502] eta: 1:43:55 lr: 0.000011 loss_cls: 2.8304 (2.7154) grad_norm: 1.5295 (1.6184) time: 2.9231 data: 0.0003 max mem: 29202 +[2024-12-11 04:10:33 root] (utils.py 283): INFO Epoch: [8] [ 390/2502] eta: 1:43:25 lr: 0.000011 loss_cls: 2.8304 (2.7118) grad_norm: 1.5665 (1.6177) time: 2.9241 data: 0.0003 max mem: 29202 +[2024-12-11 04:11:02 root] (utils.py 283): INFO Epoch: [8] [ 400/2502] eta: 1:42:55 lr: 0.000011 loss_cls: 2.6173 (2.7093) grad_norm: 1.5624 (1.6175) time: 2.9233 data: 0.0003 max mem: 29202 +[2024-12-11 04:11:32 root] (utils.py 283): INFO Epoch: [8] [ 410/2502] eta: 1:42:25 lr: 0.000011 loss_cls: 2.8760 (2.7152) grad_norm: 1.5311 (1.6148) time: 2.9230 data: 0.0003 max mem: 29202 +[2024-12-11 04:12:01 root] (utils.py 283): INFO Epoch: [8] [ 420/2502] eta: 1:41:55 lr: 0.000011 loss_cls: 2.8846 (2.7138) grad_norm: 1.5349 (1.6187) time: 2.9250 data: 0.0003 max mem: 29202 +[2024-12-11 04:12:30 root] (utils.py 283): INFO Epoch: [8] [ 430/2502] eta: 1:41:25 lr: 0.000011 loss_cls: 2.6585 (2.7104) grad_norm: 1.5715 (1.6190) time: 2.9254 data: 0.0003 max mem: 29202 +[2024-12-11 04:13:00 root] (utils.py 283): INFO Epoch: [8] [ 440/2502] eta: 1:40:55 lr: 0.000011 loss_cls: 2.7773 (2.7128) grad_norm: 1.6809 (1.6251) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-11 04:13:29 root] (utils.py 283): INFO Epoch: [8] [ 450/2502] eta: 1:40:25 lr: 0.000011 loss_cls: 2.8813 (2.7134) grad_norm: 1.5831 (1.6225) time: 2.9263 data: 0.0003 max mem: 29202 +[2024-12-11 04:13:58 root] (utils.py 283): INFO Epoch: [8] [ 460/2502] eta: 1:39:55 lr: 0.000011 loss_cls: 2.8691 (2.7160) grad_norm: 1.5050 (1.6233) time: 2.9219 data: 0.0003 max mem: 29202 +[2024-12-11 04:14:27 root] (utils.py 283): INFO Epoch: [8] [ 470/2502] eta: 1:39:25 lr: 0.000011 loss_cls: 2.8691 (2.7185) grad_norm: 1.6260 (1.6242) time: 2.9249 data: 0.0003 max mem: 29202 +[2024-12-11 04:14:56 root] (utils.py 283): INFO Epoch: [8] [ 480/2502] eta: 1:38:55 lr: 0.000011 loss_cls: 2.7497 (2.7172) grad_norm: 1.6858 (1.6281) time: 2.9249 data: 0.0003 max mem: 29202 +[2024-12-11 04:15:26 root] (utils.py 283): INFO Epoch: [8] [ 490/2502] eta: 1:38:26 lr: 0.000011 loss_cls: 2.6563 (2.7186) grad_norm: 1.6901 (1.6310) time: 2.9212 data: 0.0003 max mem: 29202 +[2024-12-11 04:15:55 root] (utils.py 283): INFO Epoch: [8] [ 500/2502] eta: 1:37:56 lr: 0.000011 loss_cls: 2.8124 (2.7192) grad_norm: 1.6367 (1.6286) time: 2.9207 data: 0.0003 max mem: 29202 +[2024-12-11 04:16:24 root] (utils.py 283): INFO Epoch: [8] [ 510/2502] eta: 1:37:26 lr: 0.000011 loss_cls: 2.8124 (2.7185) grad_norm: 1.5509 (1.6292) time: 2.9212 data: 0.0003 max mem: 29202 +[2024-12-11 04:16:53 root] (utils.py 283): INFO Epoch: [8] [ 520/2502] eta: 1:36:56 lr: 0.000011 loss_cls: 2.8465 (2.7192) grad_norm: 1.6390 (1.6290) time: 2.9242 data: 0.0003 max mem: 29202 +[2024-12-11 04:17:23 root] (utils.py 283): INFO Epoch: [8] [ 530/2502] eta: 1:36:26 lr: 0.000011 loss_cls: 2.8465 (2.7213) grad_norm: 1.6100 (1.6296) time: 2.9232 data: 0.0003 max mem: 29202 +[2024-12-11 04:17:52 root] (utils.py 283): INFO Epoch: [8] [ 540/2502] eta: 1:35:56 lr: 0.000011 loss_cls: 2.7322 (2.7177) grad_norm: 1.5404 (1.6297) time: 2.9219 data: 0.0003 max mem: 29202 +[2024-12-11 04:18:21 root] (utils.py 283): INFO Epoch: [8] [ 550/2502] eta: 1:35:27 lr: 0.000011 loss_cls: 2.7322 (2.7205) grad_norm: 1.5404 (1.6295) time: 2.9239 data: 0.0003 max mem: 29202 +[2024-12-11 04:18:50 root] (utils.py 283): INFO Epoch: [8] [ 560/2502] eta: 1:34:57 lr: 0.000011 loss_cls: 2.9190 (2.7237) grad_norm: 1.5556 (1.6297) time: 2.9241 data: 0.0003 max mem: 29202 +[2024-12-11 04:19:20 root] (utils.py 283): INFO Epoch: [8] [ 570/2502] eta: 1:34:27 lr: 0.000011 loss_cls: 2.7337 (2.7249) grad_norm: 1.5864 (1.6289) time: 2.9238 data: 0.0003 max mem: 29202 +[2024-12-11 04:19:49 root] (utils.py 283): INFO Epoch: [8] [ 580/2502] eta: 1:33:58 lr: 0.000011 loss_cls: 2.7337 (2.7234) grad_norm: 1.5813 (1.6281) time: 2.9234 data: 0.0003 max mem: 29202 +[2024-12-11 04:20:18 root] (utils.py 283): INFO Epoch: [8] [ 590/2502] eta: 1:33:28 lr: 0.000011 loss_cls: 2.6646 (2.7237) grad_norm: 1.5804 (1.6294) time: 2.9268 data: 0.0003 max mem: 29202 +[2024-12-11 04:20:48 root] (utils.py 283): INFO Epoch: [8] [ 600/2502] eta: 1:32:59 lr: 0.000011 loss_cls: 2.9251 (2.7282) grad_norm: 1.6023 (1.6289) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 04:21:17 root] (utils.py 283): INFO Epoch: [8] [ 610/2502] eta: 1:32:30 lr: 0.000011 loss_cls: 2.9303 (2.7276) grad_norm: 1.6021 (1.6285) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 04:21:46 root] (utils.py 283): INFO Epoch: [8] [ 620/2502] eta: 1:32:01 lr: 0.000011 loss_cls: 2.7891 (2.7267) grad_norm: 1.5267 (1.6276) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 04:22:16 root] (utils.py 283): INFO Epoch: [8] [ 630/2502] eta: 1:31:32 lr: 0.000011 loss_cls: 2.7891 (2.7262) grad_norm: 1.5615 (1.6309) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 04:22:45 root] (utils.py 283): INFO Epoch: [8] [ 640/2502] eta: 1:31:03 lr: 0.000011 loss_cls: 2.9743 (2.7297) grad_norm: 1.6165 (1.6316) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 04:23:15 root] (utils.py 283): INFO Epoch: [8] [ 650/2502] eta: 1:30:34 lr: 0.000011 loss_cls: 2.8870 (2.7275) grad_norm: 1.6674 (1.6324) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 04:23:44 root] (utils.py 283): INFO Epoch: [8] [ 660/2502] eta: 1:30:05 lr: 0.000011 loss_cls: 2.6808 (2.7281) grad_norm: 1.6223 (1.6327) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-11 04:24:14 root] (utils.py 283): INFO Epoch: [8] [ 670/2502] eta: 1:29:36 lr: 0.000011 loss_cls: 2.8075 (2.7286) grad_norm: 1.5939 (1.6314) time: 2.9505 data: 0.0003 max mem: 29202 +[2024-12-11 04:24:43 root] (utils.py 283): INFO Epoch: [8] [ 680/2502] eta: 1:29:07 lr: 0.000011 loss_cls: 2.7812 (2.7291) grad_norm: 1.5428 (1.6312) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 04:25:13 root] (utils.py 283): INFO Epoch: [8] [ 690/2502] eta: 1:28:38 lr: 0.000011 loss_cls: 2.7806 (2.7293) grad_norm: 1.5483 (1.6317) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 04:25:42 root] (utils.py 283): INFO Epoch: [8] [ 700/2502] eta: 1:28:09 lr: 0.000011 loss_cls: 2.7860 (2.7298) grad_norm: 1.6177 (1.6321) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 04:26:11 root] (utils.py 283): INFO Epoch: [8] [ 710/2502] eta: 1:27:39 lr: 0.000011 loss_cls: 2.8561 (2.7315) grad_norm: 1.5858 (1.6316) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 04:26:41 root] (utils.py 283): INFO Epoch: [8] [ 720/2502] eta: 1:27:10 lr: 0.000011 loss_cls: 2.6672 (2.7288) grad_norm: 1.5782 (1.6316) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 04:27:10 root] (utils.py 283): INFO Epoch: [8] [ 730/2502] eta: 1:26:41 lr: 0.000011 loss_cls: 2.6393 (2.7297) grad_norm: 1.5782 (1.6309) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 04:27:39 root] (utils.py 283): INFO Epoch: [8] [ 740/2502] eta: 1:26:11 lr: 0.000011 loss_cls: 2.8556 (2.7292) grad_norm: 1.5083 (1.6317) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 04:28:09 root] (utils.py 283): INFO Epoch: [8] [ 750/2502] eta: 1:25:42 lr: 0.000011 loss_cls: 2.8556 (2.7292) grad_norm: 1.5265 (1.6313) time: 2.9324 data: 0.0003 max mem: 29202 +[2024-12-11 04:28:38 root] (utils.py 283): INFO Epoch: [8] [ 760/2502] eta: 1:25:13 lr: 0.000011 loss_cls: 2.8278 (2.7293) grad_norm: 1.5178 (1.6303) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 04:29:08 root] (utils.py 283): INFO Epoch: [8] [ 770/2502] eta: 1:24:43 lr: 0.000011 loss_cls: 2.9317 (2.7319) grad_norm: 1.5680 (1.6310) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 04:29:37 root] (utils.py 283): INFO Epoch: [8] [ 780/2502] eta: 1:24:14 lr: 0.000011 loss_cls: 2.9317 (2.7290) grad_norm: 1.6203 (1.6311) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 04:30:06 root] (utils.py 283): INFO Epoch: [8] [ 790/2502] eta: 1:23:45 lr: 0.000011 loss_cls: 2.4255 (2.7266) grad_norm: 1.6095 (1.6304) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 04:30:36 root] (utils.py 283): INFO Epoch: [8] [ 800/2502] eta: 1:23:16 lr: 0.000011 loss_cls: 2.8913 (2.7286) grad_norm: 1.5408 (1.6294) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 04:31:05 root] (utils.py 283): INFO Epoch: [8] [ 810/2502] eta: 1:22:46 lr: 0.000011 loss_cls: 2.8648 (2.7276) grad_norm: 1.6331 (1.6309) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 04:31:35 root] (utils.py 283): INFO Epoch: [8] [ 820/2502] eta: 1:22:17 lr: 0.000011 loss_cls: 2.9322 (2.7308) grad_norm: 1.6878 (1.6326) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 04:32:04 root] (utils.py 283): INFO Epoch: [8] [ 830/2502] eta: 1:21:48 lr: 0.000011 loss_cls: 2.9995 (2.7325) grad_norm: 1.5957 (1.6355) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 04:32:33 root] (utils.py 283): INFO Epoch: [8] [ 840/2502] eta: 1:21:19 lr: 0.000011 loss_cls: 2.8563 (2.7320) grad_norm: 1.5876 (1.6364) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 04:33:03 root] (utils.py 283): INFO Epoch: [8] [ 850/2502] eta: 1:20:49 lr: 0.000011 loss_cls: 2.7298 (2.7288) grad_norm: 1.5883 (1.6354) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 04:33:32 root] (utils.py 283): INFO Epoch: [8] [ 860/2502] eta: 1:20:20 lr: 0.000011 loss_cls: 2.8252 (2.7290) grad_norm: 1.5531 (1.6354) time: 2.9313 data: 0.0002 max mem: 29202 +[2024-12-11 04:34:01 root] (utils.py 283): INFO Epoch: [8] [ 870/2502] eta: 1:19:50 lr: 0.000011 loss_cls: 2.7827 (2.7261) grad_norm: 1.5531 (1.6359) time: 2.9294 data: 0.0002 max mem: 29202 +[2024-12-11 04:34:31 root] (utils.py 283): INFO Epoch: [8] [ 880/2502] eta: 1:19:21 lr: 0.000011 loss_cls: 2.7517 (2.7270) grad_norm: 1.5301 (1.6347) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-11 04:35:00 root] (utils.py 283): INFO Epoch: [8] [ 890/2502] eta: 1:18:51 lr: 0.000011 loss_cls: 2.8173 (2.7276) grad_norm: 1.5477 (1.6365) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 04:35:29 root] (utils.py 283): INFO Epoch: [8] [ 900/2502] eta: 1:18:22 lr: 0.000011 loss_cls: 2.8173 (2.7274) grad_norm: 1.6701 (1.6377) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-11 04:35:59 root] (utils.py 283): INFO Epoch: [8] [ 910/2502] eta: 1:17:53 lr: 0.000011 loss_cls: 2.8664 (2.7273) grad_norm: 1.6528 (1.6382) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 04:36:28 root] (utils.py 283): INFO Epoch: [8] [ 920/2502] eta: 1:17:24 lr: 0.000011 loss_cls: 2.8328 (2.7263) grad_norm: 1.5562 (1.6379) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 04:36:58 root] (utils.py 283): INFO Epoch: [8] [ 930/2502] eta: 1:16:54 lr: 0.000011 loss_cls: 2.5139 (2.7220) grad_norm: 1.5112 (1.6376) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 04:37:27 root] (utils.py 283): INFO Epoch: [8] [ 940/2502] eta: 1:16:25 lr: 0.000011 loss_cls: 2.6046 (2.7245) grad_norm: 1.6073 (1.6380) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 04:37:56 root] (utils.py 283): INFO Epoch: [8] [ 950/2502] eta: 1:15:56 lr: 0.000011 loss_cls: 2.8743 (2.7236) grad_norm: 1.6073 (1.6371) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 04:38:26 root] (utils.py 283): INFO Epoch: [8] [ 960/2502] eta: 1:15:27 lr: 0.000011 loss_cls: 2.7307 (2.7214) grad_norm: 1.5031 (1.6366) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 04:38:55 root] (utils.py 283): INFO Epoch: [8] [ 970/2502] eta: 1:14:57 lr: 0.000011 loss_cls: 2.5181 (2.7202) grad_norm: 1.5031 (1.6366) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 04:39:25 root] (utils.py 283): INFO Epoch: [8] [ 980/2502] eta: 1:14:28 lr: 0.000011 loss_cls: 2.8027 (2.7216) grad_norm: 1.6241 (1.6370) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 04:39:54 root] (utils.py 283): INFO Epoch: [8] [ 990/2502] eta: 1:13:59 lr: 0.000011 loss_cls: 2.8571 (2.7233) grad_norm: 1.6438 (1.6364) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 04:40:23 root] (utils.py 283): INFO Epoch: [8] [1000/2502] eta: 1:13:29 lr: 0.000011 loss_cls: 2.8571 (2.7214) grad_norm: 1.6438 (1.6367) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 04:40:53 root] (utils.py 283): INFO Epoch: [8] [1010/2502] eta: 1:13:00 lr: 0.000011 loss_cls: 2.4675 (2.7209) grad_norm: 1.6897 (1.6375) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 04:41:22 root] (utils.py 283): INFO Epoch: [8] [1020/2502] eta: 1:12:31 lr: 0.000011 loss_cls: 2.8753 (2.7210) grad_norm: 1.6277 (1.6382) time: 2.9485 data: 0.0003 max mem: 29202 +[2024-12-11 04:41:52 root] (utils.py 283): INFO Epoch: [8] [1030/2502] eta: 1:12:02 lr: 0.000011 loss_cls: 2.8874 (2.7215) grad_norm: 1.5124 (1.6388) time: 2.9521 data: 0.0002 max mem: 29202 +[2024-12-11 04:42:21 root] (utils.py 283): INFO Epoch: [8] [1040/2502] eta: 1:11:33 lr: 0.000011 loss_cls: 2.7444 (2.7204) grad_norm: 1.5551 (1.6406) time: 2.9492 data: 0.0002 max mem: 29202 +[2024-12-11 04:42:51 root] (utils.py 283): INFO Epoch: [8] [1050/2502] eta: 1:11:03 lr: 0.000011 loss_cls: 2.7120 (2.7212) grad_norm: 1.5551 (1.6399) time: 2.9479 data: 0.0003 max mem: 29202 +[2024-12-11 04:43:20 root] (utils.py 283): INFO Epoch: [8] [1060/2502] eta: 1:10:34 lr: 0.000011 loss_cls: 2.7120 (2.7194) grad_norm: 1.5519 (1.6398) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-11 04:43:50 root] (utils.py 283): INFO Epoch: [8] [1070/2502] eta: 1:10:05 lr: 0.000011 loss_cls: 2.7557 (2.7190) grad_norm: 1.5270 (1.6409) time: 2.9446 data: 0.0002 max mem: 29202 +[2024-12-11 04:44:19 root] (utils.py 283): INFO Epoch: [8] [1080/2502] eta: 1:09:36 lr: 0.000011 loss_cls: 2.8440 (2.7178) grad_norm: 1.5882 (1.6428) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-11 04:44:49 root] (utils.py 283): INFO Epoch: [8] [1090/2502] eta: 1:09:06 lr: 0.000011 loss_cls: 2.7748 (2.7164) grad_norm: 1.5827 (1.6421) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-11 04:45:18 root] (utils.py 283): INFO Epoch: [8] [1100/2502] eta: 1:08:37 lr: 0.000011 loss_cls: 2.6685 (2.7164) grad_norm: 1.5448 (1.6418) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 04:45:47 root] (utils.py 283): INFO Epoch: [8] [1110/2502] eta: 1:08:08 lr: 0.000011 loss_cls: 2.6685 (2.7148) grad_norm: 1.5799 (1.6419) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 04:46:17 root] (utils.py 283): INFO Epoch: [8] [1120/2502] eta: 1:07:38 lr: 0.000011 loss_cls: 2.7644 (2.7133) grad_norm: 1.5410 (1.6411) time: 2.9397 data: 0.0002 max mem: 29202 +[2024-12-11 04:46:46 root] (utils.py 283): INFO Epoch: [8] [1130/2502] eta: 1:07:09 lr: 0.000011 loss_cls: 2.7904 (2.7144) grad_norm: 1.5167 (1.6406) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 04:47:16 root] (utils.py 283): INFO Epoch: [8] [1140/2502] eta: 1:06:40 lr: 0.000011 loss_cls: 2.8587 (2.7156) grad_norm: 1.5312 (1.6406) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 04:47:45 root] (utils.py 283): INFO Epoch: [8] [1150/2502] eta: 1:06:10 lr: 0.000011 loss_cls: 2.8137 (2.7155) grad_norm: 1.6403 (1.6410) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 04:48:14 root] (utils.py 283): INFO Epoch: [8] [1160/2502] eta: 1:05:41 lr: 0.000011 loss_cls: 2.8019 (2.7147) grad_norm: 1.6211 (1.6417) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 04:48:44 root] (utils.py 283): INFO Epoch: [8] [1170/2502] eta: 1:05:12 lr: 0.000011 loss_cls: 2.8647 (2.7160) grad_norm: 1.5538 (1.6407) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 04:49:13 root] (utils.py 283): INFO Epoch: [8] [1180/2502] eta: 1:04:42 lr: 0.000011 loss_cls: 2.8647 (2.7175) grad_norm: 1.5592 (1.6431) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-11 04:49:43 root] (utils.py 283): INFO Epoch: [8] [1190/2502] eta: 1:04:13 lr: 0.000011 loss_cls: 2.8194 (2.7182) grad_norm: 1.5847 (1.6437) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 04:50:12 root] (utils.py 283): INFO Epoch: [8] [1200/2502] eta: 1:03:44 lr: 0.000011 loss_cls: 2.9571 (2.7191) grad_norm: 1.5995 (1.6442) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 04:50:41 root] (utils.py 283): INFO Epoch: [8] [1210/2502] eta: 1:03:14 lr: 0.000011 loss_cls: 2.9571 (2.7184) grad_norm: 1.5356 (1.6435) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-11 04:51:11 root] (utils.py 283): INFO Epoch: [8] [1220/2502] eta: 1:02:45 lr: 0.000011 loss_cls: 2.8487 (2.7187) grad_norm: 1.4707 (1.6427) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 04:51:40 root] (utils.py 283): INFO Epoch: [8] [1230/2502] eta: 1:02:16 lr: 0.000011 loss_cls: 2.7771 (2.7195) grad_norm: 1.5468 (1.6420) time: 2.9399 data: 0.0002 max mem: 29202 +[2024-12-11 04:52:10 root] (utils.py 283): INFO Epoch: [8] [1240/2502] eta: 1:01:46 lr: 0.000011 loss_cls: 2.8112 (2.7196) grad_norm: 1.5411 (1.6418) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 04:52:39 root] (utils.py 283): INFO Epoch: [8] [1250/2502] eta: 1:01:17 lr: 0.000011 loss_cls: 2.9182 (2.7198) grad_norm: 1.5920 (1.6424) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 04:53:08 root] (utils.py 283): INFO Epoch: [8] [1260/2502] eta: 1:00:48 lr: 0.000011 loss_cls: 2.9030 (2.7200) grad_norm: 1.6200 (1.6426) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 04:53:38 root] (utils.py 283): INFO Epoch: [8] [1270/2502] eta: 1:00:18 lr: 0.000011 loss_cls: 2.8461 (2.7209) grad_norm: 1.6200 (1.6432) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 04:54:07 root] (utils.py 283): INFO Epoch: [8] [1280/2502] eta: 0:59:49 lr: 0.000011 loss_cls: 2.7483 (2.7195) grad_norm: 1.5845 (1.6426) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 04:54:37 root] (utils.py 283): INFO Epoch: [8] [1290/2502] eta: 0:59:20 lr: 0.000011 loss_cls: 2.6134 (2.7176) grad_norm: 1.5472 (1.6422) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 04:55:06 root] (utils.py 283): INFO Epoch: [8] [1300/2502] eta: 0:58:50 lr: 0.000011 loss_cls: 2.3362 (2.7137) grad_norm: 1.5162 (1.6409) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 04:55:36 root] (utils.py 283): INFO Epoch: [8] [1310/2502] eta: 0:58:21 lr: 0.000011 loss_cls: 2.4273 (2.7147) grad_norm: 1.4985 (1.6406) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 04:56:05 root] (utils.py 283): INFO Epoch: [8] [1320/2502] eta: 0:57:52 lr: 0.000011 loss_cls: 2.9745 (2.7149) grad_norm: 1.5255 (1.6406) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-11 04:56:35 root] (utils.py 283): INFO Epoch: [8] [1330/2502] eta: 0:57:22 lr: 0.000011 loss_cls: 2.8650 (2.7156) grad_norm: 1.6058 (1.6406) time: 2.9533 data: 0.0003 max mem: 29202 +[2024-12-11 04:57:04 root] (utils.py 283): INFO Epoch: [8] [1340/2502] eta: 0:56:53 lr: 0.000011 loss_cls: 2.8601 (2.7172) grad_norm: 1.6262 (1.6410) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 04:57:33 root] (utils.py 283): INFO Epoch: [8] [1350/2502] eta: 0:56:24 lr: 0.000011 loss_cls: 2.8605 (2.7184) grad_norm: 1.7285 (1.6420) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 04:58:03 root] (utils.py 283): INFO Epoch: [8] [1360/2502] eta: 0:55:54 lr: 0.000011 loss_cls: 2.8605 (2.7188) grad_norm: 1.6200 (1.6418) time: 2.9354 data: 0.0003 max mem: 29202 +[2024-12-11 04:58:32 root] (utils.py 283): INFO Epoch: [8] [1370/2502] eta: 0:55:25 lr: 0.000011 loss_cls: 2.8071 (2.7177) grad_norm: 1.6200 (1.6427) time: 2.9334 data: 0.0003 max mem: 29202 +[2024-12-11 04:59:01 root] (utils.py 283): INFO Epoch: [8] [1380/2502] eta: 0:54:55 lr: 0.000011 loss_cls: 2.8251 (2.7185) grad_norm: 1.5840 (1.6421) time: 2.9316 data: 0.0003 max mem: 29202 +[2024-12-11 04:59:31 root] (utils.py 283): INFO Epoch: [8] [1390/2502] eta: 0:54:26 lr: 0.000011 loss_cls: 2.6584 (2.7174) grad_norm: 1.5619 (1.6435) time: 2.9306 data: 0.0003 max mem: 29202 +[2024-12-11 05:00:00 root] (utils.py 283): INFO Epoch: [8] [1400/2502] eta: 0:53:57 lr: 0.000011 loss_cls: 2.5916 (2.7164) grad_norm: 1.5653 (1.6432) time: 2.9324 data: 0.0003 max mem: 29202 +[2024-12-11 05:00:30 root] (utils.py 283): INFO Epoch: [8] [1410/2502] eta: 0:53:27 lr: 0.000011 loss_cls: 2.9399 (2.7163) grad_norm: 1.5582 (1.6426) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 05:00:59 root] (utils.py 283): INFO Epoch: [8] [1420/2502] eta: 0:52:58 lr: 0.000011 loss_cls: 2.8531 (2.7155) grad_norm: 1.4804 (1.6418) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-11 05:01:28 root] (utils.py 283): INFO Epoch: [8] [1430/2502] eta: 0:52:29 lr: 0.000011 loss_cls: 2.6263 (2.7148) grad_norm: 1.5656 (1.6419) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 05:01:58 root] (utils.py 283): INFO Epoch: [8] [1440/2502] eta: 0:51:59 lr: 0.000011 loss_cls: 2.6263 (2.7142) grad_norm: 1.5907 (1.6417) time: 2.9399 data: 0.0002 max mem: 29202 +[2024-12-11 05:02:27 root] (utils.py 283): INFO Epoch: [8] [1450/2502] eta: 0:51:30 lr: 0.000011 loss_cls: 2.8013 (2.7128) grad_norm: 1.6032 (1.6427) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 05:02:56 root] (utils.py 283): INFO Epoch: [8] [1460/2502] eta: 0:51:01 lr: 0.000011 loss_cls: 2.8240 (2.7137) grad_norm: 1.6253 (1.6425) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 05:03:26 root] (utils.py 283): INFO Epoch: [8] [1470/2502] eta: 0:50:31 lr: 0.000011 loss_cls: 2.8375 (2.7134) grad_norm: 1.5691 (1.6421) time: 2.9513 data: 0.0003 max mem: 29202 +[2024-12-11 05:03:56 root] (utils.py 283): INFO Epoch: [8] [1480/2502] eta: 0:50:02 lr: 0.000011 loss_cls: 2.7252 (2.7124) grad_norm: 1.5650 (1.6420) time: 2.9604 data: 0.0003 max mem: 29202 +[2024-12-11 05:04:25 root] (utils.py 283): INFO Epoch: [8] [1490/2502] eta: 0:49:33 lr: 0.000011 loss_cls: 2.4824 (2.7117) grad_norm: 1.6305 (1.6424) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-11 05:04:55 root] (utils.py 283): INFO Epoch: [8] [1500/2502] eta: 0:49:03 lr: 0.000011 loss_cls: 2.6626 (2.7109) grad_norm: 1.6104 (1.6423) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 05:05:24 root] (utils.py 283): INFO Epoch: [8] [1510/2502] eta: 0:48:34 lr: 0.000011 loss_cls: 2.8090 (2.7114) grad_norm: 1.5802 (1.6443) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 05:05:53 root] (utils.py 283): INFO Epoch: [8] [1520/2502] eta: 0:48:05 lr: 0.000011 loss_cls: 2.7247 (2.7094) grad_norm: 1.5973 (1.6444) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 05:06:23 root] (utils.py 283): INFO Epoch: [8] [1530/2502] eta: 0:47:35 lr: 0.000011 loss_cls: 2.2329 (2.7085) grad_norm: 1.6792 (1.6450) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 05:06:52 root] (utils.py 283): INFO Epoch: [8] [1540/2502] eta: 0:47:06 lr: 0.000011 loss_cls: 2.4663 (2.7071) grad_norm: 1.6831 (1.6450) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 05:07:22 root] (utils.py 283): INFO Epoch: [8] [1550/2502] eta: 0:46:37 lr: 0.000011 loss_cls: 2.6908 (2.7076) grad_norm: 1.5837 (1.6450) time: 2.9561 data: 0.0003 max mem: 29202 +[2024-12-11 05:07:51 root] (utils.py 283): INFO Epoch: [8] [1560/2502] eta: 0:46:07 lr: 0.000011 loss_cls: 2.9133 (2.7080) grad_norm: 1.5700 (1.6446) time: 2.9564 data: 0.0003 max mem: 29202 +[2024-12-11 05:08:21 root] (utils.py 283): INFO Epoch: [8] [1570/2502] eta: 0:45:38 lr: 0.000011 loss_cls: 2.9203 (2.7083) grad_norm: 1.5750 (1.6445) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 05:08:50 root] (utils.py 283): INFO Epoch: [8] [1580/2502] eta: 0:45:09 lr: 0.000011 loss_cls: 2.7091 (2.7065) grad_norm: 1.5959 (1.6442) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 05:09:19 root] (utils.py 283): INFO Epoch: [8] [1590/2502] eta: 0:44:39 lr: 0.000011 loss_cls: 2.5332 (2.7051) grad_norm: 1.5444 (1.6437) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 05:09:49 root] (utils.py 283): INFO Epoch: [8] [1600/2502] eta: 0:44:10 lr: 0.000011 loss_cls: 2.2519 (2.7029) grad_norm: 1.5370 (1.6432) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 05:10:18 root] (utils.py 283): INFO Epoch: [8] [1610/2502] eta: 0:43:41 lr: 0.000011 loss_cls: 2.8760 (2.7043) grad_norm: 1.5812 (1.6433) time: 2.9459 data: 0.0003 max mem: 29202 +[2024-12-11 05:10:48 root] (utils.py 283): INFO Epoch: [8] [1620/2502] eta: 0:43:11 lr: 0.000011 loss_cls: 2.9433 (2.7042) grad_norm: 1.6244 (1.6432) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 05:11:17 root] (utils.py 283): INFO Epoch: [8] [1630/2502] eta: 0:42:42 lr: 0.000011 loss_cls: 2.9068 (2.7045) grad_norm: 1.6190 (1.6432) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 05:11:46 root] (utils.py 283): INFO Epoch: [8] [1640/2502] eta: 0:42:12 lr: 0.000011 loss_cls: 2.7689 (2.7046) grad_norm: 1.6678 (1.6438) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 05:12:16 root] (utils.py 283): INFO Epoch: [8] [1650/2502] eta: 0:41:43 lr: 0.000011 loss_cls: 2.6972 (2.7040) grad_norm: 1.6488 (1.6434) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 05:12:45 root] (utils.py 283): INFO Epoch: [8] [1660/2502] eta: 0:41:14 lr: 0.000011 loss_cls: 2.6336 (2.7044) grad_norm: 1.5390 (1.6445) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 05:13:15 root] (utils.py 283): INFO Epoch: [8] [1670/2502] eta: 0:40:44 lr: 0.000011 loss_cls: 2.8740 (2.7049) grad_norm: 1.5642 (1.6447) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 05:13:44 root] (utils.py 283): INFO Epoch: [8] [1680/2502] eta: 0:40:15 lr: 0.000011 loss_cls: 2.8740 (2.7057) grad_norm: 1.6080 (1.6448) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 05:14:13 root] (utils.py 283): INFO Epoch: [8] [1690/2502] eta: 0:39:45 lr: 0.000011 loss_cls: 2.8129 (2.7055) grad_norm: 1.5765 (1.6448) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 05:14:43 root] (utils.py 283): INFO Epoch: [8] [1700/2502] eta: 0:39:16 lr: 0.000011 loss_cls: 2.8417 (2.7062) grad_norm: 1.5728 (1.6445) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 05:15:12 root] (utils.py 283): INFO Epoch: [8] [1710/2502] eta: 0:38:47 lr: 0.000011 loss_cls: 2.9174 (2.7069) grad_norm: 1.5728 (1.6441) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 05:15:42 root] (utils.py 283): INFO Epoch: [8] [1720/2502] eta: 0:38:17 lr: 0.000011 loss_cls: 2.9174 (2.7075) grad_norm: 1.5357 (1.6438) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 05:16:11 root] (utils.py 283): INFO Epoch: [8] [1730/2502] eta: 0:37:48 lr: 0.000011 loss_cls: 2.9148 (2.7069) grad_norm: 1.5378 (1.6445) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 05:16:40 root] (utils.py 283): INFO Epoch: [8] [1740/2502] eta: 0:37:19 lr: 0.000011 loss_cls: 2.8576 (2.7071) grad_norm: 1.5639 (1.6446) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 05:17:10 root] (utils.py 283): INFO Epoch: [8] [1750/2502] eta: 0:36:49 lr: 0.000011 loss_cls: 2.9437 (2.7088) grad_norm: 1.6386 (1.6449) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 05:17:39 root] (utils.py 283): INFO Epoch: [8] [1760/2502] eta: 0:36:20 lr: 0.000011 loss_cls: 2.9790 (2.7083) grad_norm: 1.6524 (1.6448) time: 2.9502 data: 0.0003 max mem: 29202 +[2024-12-11 05:18:09 root] (utils.py 283): INFO Epoch: [8] [1770/2502] eta: 0:35:51 lr: 0.000011 loss_cls: 2.5514 (2.7087) grad_norm: 1.6446 (1.6457) time: 2.9526 data: 0.0003 max mem: 29202 +[2024-12-11 05:18:38 root] (utils.py 283): INFO Epoch: [8] [1780/2502] eta: 0:35:21 lr: 0.000011 loss_cls: 2.6286 (2.7074) grad_norm: 1.6112 (1.6455) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 05:19:08 root] (utils.py 283): INFO Epoch: [8] [1790/2502] eta: 0:34:52 lr: 0.000011 loss_cls: 2.7415 (2.7079) grad_norm: 1.6207 (1.6453) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 05:19:37 root] (utils.py 283): INFO Epoch: [8] [1800/2502] eta: 0:34:22 lr: 0.000011 loss_cls: 2.9750 (2.7094) grad_norm: 1.6231 (1.6454) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 05:20:07 root] (utils.py 283): INFO Epoch: [8] [1810/2502] eta: 0:33:53 lr: 0.000011 loss_cls: 2.9750 (2.7097) grad_norm: 1.5497 (1.6456) time: 2.9408 data: 0.0002 max mem: 29202 +[2024-12-11 05:20:36 root] (utils.py 283): INFO Epoch: [8] [1820/2502] eta: 0:33:24 lr: 0.000011 loss_cls: 2.7609 (2.7099) grad_norm: 1.6356 (1.6458) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 05:21:05 root] (utils.py 283): INFO Epoch: [8] [1830/2502] eta: 0:32:54 lr: 0.000011 loss_cls: 2.6473 (2.7099) grad_norm: 1.6660 (1.6456) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-11 05:21:35 root] (utils.py 283): INFO Epoch: [8] [1840/2502] eta: 0:32:25 lr: 0.000011 loss_cls: 2.5844 (2.7083) grad_norm: 1.5635 (1.6454) time: 2.9460 data: 0.0002 max mem: 29202 +[2024-12-11 05:22:04 root] (utils.py 283): INFO Epoch: [8] [1850/2502] eta: 0:31:56 lr: 0.000011 loss_cls: 2.7626 (2.7087) grad_norm: 1.6149 (1.6455) time: 2.9430 data: 0.0002 max mem: 29202 +[2024-12-11 05:22:34 root] (utils.py 283): INFO Epoch: [8] [1860/2502] eta: 0:31:26 lr: 0.000011 loss_cls: 2.9246 (2.7098) grad_norm: 1.6589 (1.6455) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 05:23:03 root] (utils.py 283): INFO Epoch: [8] [1870/2502] eta: 0:30:57 lr: 0.000011 loss_cls: 2.7981 (2.7080) grad_norm: 1.5983 (1.6455) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 05:23:33 root] (utils.py 283): INFO Epoch: [8] [1880/2502] eta: 0:30:28 lr: 0.000011 loss_cls: 2.8714 (2.7097) grad_norm: 1.5807 (1.6453) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-11 05:24:02 root] (utils.py 283): INFO Epoch: [8] [1890/2502] eta: 0:29:58 lr: 0.000011 loss_cls: 2.9518 (2.7104) grad_norm: 1.5157 (1.6453) time: 2.9505 data: 0.0003 max mem: 29202 +[2024-12-11 05:24:32 root] (utils.py 283): INFO Epoch: [8] [1900/2502] eta: 0:29:29 lr: 0.000011 loss_cls: 2.8970 (2.7102) grad_norm: 1.6522 (1.6457) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 05:25:01 root] (utils.py 283): INFO Epoch: [8] [1910/2502] eta: 0:28:59 lr: 0.000011 loss_cls: 2.8296 (2.7110) grad_norm: 1.6599 (1.6460) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 05:25:31 root] (utils.py 283): INFO Epoch: [8] [1920/2502] eta: 0:28:30 lr: 0.000011 loss_cls: 2.7962 (2.7110) grad_norm: 1.6599 (1.6462) time: 2.9500 data: 0.0003 max mem: 29202 +[2024-12-11 05:26:00 root] (utils.py 283): INFO Epoch: [8] [1930/2502] eta: 0:28:01 lr: 0.000011 loss_cls: 2.7868 (2.7107) grad_norm: 1.6205 (1.6461) time: 2.9518 data: 0.0002 max mem: 29202 +[2024-12-11 05:26:30 root] (utils.py 283): INFO Epoch: [8] [1940/2502] eta: 0:27:31 lr: 0.000011 loss_cls: 2.7744 (2.7102) grad_norm: 1.6613 (1.6466) time: 2.9418 data: 0.0002 max mem: 29202 +[2024-12-11 05:26:59 root] (utils.py 283): INFO Epoch: [8] [1950/2502] eta: 0:27:02 lr: 0.000011 loss_cls: 2.5812 (2.7097) grad_norm: 1.5842 (1.6466) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 05:27:29 root] (utils.py 283): INFO Epoch: [8] [1960/2502] eta: 0:26:33 lr: 0.000011 loss_cls: 2.7290 (2.7110) grad_norm: 1.5559 (1.6462) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-11 05:27:58 root] (utils.py 283): INFO Epoch: [8] [1970/2502] eta: 0:26:03 lr: 0.000011 loss_cls: 2.9901 (2.7114) grad_norm: 1.5187 (1.6456) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-11 05:28:27 root] (utils.py 283): INFO Epoch: [8] [1980/2502] eta: 0:25:34 lr: 0.000011 loss_cls: 2.8022 (2.7113) grad_norm: 1.6094 (1.6460) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-11 05:28:57 root] (utils.py 283): INFO Epoch: [8] [1990/2502] eta: 0:25:04 lr: 0.000011 loss_cls: 2.7686 (2.7116) grad_norm: 1.6722 (1.6459) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-11 05:29:26 root] (utils.py 283): INFO Epoch: [8] [2000/2502] eta: 0:24:35 lr: 0.000011 loss_cls: 2.8886 (2.7117) grad_norm: 1.5881 (1.6456) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 05:29:56 root] (utils.py 283): INFO Epoch: [8] [2010/2502] eta: 0:24:06 lr: 0.000011 loss_cls: 2.6164 (2.7106) grad_norm: 1.5212 (1.6454) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 05:30:25 root] (utils.py 283): INFO Epoch: [8] [2020/2502] eta: 0:23:36 lr: 0.000011 loss_cls: 2.4924 (2.7092) grad_norm: 1.5328 (1.6461) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 05:30:55 root] (utils.py 283): INFO Epoch: [8] [2030/2502] eta: 0:23:07 lr: 0.000011 loss_cls: 2.7206 (2.7097) grad_norm: 1.6587 (1.6461) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 05:31:24 root] (utils.py 283): INFO Epoch: [8] [2040/2502] eta: 0:22:38 lr: 0.000011 loss_cls: 2.7445 (2.7095) grad_norm: 1.5704 (1.6455) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 05:31:54 root] (utils.py 283): INFO Epoch: [8] [2050/2502] eta: 0:22:08 lr: 0.000011 loss_cls: 2.7181 (2.7102) grad_norm: 1.5338 (1.6455) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-11 05:32:23 root] (utils.py 283): INFO Epoch: [8] [2060/2502] eta: 0:21:39 lr: 0.000011 loss_cls: 2.7405 (2.7096) grad_norm: 1.6360 (1.6460) time: 2.9459 data: 0.0002 max mem: 29202 +[2024-12-11 05:32:52 root] (utils.py 283): INFO Epoch: [8] [2070/2502] eta: 0:21:09 lr: 0.000011 loss_cls: 2.7587 (2.7094) grad_norm: 1.6442 (1.6461) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 05:33:22 root] (utils.py 283): INFO Epoch: [8] [2080/2502] eta: 0:20:40 lr: 0.000011 loss_cls: 2.6322 (2.7091) grad_norm: 1.6714 (1.6465) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 05:33:51 root] (utils.py 283): INFO Epoch: [8] [2090/2502] eta: 0:20:11 lr: 0.000011 loss_cls: 2.7857 (2.7100) grad_norm: 1.6954 (1.6464) time: 2.9509 data: 0.0003 max mem: 29202 +[2024-12-11 05:34:21 root] (utils.py 283): INFO Epoch: [8] [2100/2502] eta: 0:19:41 lr: 0.000011 loss_cls: 2.6400 (2.7084) grad_norm: 1.6704 (1.6472) time: 2.9477 data: 0.0002 max mem: 29202 +[2024-12-11 05:34:50 root] (utils.py 283): INFO Epoch: [8] [2110/2502] eta: 0:19:12 lr: 0.000011 loss_cls: 2.5629 (2.7082) grad_norm: 1.6747 (1.6475) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-11 05:35:20 root] (utils.py 283): INFO Epoch: [8] [2120/2502] eta: 0:18:43 lr: 0.000011 loss_cls: 2.6986 (2.7085) grad_norm: 1.6747 (1.6477) time: 2.9548 data: 0.0003 max mem: 29202 +[2024-12-11 05:35:49 root] (utils.py 283): INFO Epoch: [8] [2130/2502] eta: 0:18:13 lr: 0.000011 loss_cls: 2.8254 (2.7083) grad_norm: 1.6899 (1.6485) time: 2.9569 data: 0.0003 max mem: 29202 +[2024-12-11 05:36:19 root] (utils.py 283): INFO Epoch: [8] [2140/2502] eta: 0:17:44 lr: 0.000011 loss_cls: 2.7690 (2.7083) grad_norm: 1.7004 (1.6486) time: 2.9498 data: 0.0003 max mem: 29202 +[2024-12-11 05:36:48 root] (utils.py 283): INFO Epoch: [8] [2150/2502] eta: 0:17:14 lr: 0.000011 loss_cls: 2.7409 (2.7077) grad_norm: 1.6625 (1.6489) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 05:37:18 root] (utils.py 283): INFO Epoch: [8] [2160/2502] eta: 0:16:45 lr: 0.000011 loss_cls: 2.9386 (2.7084) grad_norm: 1.6502 (1.6490) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-11 05:37:47 root] (utils.py 283): INFO Epoch: [8] [2170/2502] eta: 0:16:16 lr: 0.000011 loss_cls: 3.0706 (2.7099) grad_norm: 1.6333 (1.6494) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 05:38:17 root] (utils.py 283): INFO Epoch: [8] [2180/2502] eta: 0:15:46 lr: 0.000011 loss_cls: 2.9378 (2.7089) grad_norm: 1.6189 (1.6497) time: 2.9455 data: 0.0002 max mem: 29202 +[2024-12-11 05:38:46 root] (utils.py 283): INFO Epoch: [8] [2190/2502] eta: 0:15:17 lr: 0.000011 loss_cls: 2.9236 (2.7092) grad_norm: 1.5912 (1.6497) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 05:39:15 root] (utils.py 283): INFO Epoch: [8] [2200/2502] eta: 0:14:47 lr: 0.000011 loss_cls: 2.9236 (2.7086) grad_norm: 1.6679 (1.6513) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 05:39:45 root] (utils.py 283): INFO Epoch: [8] [2210/2502] eta: 0:14:18 lr: 0.000011 loss_cls: 2.7946 (2.7093) grad_norm: 1.6535 (1.6512) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 05:40:14 root] (utils.py 283): INFO Epoch: [8] [2220/2502] eta: 0:13:49 lr: 0.000011 loss_cls: 2.8387 (2.7089) grad_norm: 1.5416 (1.6529) time: 2.9315 data: 0.0003 max mem: 29202 +[2024-12-11 05:40:43 root] (utils.py 283): INFO Epoch: [8] [2230/2502] eta: 0:13:19 lr: 0.000011 loss_cls: 2.7368 (2.7088) grad_norm: 1.5855 (1.6538) time: 2.9311 data: 0.0003 max mem: 29202 +[2024-12-11 05:41:13 root] (utils.py 283): INFO Epoch: [8] [2240/2502] eta: 0:12:50 lr: 0.000011 loss_cls: 2.9986 (2.7096) grad_norm: 1.6659 (1.6540) time: 2.9298 data: 0.0003 max mem: 29202 +[2024-12-11 05:41:42 root] (utils.py 283): INFO Epoch: [8] [2250/2502] eta: 0:12:20 lr: 0.000011 loss_cls: 3.0357 (2.7109) grad_norm: 1.6800 (1.6543) time: 2.9277 data: 0.0003 max mem: 29202 +[2024-12-11 05:42:11 root] (utils.py 283): INFO Epoch: [8] [2260/2502] eta: 0:11:51 lr: 0.000011 loss_cls: 3.0240 (2.7122) grad_norm: 1.6873 (1.6554) time: 2.9255 data: 0.0002 max mem: 29202 +[2024-12-11 05:42:40 root] (utils.py 283): INFO Epoch: [8] [2270/2502] eta: 0:11:21 lr: 0.000011 loss_cls: 2.9773 (2.7122) grad_norm: 1.7116 (1.6556) time: 2.9240 data: 0.0002 max mem: 29202 +[2024-12-11 05:43:10 root] (utils.py 283): INFO Epoch: [8] [2280/2502] eta: 0:10:52 lr: 0.000011 loss_cls: 2.5911 (2.7118) grad_norm: 1.5888 (1.6552) time: 2.9260 data: 0.0003 max mem: 29202 +[2024-12-11 05:43:39 root] (utils.py 283): INFO Epoch: [8] [2290/2502] eta: 0:10:23 lr: 0.000011 loss_cls: 2.5555 (2.7113) grad_norm: 1.5469 (1.6548) time: 2.9259 data: 0.0003 max mem: 29202 +[2024-12-11 05:44:08 root] (utils.py 283): INFO Epoch: [8] [2300/2502] eta: 0:09:53 lr: 0.000011 loss_cls: 2.7693 (2.7118) grad_norm: 1.5656 (1.6562) time: 2.9248 data: 0.0002 max mem: 29202 +[2024-12-11 05:44:37 root] (utils.py 283): INFO Epoch: [8] [2310/2502] eta: 0:09:24 lr: 0.000011 loss_cls: 2.8085 (2.7120) grad_norm: 1.5907 (1.6560) time: 2.9247 data: 0.0003 max mem: 29202 +[2024-12-11 05:45:07 root] (utils.py 283): INFO Epoch: [8] [2320/2502] eta: 0:08:54 lr: 0.000011 loss_cls: 2.8790 (2.7133) grad_norm: 1.5723 (1.6558) time: 2.9256 data: 0.0003 max mem: 29202 +[2024-12-11 05:45:36 root] (utils.py 283): INFO Epoch: [8] [2330/2502] eta: 0:08:25 lr: 0.000011 loss_cls: 2.9735 (2.7141) grad_norm: 1.5898 (1.6558) time: 2.9259 data: 0.0003 max mem: 29202 +[2024-12-11 05:46:05 root] (utils.py 283): INFO Epoch: [8] [2340/2502] eta: 0:07:56 lr: 0.000011 loss_cls: 2.8142 (2.7135) grad_norm: 1.5959 (1.6559) time: 2.9309 data: 0.0003 max mem: 29202 +[2024-12-11 05:46:35 root] (utils.py 283): INFO Epoch: [8] [2350/2502] eta: 0:07:26 lr: 0.000011 loss_cls: 2.6805 (2.7139) grad_norm: 1.5896 (1.6558) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 05:47:04 root] (utils.py 283): INFO Epoch: [8] [2360/2502] eta: 0:06:57 lr: 0.000011 loss_cls: 2.8331 (2.7136) grad_norm: 1.6246 (1.6556) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 05:47:34 root] (utils.py 283): INFO Epoch: [8] [2370/2502] eta: 0:06:27 lr: 0.000011 loss_cls: 2.8816 (2.7140) grad_norm: 1.6124 (1.6551) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 05:48:03 root] (utils.py 283): INFO Epoch: [8] [2380/2502] eta: 0:05:58 lr: 0.000011 loss_cls: 2.9336 (2.7151) grad_norm: 1.6306 (1.6554) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 05:48:32 root] (utils.py 283): INFO Epoch: [8] [2390/2502] eta: 0:05:29 lr: 0.000011 loss_cls: 2.9929 (2.7151) grad_norm: 1.6374 (1.6550) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 05:49:02 root] (utils.py 283): INFO Epoch: [8] [2400/2502] eta: 0:04:59 lr: 0.000011 loss_cls: 2.8479 (2.7153) grad_norm: 1.5602 (1.6552) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 05:49:31 root] (utils.py 283): INFO Epoch: [8] [2410/2502] eta: 0:04:30 lr: 0.000011 loss_cls: 2.6944 (2.7150) grad_norm: 1.6260 (1.6552) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 05:50:01 root] (utils.py 283): INFO Epoch: [8] [2420/2502] eta: 0:04:01 lr: 0.000011 loss_cls: 2.8640 (2.7157) grad_norm: 1.6280 (1.6551) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 05:50:30 root] (utils.py 283): INFO Epoch: [8] [2430/2502] eta: 0:03:31 lr: 0.000011 loss_cls: 2.9319 (2.7165) grad_norm: 1.6574 (1.6552) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 05:50:59 root] (utils.py 283): INFO Epoch: [8] [2440/2502] eta: 0:03:02 lr: 0.000011 loss_cls: 2.8386 (2.7163) grad_norm: 1.5685 (1.6545) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 05:51:29 root] (utils.py 283): INFO Epoch: [8] [2450/2502] eta: 0:02:32 lr: 0.000011 loss_cls: 2.8133 (2.7172) grad_norm: 1.5551 (1.6548) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 05:51:58 root] (utils.py 283): INFO Epoch: [8] [2460/2502] eta: 0:02:03 lr: 0.000011 loss_cls: 2.9199 (2.7168) grad_norm: 1.5996 (1.6543) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 05:52:28 root] (utils.py 283): INFO Epoch: [8] [2470/2502] eta: 0:01:34 lr: 0.000011 loss_cls: 2.9118 (2.7167) grad_norm: 1.5996 (1.6543) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-11 05:52:57 root] (utils.py 283): INFO Epoch: [8] [2480/2502] eta: 0:01:04 lr: 0.000011 loss_cls: 2.7020 (2.7162) grad_norm: 1.5371 (1.6542) time: 2.9459 data: 0.0003 max mem: 29202 +[2024-12-11 05:53:27 root] (utils.py 283): INFO Epoch: [8] [2490/2502] eta: 0:00:35 lr: 0.000011 loss_cls: 2.8133 (2.7169) grad_norm: 1.5371 (1.6541) time: 2.9613 data: 0.0235 max mem: 29202 +[2024-12-11 05:53:56 root] (utils.py 283): INFO Epoch: [8] [2500/2502] eta: 0:00:05 lr: 0.000011 loss_cls: 3.0005 (2.7184) grad_norm: 1.6857 (1.6555) time: 2.9631 data: 0.0235 max mem: 29202 +[2024-12-11 05:53:59 root] (utils.py 283): INFO Epoch: [8] [2501/2502] eta: 0:00:02 lr: 0.000011 loss_cls: 3.0005 (2.7185) grad_norm: 1.6793 (1.6555) time: 2.9638 data: 0.0235 max mem: 29202 +[2024-12-11 05:53:59 root] (utils.py 297): INFO Epoch: [8] Total time: 2:02:34 (2.9396 s / it) +[2024-12-11 05:53:59 root] (engine.py 179): INFO Averaged stats:lr: 0.000011 loss_cls: 3.0005 (2.7185) grad_norm: 1.6793 (1.6555) +[2024-12-11 05:54:03 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2966 (0.2966) acc1: 92.9688 (92.9688) acc3: 97.6562 (97.6562) acc5: 98.4375 (98.4375) time: 0.5697 data: 0.0004 max mem: 29202 +[2024-12-11 05:54:09 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5969 (0.5757) acc1: 85.9375 (86.8608) acc3: 97.6562 (96.5199) acc5: 98.4375 (98.1534) time: 0.5526 data: 0.0005 max mem: 29202 +[2024-12-11 05:54:14 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5969 (0.6170) acc1: 85.9375 (85.6399) acc3: 96.8750 (96.0193) acc5: 97.6562 (97.7307) time: 0.5510 data: 0.0005 max mem: 29202 +[2024-12-11 05:54:20 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6425 (0.6412) acc1: 85.9375 (85.2319) acc3: 95.3125 (95.7409) acc5: 97.6562 (97.5050) time: 0.5516 data: 0.0004 max mem: 29202 +[2024-12-11 05:54:25 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6463 (0.6462) acc1: 85.1562 (85.1944) acc3: 96.0938 (95.7698) acc5: 97.6562 (97.5610) time: 0.5519 data: 0.0004 max mem: 29202 +[2024-12-11 05:54:31 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8394 (0.7329) acc1: 78.9062 (83.5018) acc3: 92.9688 (94.6232) acc5: 95.3125 (96.6759) time: 0.5522 data: 0.0004 max mem: 29202 +[2024-12-11 05:54:36 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9421 (0.7641) acc1: 77.3438 (82.9150) acc3: 90.6250 (94.0446) acc5: 93.7500 (96.2346) time: 0.5524 data: 0.0004 max mem: 29202 +[2024-12-11 05:54:42 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9421 (0.7973) acc1: 78.1250 (82.2293) acc3: 91.4062 (93.8160) acc5: 94.5312 (96.0167) time: 0.5522 data: 0.0004 max mem: 29202 +[2024-12-11 05:54:47 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9405 (0.8208) acc1: 78.1250 (81.7226) acc3: 91.4062 (93.4414) acc5: 93.7500 (95.6694) time: 0.5526 data: 0.0006 max mem: 29202 +[2024-12-11 05:54:53 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9661 (0.8511) acc1: 75.7812 (80.9753) acc3: 90.6250 (93.0975) acc5: 92.9688 (95.4413) time: 0.5533 data: 0.0006 max mem: 29202 +[2024-12-11 05:54:57 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9404 (0.8499) acc1: 75.0000 (80.9280) acc3: 91.4062 (93.1440) acc5: 93.7500 (95.5120) time: 0.5438 data: 0.0006 max mem: 29202 +[2024-12-11 05:54:57 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5506 s / it) +[2024-12-11 05:54:57 root] (engine.py 264): INFO * Acc@1 81.122 Acc@3 92.992 Acc@5 95.334 loss 0.842 flops 13.207 layer_flops 13.109 +[2024-12-11 05:54:57 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.1% +[2024-12-11 05:54:59 root] (main.py 576): INFO Max accuracy: 81.12% +[2024-12-11 05:55:02 root] (utils.py 283): INFO Epoch: [9] [ 0/2502] eta: 1:59:38 lr: 0.000010 loss_cls: 2.6427 (2.6427) grad_norm: 2.1873 (2.1873) time: 2.8690 data: 0.0008 max mem: 29202 +[2024-12-11 05:55:31 root] (utils.py 283): INFO Epoch: [9] [ 10/2502] eta: 2:01:35 lr: 0.000010 loss_cls: 2.8649 (2.7962) grad_norm: 1.6959 (1.7574) time: 2.9277 data: 0.0003 max mem: 29202 +[2024-12-11 05:56:00 root] (utils.py 283): INFO Epoch: [9] [ 20/2502] eta: 2:01:18 lr: 0.000010 loss_cls: 2.8425 (2.7169) grad_norm: 1.5991 (1.6710) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 05:56:30 root] (utils.py 283): INFO Epoch: [9] [ 30/2502] eta: 2:00:49 lr: 0.000010 loss_cls: 2.6753 (2.6692) grad_norm: 1.5705 (1.7010) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 05:56:59 root] (utils.py 283): INFO Epoch: [9] [ 40/2502] eta: 2:00:22 lr: 0.000010 loss_cls: 2.6145 (2.6739) grad_norm: 1.5649 (1.6733) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 05:57:29 root] (utils.py 283): INFO Epoch: [9] [ 50/2502] eta: 1:59:55 lr: 0.000010 loss_cls: 2.4853 (2.6177) grad_norm: 1.5711 (1.6573) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 05:57:58 root] (utils.py 283): INFO Epoch: [9] [ 60/2502] eta: 1:59:28 lr: 0.000010 loss_cls: 2.8173 (2.6563) grad_norm: 1.6566 (1.6700) time: 2.9389 data: 0.0002 max mem: 29202 +[2024-12-11 05:58:27 root] (utils.py 283): INFO Epoch: [9] [ 70/2502] eta: 1:58:59 lr: 0.000010 loss_cls: 2.8173 (2.6406) grad_norm: 1.6845 (1.6740) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 05:58:57 root] (utils.py 283): INFO Epoch: [9] [ 80/2502] eta: 1:58:30 lr: 0.000010 loss_cls: 2.6318 (2.6276) grad_norm: 1.5827 (1.6591) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 05:59:26 root] (utils.py 283): INFO Epoch: [9] [ 90/2502] eta: 1:58:02 lr: 0.000010 loss_cls: 2.6523 (2.6428) grad_norm: 1.5698 (1.6464) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 05:59:56 root] (utils.py 283): INFO Epoch: [9] [ 100/2502] eta: 1:57:38 lr: 0.000010 loss_cls: 2.6523 (2.6388) grad_norm: 1.6291 (1.6474) time: 2.9509 data: 0.0003 max mem: 29202 +[2024-12-11 06:00:25 root] (utils.py 283): INFO Epoch: [9] [ 110/2502] eta: 1:57:11 lr: 0.000010 loss_cls: 2.8855 (2.6715) grad_norm: 1.5268 (1.6508) time: 2.9535 data: 0.0003 max mem: 29202 +[2024-12-11 06:00:55 root] (utils.py 283): INFO Epoch: [9] [ 120/2502] eta: 1:56:42 lr: 0.000010 loss_cls: 2.8722 (2.6853) grad_norm: 1.5364 (1.6507) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-11 06:01:24 root] (utils.py 283): INFO Epoch: [9] [ 130/2502] eta: 1:56:12 lr: 0.000010 loss_cls: 2.8111 (2.6805) grad_norm: 1.5598 (1.6512) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 06:01:53 root] (utils.py 283): INFO Epoch: [9] [ 140/2502] eta: 1:55:43 lr: 0.000010 loss_cls: 2.8966 (2.6974) grad_norm: 1.5571 (1.6446) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 06:02:23 root] (utils.py 283): INFO Epoch: [9] [ 150/2502] eta: 1:55:13 lr: 0.000010 loss_cls: 2.9225 (2.6996) grad_norm: 1.5277 (1.6395) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 06:02:52 root] (utils.py 283): INFO Epoch: [9] [ 160/2502] eta: 1:54:44 lr: 0.000010 loss_cls: 2.5481 (2.6884) grad_norm: 1.5317 (1.6436) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 06:03:22 root] (utils.py 283): INFO Epoch: [9] [ 170/2502] eta: 1:54:14 lr: 0.000010 loss_cls: 2.4312 (2.6703) grad_norm: 1.6059 (1.6444) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 06:03:51 root] (utils.py 283): INFO Epoch: [9] [ 180/2502] eta: 1:53:45 lr: 0.000010 loss_cls: 2.3978 (2.6559) grad_norm: 1.5529 (1.6384) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 06:04:20 root] (utils.py 283): INFO Epoch: [9] [ 190/2502] eta: 1:53:13 lr: 0.000010 loss_cls: 2.4322 (2.6579) grad_norm: 1.5423 (1.6409) time: 2.9321 data: 0.0002 max mem: 29202 +[2024-12-11 06:04:49 root] (utils.py 283): INFO Epoch: [9] [ 200/2502] eta: 1:52:42 lr: 0.000010 loss_cls: 2.7920 (2.6638) grad_norm: 1.6495 (1.6406) time: 2.9204 data: 0.0002 max mem: 29202 +[2024-12-11 06:05:19 root] (utils.py 283): INFO Epoch: [9] [ 210/2502] eta: 1:52:12 lr: 0.000010 loss_cls: 2.8621 (2.6631) grad_norm: 1.5971 (1.6378) time: 2.9257 data: 0.0003 max mem: 29202 +[2024-12-11 06:05:48 root] (utils.py 283): INFO Epoch: [9] [ 220/2502] eta: 1:51:41 lr: 0.000010 loss_cls: 2.8362 (2.6704) grad_norm: 1.5761 (1.6355) time: 2.9285 data: 0.0003 max mem: 29202 +[2024-12-11 06:06:17 root] (utils.py 283): INFO Epoch: [9] [ 230/2502] eta: 1:51:10 lr: 0.000010 loss_cls: 2.8296 (2.6662) grad_norm: 1.5571 (1.6330) time: 2.9221 data: 0.0003 max mem: 29202 +[2024-12-11 06:06:46 root] (utils.py 283): INFO Epoch: [9] [ 240/2502] eta: 1:50:39 lr: 0.000010 loss_cls: 2.8287 (2.6717) grad_norm: 1.6079 (1.6353) time: 2.9199 data: 0.0003 max mem: 29202 +[2024-12-11 06:07:16 root] (utils.py 283): INFO Epoch: [9] [ 250/2502] eta: 1:50:09 lr: 0.000010 loss_cls: 2.9126 (2.6788) grad_norm: 1.6398 (1.6369) time: 2.9233 data: 0.0003 max mem: 29202 +[2024-12-11 06:07:45 root] (utils.py 283): INFO Epoch: [9] [ 260/2502] eta: 1:49:39 lr: 0.000010 loss_cls: 2.9126 (2.6766) grad_norm: 1.5893 (1.6346) time: 2.9238 data: 0.0003 max mem: 29202 +[2024-12-11 06:08:14 root] (utils.py 283): INFO Epoch: [9] [ 270/2502] eta: 1:49:08 lr: 0.000010 loss_cls: 2.7715 (2.6804) grad_norm: 1.5783 (1.6362) time: 2.9227 data: 0.0003 max mem: 29202 +[2024-12-11 06:08:43 root] (utils.py 283): INFO Epoch: [9] [ 280/2502] eta: 1:48:38 lr: 0.000010 loss_cls: 2.8544 (2.6820) grad_norm: 1.6199 (1.6391) time: 2.9233 data: 0.0003 max mem: 29202 +[2024-12-11 06:09:13 root] (utils.py 283): INFO Epoch: [9] [ 290/2502] eta: 1:48:08 lr: 0.000010 loss_cls: 2.9122 (2.6902) grad_norm: 1.6003 (1.6385) time: 2.9219 data: 0.0003 max mem: 29202 +[2024-12-11 06:09:42 root] (utils.py 283): INFO Epoch: [9] [ 300/2502] eta: 1:47:38 lr: 0.000010 loss_cls: 2.8527 (2.6865) grad_norm: 1.5343 (1.6386) time: 2.9223 data: 0.0003 max mem: 29202 +[2024-12-11 06:10:11 root] (utils.py 283): INFO Epoch: [9] [ 310/2502] eta: 1:47:09 lr: 0.000010 loss_cls: 2.6489 (2.6782) grad_norm: 1.5482 (1.6366) time: 2.9320 data: 0.0003 max mem: 29202 +[2024-12-11 06:10:40 root] (utils.py 283): INFO Epoch: [9] [ 320/2502] eta: 1:46:40 lr: 0.000010 loss_cls: 2.6207 (2.6777) grad_norm: 1.5337 (1.6357) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 06:11:10 root] (utils.py 283): INFO Epoch: [9] [ 330/2502] eta: 1:46:09 lr: 0.000010 loss_cls: 2.8027 (2.6829) grad_norm: 1.5254 (1.6339) time: 2.9242 data: 0.0003 max mem: 29202 +[2024-12-11 06:11:39 root] (utils.py 283): INFO Epoch: [9] [ 340/2502] eta: 1:45:39 lr: 0.000010 loss_cls: 2.8466 (2.6828) grad_norm: 1.6292 (1.6415) time: 2.9170 data: 0.0003 max mem: 29202 +[2024-12-11 06:12:08 root] (utils.py 283): INFO Epoch: [9] [ 350/2502] eta: 1:45:09 lr: 0.000010 loss_cls: 2.8316 (2.6843) grad_norm: 1.6337 (1.6432) time: 2.9196 data: 0.0003 max mem: 29202 +[2024-12-11 06:12:37 root] (utils.py 283): INFO Epoch: [9] [ 360/2502] eta: 1:44:39 lr: 0.000010 loss_cls: 2.4831 (2.6752) grad_norm: 1.7026 (1.6468) time: 2.9203 data: 0.0003 max mem: 29202 +[2024-12-11 06:13:06 root] (utils.py 283): INFO Epoch: [9] [ 370/2502] eta: 1:44:09 lr: 0.000010 loss_cls: 2.6608 (2.6753) grad_norm: 1.7253 (1.6473) time: 2.9186 data: 0.0003 max mem: 29202 +[2024-12-11 06:13:36 root] (utils.py 283): INFO Epoch: [9] [ 380/2502] eta: 1:43:39 lr: 0.000010 loss_cls: 2.7698 (2.6760) grad_norm: 1.5776 (1.6472) time: 2.9196 data: 0.0003 max mem: 29202 +[2024-12-11 06:14:05 root] (utils.py 283): INFO Epoch: [9] [ 390/2502] eta: 1:43:09 lr: 0.000010 loss_cls: 2.8721 (2.6776) grad_norm: 1.5786 (1.6462) time: 2.9225 data: 0.0003 max mem: 29202 +[2024-12-11 06:14:34 root] (utils.py 283): INFO Epoch: [9] [ 400/2502] eta: 1:42:39 lr: 0.000010 loss_cls: 2.7804 (2.6801) grad_norm: 1.5423 (1.6443) time: 2.9212 data: 0.0003 max mem: 29202 +[2024-12-11 06:15:03 root] (utils.py 283): INFO Epoch: [9] [ 410/2502] eta: 1:42:09 lr: 0.000010 loss_cls: 2.7320 (2.6801) grad_norm: 1.5245 (1.6426) time: 2.9195 data: 0.0003 max mem: 29202 +[2024-12-11 06:15:33 root] (utils.py 283): INFO Epoch: [9] [ 420/2502] eta: 1:41:40 lr: 0.000010 loss_cls: 2.8103 (2.6828) grad_norm: 1.4878 (1.6394) time: 2.9270 data: 0.0003 max mem: 29202 +[2024-12-11 06:16:02 root] (utils.py 283): INFO Epoch: [9] [ 430/2502] eta: 1:41:11 lr: 0.000010 loss_cls: 2.7985 (2.6808) grad_norm: 1.4457 (1.6363) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 06:16:31 root] (utils.py 283): INFO Epoch: [9] [ 440/2502] eta: 1:40:42 lr: 0.000010 loss_cls: 2.7641 (2.6822) grad_norm: 1.5104 (1.6353) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 06:17:01 root] (utils.py 283): INFO Epoch: [9] [ 450/2502] eta: 1:40:13 lr: 0.000010 loss_cls: 2.8440 (2.6826) grad_norm: 1.5639 (1.6354) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-11 06:17:30 root] (utils.py 283): INFO Epoch: [9] [ 460/2502] eta: 1:39:44 lr: 0.000010 loss_cls: 2.7426 (2.6822) grad_norm: 1.5246 (1.6333) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 06:17:59 root] (utils.py 283): INFO Epoch: [9] [ 470/2502] eta: 1:39:15 lr: 0.000010 loss_cls: 2.7426 (2.6841) grad_norm: 1.5219 (1.6321) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 06:18:29 root] (utils.py 283): INFO Epoch: [9] [ 480/2502] eta: 1:38:46 lr: 0.000010 loss_cls: 2.7463 (2.6824) grad_norm: 1.5889 (1.6315) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 06:18:58 root] (utils.py 283): INFO Epoch: [9] [ 490/2502] eta: 1:38:17 lr: 0.000010 loss_cls: 2.7988 (2.6859) grad_norm: 1.5741 (1.6307) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 06:19:28 root] (utils.py 283): INFO Epoch: [9] [ 500/2502] eta: 1:37:48 lr: 0.000010 loss_cls: 2.8822 (2.6890) grad_norm: 1.5741 (1.6318) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 06:19:57 root] (utils.py 283): INFO Epoch: [9] [ 510/2502] eta: 1:37:19 lr: 0.000010 loss_cls: 2.8101 (2.6868) grad_norm: 1.6141 (1.6324) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-11 06:20:26 root] (utils.py 283): INFO Epoch: [9] [ 520/2502] eta: 1:36:50 lr: 0.000010 loss_cls: 2.5937 (2.6849) grad_norm: 1.6100 (1.6322) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 06:20:56 root] (utils.py 283): INFO Epoch: [9] [ 530/2502] eta: 1:36:21 lr: 0.000010 loss_cls: 2.5565 (2.6799) grad_norm: 1.5949 (1.6314) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 06:21:25 root] (utils.py 283): INFO Epoch: [9] [ 540/2502] eta: 1:35:52 lr: 0.000010 loss_cls: 2.5565 (2.6773) grad_norm: 1.6013 (1.6327) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 06:21:55 root] (utils.py 283): INFO Epoch: [9] [ 550/2502] eta: 1:35:23 lr: 0.000010 loss_cls: 2.6805 (2.6779) grad_norm: 1.5500 (1.6321) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 06:22:24 root] (utils.py 283): INFO Epoch: [9] [ 560/2502] eta: 1:34:54 lr: 0.000010 loss_cls: 2.8382 (2.6763) grad_norm: 1.5500 (1.6325) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 06:22:53 root] (utils.py 283): INFO Epoch: [9] [ 570/2502] eta: 1:34:25 lr: 0.000010 loss_cls: 2.8382 (2.6775) grad_norm: 1.5835 (1.6312) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 06:23:23 root] (utils.py 283): INFO Epoch: [9] [ 580/2502] eta: 1:33:56 lr: 0.000010 loss_cls: 2.8847 (2.6783) grad_norm: 1.5835 (1.6323) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 06:23:52 root] (utils.py 283): INFO Epoch: [9] [ 590/2502] eta: 1:33:26 lr: 0.000010 loss_cls: 2.8326 (2.6795) grad_norm: 1.6218 (1.6348) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 06:24:21 root] (utils.py 283): INFO Epoch: [9] [ 600/2502] eta: 1:32:57 lr: 0.000010 loss_cls: 2.8043 (2.6761) grad_norm: 1.6545 (1.6351) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 06:24:51 root] (utils.py 283): INFO Epoch: [9] [ 610/2502] eta: 1:32:28 lr: 0.000010 loss_cls: 2.6452 (2.6779) grad_norm: 1.6520 (1.6343) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 06:25:20 root] (utils.py 283): INFO Epoch: [9] [ 620/2502] eta: 1:31:59 lr: 0.000010 loss_cls: 2.6452 (2.6766) grad_norm: 1.6520 (1.6346) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 06:25:49 root] (utils.py 283): INFO Epoch: [9] [ 630/2502] eta: 1:31:29 lr: 0.000010 loss_cls: 2.8165 (2.6767) grad_norm: 1.6324 (1.6350) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 06:26:19 root] (utils.py 283): INFO Epoch: [9] [ 640/2502] eta: 1:31:01 lr: 0.000010 loss_cls: 2.7095 (2.6764) grad_norm: 1.5883 (1.6335) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-11 06:26:48 root] (utils.py 283): INFO Epoch: [9] [ 650/2502] eta: 1:30:32 lr: 0.000010 loss_cls: 2.6847 (2.6743) grad_norm: 1.5400 (1.6342) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 06:27:18 root] (utils.py 283): INFO Epoch: [9] [ 660/2502] eta: 1:30:02 lr: 0.000010 loss_cls: 2.6424 (2.6743) grad_norm: 1.5649 (1.6338) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 06:27:47 root] (utils.py 283): INFO Epoch: [9] [ 670/2502] eta: 1:29:33 lr: 0.000010 loss_cls: 2.6424 (2.6721) grad_norm: 1.5568 (1.6327) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 06:28:17 root] (utils.py 283): INFO Epoch: [9] [ 680/2502] eta: 1:29:04 lr: 0.000010 loss_cls: 2.8115 (2.6745) grad_norm: 1.5878 (1.6332) time: 2.9366 data: 0.0002 max mem: 29202 +[2024-12-11 06:28:46 root] (utils.py 283): INFO Epoch: [9] [ 690/2502] eta: 1:28:35 lr: 0.000010 loss_cls: 2.9467 (2.6737) grad_norm: 1.5878 (1.6322) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 06:29:15 root] (utils.py 283): INFO Epoch: [9] [ 700/2502] eta: 1:28:06 lr: 0.000010 loss_cls: 2.7926 (2.6745) grad_norm: 1.5605 (1.6317) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 06:29:45 root] (utils.py 283): INFO Epoch: [9] [ 710/2502] eta: 1:27:36 lr: 0.000010 loss_cls: 2.7926 (2.6767) grad_norm: 1.5849 (1.6340) time: 2.9326 data: 0.0003 max mem: 29202 +[2024-12-11 06:30:14 root] (utils.py 283): INFO Epoch: [9] [ 720/2502] eta: 1:27:07 lr: 0.000010 loss_cls: 2.8154 (2.6791) grad_norm: 1.6312 (1.6344) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-11 06:30:43 root] (utils.py 283): INFO Epoch: [9] [ 730/2502] eta: 1:26:38 lr: 0.000010 loss_cls: 2.7638 (2.6787) grad_norm: 1.5085 (1.6334) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 06:31:13 root] (utils.py 283): INFO Epoch: [9] [ 740/2502] eta: 1:26:08 lr: 0.000010 loss_cls: 2.7638 (2.6803) grad_norm: 1.5342 (1.6352) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 06:31:42 root] (utils.py 283): INFO Epoch: [9] [ 750/2502] eta: 1:25:39 lr: 0.000010 loss_cls: 2.8394 (2.6827) grad_norm: 1.5738 (1.6353) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 06:32:11 root] (utils.py 283): INFO Epoch: [9] [ 760/2502] eta: 1:25:10 lr: 0.000010 loss_cls: 2.8311 (2.6837) grad_norm: 1.5413 (1.6340) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 06:32:41 root] (utils.py 283): INFO Epoch: [9] [ 770/2502] eta: 1:24:41 lr: 0.000010 loss_cls: 2.4996 (2.6797) grad_norm: 1.5794 (1.6343) time: 2.9353 data: 0.0003 max mem: 29202 +[2024-12-11 06:33:10 root] (utils.py 283): INFO Epoch: [9] [ 780/2502] eta: 1:24:11 lr: 0.000010 loss_cls: 2.8172 (2.6794) grad_norm: 1.6607 (1.6340) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 06:33:40 root] (utils.py 283): INFO Epoch: [9] [ 790/2502] eta: 1:23:42 lr: 0.000010 loss_cls: 2.8706 (2.6815) grad_norm: 1.6471 (1.6344) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 06:34:09 root] (utils.py 283): INFO Epoch: [9] [ 800/2502] eta: 1:23:13 lr: 0.000010 loss_cls: 2.9246 (2.6839) grad_norm: 1.5743 (1.6339) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-11 06:34:39 root] (utils.py 283): INFO Epoch: [9] [ 810/2502] eta: 1:22:45 lr: 0.000010 loss_cls: 2.9241 (2.6848) grad_norm: 1.5048 (1.6320) time: 2.9652 data: 0.0003 max mem: 29202 +[2024-12-11 06:35:08 root] (utils.py 283): INFO Epoch: [9] [ 820/2502] eta: 1:22:16 lr: 0.000010 loss_cls: 2.7073 (2.6840) grad_norm: 1.5654 (1.6342) time: 2.9611 data: 0.0003 max mem: 29202 +[2024-12-11 06:35:38 root] (utils.py 283): INFO Epoch: [9] [ 830/2502] eta: 1:21:46 lr: 0.000010 loss_cls: 2.9468 (2.6864) grad_norm: 1.7671 (1.6367) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 06:36:07 root] (utils.py 283): INFO Epoch: [9] [ 840/2502] eta: 1:21:17 lr: 0.000010 loss_cls: 2.9468 (2.6854) grad_norm: 1.5854 (1.6355) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 06:36:37 root] (utils.py 283): INFO Epoch: [9] [ 850/2502] eta: 1:20:48 lr: 0.000010 loss_cls: 2.6538 (2.6867) grad_norm: 1.5085 (1.6337) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 06:37:06 root] (utils.py 283): INFO Epoch: [9] [ 860/2502] eta: 1:20:19 lr: 0.000010 loss_cls: 2.8945 (2.6858) grad_norm: 1.4695 (1.6331) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 06:37:36 root] (utils.py 283): INFO Epoch: [9] [ 870/2502] eta: 1:19:50 lr: 0.000010 loss_cls: 2.6562 (2.6849) grad_norm: 1.4958 (1.6319) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-11 06:38:05 root] (utils.py 283): INFO Epoch: [9] [ 880/2502] eta: 1:19:21 lr: 0.000010 loss_cls: 2.7867 (2.6847) grad_norm: 1.5533 (1.6324) time: 2.9561 data: 0.0003 max mem: 29202 +[2024-12-11 06:38:35 root] (utils.py 283): INFO Epoch: [9] [ 890/2502] eta: 1:18:52 lr: 0.000010 loss_cls: 2.5055 (2.6818) grad_norm: 1.5930 (1.6350) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-11 06:39:04 root] (utils.py 283): INFO Epoch: [9] [ 900/2502] eta: 1:18:22 lr: 0.000010 loss_cls: 2.5055 (2.6798) grad_norm: 1.6202 (1.6348) time: 2.9459 data: 0.0003 max mem: 29202 +[2024-12-11 06:39:33 root] (utils.py 283): INFO Epoch: [9] [ 910/2502] eta: 1:17:53 lr: 0.000010 loss_cls: 2.7244 (2.6778) grad_norm: 1.5213 (1.6338) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 06:40:03 root] (utils.py 283): INFO Epoch: [9] [ 920/2502] eta: 1:17:24 lr: 0.000010 loss_cls: 2.7722 (2.6782) grad_norm: 1.6177 (1.6356) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 06:40:32 root] (utils.py 283): INFO Epoch: [9] [ 930/2502] eta: 1:16:55 lr: 0.000010 loss_cls: 2.7496 (2.6788) grad_norm: 1.6052 (1.6349) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-11 06:41:02 root] (utils.py 283): INFO Epoch: [9] [ 940/2502] eta: 1:16:26 lr: 0.000010 loss_cls: 2.7496 (2.6799) grad_norm: 1.5143 (1.6346) time: 2.9563 data: 0.0003 max mem: 29202 +[2024-12-11 06:41:31 root] (utils.py 283): INFO Epoch: [9] [ 950/2502] eta: 1:15:56 lr: 0.000010 loss_cls: 2.9018 (2.6805) grad_norm: 1.5051 (1.6334) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-11 06:42:01 root] (utils.py 283): INFO Epoch: [9] [ 960/2502] eta: 1:15:27 lr: 0.000010 loss_cls: 2.7472 (2.6810) grad_norm: 1.4732 (1.6338) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 06:42:30 root] (utils.py 283): INFO Epoch: [9] [ 970/2502] eta: 1:14:58 lr: 0.000010 loss_cls: 2.7096 (2.6804) grad_norm: 1.5092 (1.6334) time: 2.9372 data: 0.0002 max mem: 29202 +[2024-12-11 06:42:59 root] (utils.py 283): INFO Epoch: [9] [ 980/2502] eta: 1:14:28 lr: 0.000010 loss_cls: 2.6115 (2.6782) grad_norm: 1.5485 (1.6330) time: 2.9375 data: 0.0002 max mem: 29202 +[2024-12-11 06:43:29 root] (utils.py 283): INFO Epoch: [9] [ 990/2502] eta: 1:13:59 lr: 0.000010 loss_cls: 2.5880 (2.6777) grad_norm: 1.5485 (1.6332) time: 2.9402 data: 0.0002 max mem: 29202 +[2024-12-11 06:43:58 root] (utils.py 283): INFO Epoch: [9] [1000/2502] eta: 1:13:30 lr: 0.000010 loss_cls: 2.7555 (2.6791) grad_norm: 1.5666 (1.6332) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-11 06:44:28 root] (utils.py 283): INFO Epoch: [9] [1010/2502] eta: 1:13:01 lr: 0.000010 loss_cls: 2.7222 (2.6778) grad_norm: 1.5684 (1.6327) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-11 06:44:57 root] (utils.py 283): INFO Epoch: [9] [1020/2502] eta: 1:12:31 lr: 0.000010 loss_cls: 2.8244 (2.6788) grad_norm: 1.5675 (1.6320) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 06:45:27 root] (utils.py 283): INFO Epoch: [9] [1030/2502] eta: 1:12:02 lr: 0.000010 loss_cls: 2.8402 (2.6794) grad_norm: 1.5832 (1.6322) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-11 06:45:56 root] (utils.py 283): INFO Epoch: [9] [1040/2502] eta: 1:11:33 lr: 0.000010 loss_cls: 2.7262 (2.6801) grad_norm: 1.5667 (1.6316) time: 2.9541 data: 0.0003 max mem: 29202 +[2024-12-11 06:46:26 root] (utils.py 283): INFO Epoch: [9] [1050/2502] eta: 1:11:04 lr: 0.000010 loss_cls: 2.8167 (2.6812) grad_norm: 1.6586 (1.6327) time: 2.9464 data: 0.0003 max mem: 29202 +[2024-12-11 06:46:55 root] (utils.py 283): INFO Epoch: [9] [1060/2502] eta: 1:10:35 lr: 0.000010 loss_cls: 2.8306 (2.6826) grad_norm: 1.6188 (1.6326) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-11 06:47:25 root] (utils.py 283): INFO Epoch: [9] [1070/2502] eta: 1:10:06 lr: 0.000010 loss_cls: 2.8381 (2.6831) grad_norm: 1.5929 (1.6332) time: 2.9537 data: 0.0004 max mem: 29202 +[2024-12-11 06:47:54 root] (utils.py 283): INFO Epoch: [9] [1080/2502] eta: 1:09:36 lr: 0.000010 loss_cls: 2.8098 (2.6844) grad_norm: 1.6442 (1.6333) time: 2.9494 data: 0.0003 max mem: 29202 +[2024-12-11 06:48:24 root] (utils.py 283): INFO Epoch: [9] [1090/2502] eta: 1:09:07 lr: 0.000010 loss_cls: 3.0287 (2.6867) grad_norm: 1.6489 (1.6350) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-11 06:48:53 root] (utils.py 283): INFO Epoch: [9] [1100/2502] eta: 1:08:38 lr: 0.000010 loss_cls: 3.0382 (2.6881) grad_norm: 1.5941 (1.6366) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-11 06:49:23 root] (utils.py 283): INFO Epoch: [9] [1110/2502] eta: 1:08:09 lr: 0.000010 loss_cls: 2.8190 (2.6887) grad_norm: 1.6410 (1.6368) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-11 06:49:52 root] (utils.py 283): INFO Epoch: [9] [1120/2502] eta: 1:07:39 lr: 0.000010 loss_cls: 2.7179 (2.6884) grad_norm: 1.5626 (1.6359) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-11 06:50:22 root] (utils.py 283): INFO Epoch: [9] [1130/2502] eta: 1:07:10 lr: 0.000010 loss_cls: 2.7653 (2.6877) grad_norm: 1.5665 (1.6369) time: 2.9477 data: 0.0002 max mem: 29202 +[2024-12-11 06:50:51 root] (utils.py 283): INFO Epoch: [9] [1140/2502] eta: 1:06:41 lr: 0.000010 loss_cls: 2.7677 (2.6876) grad_norm: 1.5927 (1.6377) time: 2.9469 data: 0.0002 max mem: 29202 +[2024-12-11 06:51:21 root] (utils.py 283): INFO Epoch: [9] [1150/2502] eta: 1:06:12 lr: 0.000010 loss_cls: 2.7863 (2.6876) grad_norm: 1.6329 (1.6377) time: 2.9439 data: 0.0002 max mem: 29202 +[2024-12-11 06:51:50 root] (utils.py 283): INFO Epoch: [9] [1160/2502] eta: 1:05:42 lr: 0.000010 loss_cls: 2.8577 (2.6897) grad_norm: 1.6926 (1.6381) time: 2.9469 data: 0.0002 max mem: 29202 +[2024-12-11 06:52:20 root] (utils.py 283): INFO Epoch: [9] [1170/2502] eta: 1:05:13 lr: 0.000010 loss_cls: 2.9191 (2.6888) grad_norm: 1.6926 (1.6394) time: 2.9534 data: 0.0002 max mem: 29202 +[2024-12-11 06:52:49 root] (utils.py 283): INFO Epoch: [9] [1180/2502] eta: 1:04:44 lr: 0.000010 loss_cls: 2.8824 (2.6898) grad_norm: 1.6194 (1.6394) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-11 06:53:18 root] (utils.py 283): INFO Epoch: [9] [1190/2502] eta: 1:04:14 lr: 0.000010 loss_cls: 2.8776 (2.6894) grad_norm: 1.5837 (1.6388) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 06:53:48 root] (utils.py 283): INFO Epoch: [9] [1200/2502] eta: 1:03:45 lr: 0.000010 loss_cls: 2.8100 (2.6902) grad_norm: 1.5451 (1.6382) time: 2.9407 data: 0.0002 max mem: 29202 +[2024-12-11 06:54:17 root] (utils.py 283): INFO Epoch: [9] [1210/2502] eta: 1:03:16 lr: 0.000010 loss_cls: 2.8014 (2.6892) grad_norm: 1.5626 (1.6378) time: 2.9381 data: 0.0002 max mem: 29202 +[2024-12-11 06:54:47 root] (utils.py 283): INFO Epoch: [9] [1220/2502] eta: 1:02:46 lr: 0.000010 loss_cls: 2.7117 (2.6895) grad_norm: 1.5876 (1.6379) time: 2.9406 data: 0.0002 max mem: 29202 +[2024-12-11 06:55:16 root] (utils.py 283): INFO Epoch: [9] [1230/2502] eta: 1:02:17 lr: 0.000010 loss_cls: 2.7239 (2.6888) grad_norm: 1.5326 (1.6381) time: 2.9436 data: 0.0002 max mem: 29202 +[2024-12-11 06:55:45 root] (utils.py 283): INFO Epoch: [9] [1240/2502] eta: 1:01:48 lr: 0.000010 loss_cls: 2.6907 (2.6883) grad_norm: 1.5301 (1.6387) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 06:56:15 root] (utils.py 283): INFO Epoch: [9] [1250/2502] eta: 1:01:18 lr: 0.000010 loss_cls: 2.8050 (2.6893) grad_norm: 1.6435 (1.6387) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 06:56:44 root] (utils.py 283): INFO Epoch: [9] [1260/2502] eta: 1:00:49 lr: 0.000010 loss_cls: 2.8000 (2.6890) grad_norm: 1.6452 (1.6395) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 06:57:14 root] (utils.py 283): INFO Epoch: [9] [1270/2502] eta: 1:00:20 lr: 0.000010 loss_cls: 2.4041 (2.6867) grad_norm: 1.6251 (1.6395) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 06:57:43 root] (utils.py 283): INFO Epoch: [9] [1280/2502] eta: 0:59:50 lr: 0.000010 loss_cls: 2.4923 (2.6861) grad_norm: 1.6432 (1.6397) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-11 06:58:13 root] (utils.py 283): INFO Epoch: [9] [1290/2502] eta: 0:59:21 lr: 0.000010 loss_cls: 2.6917 (2.6863) grad_norm: 1.6432 (1.6394) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 06:58:42 root] (utils.py 283): INFO Epoch: [9] [1300/2502] eta: 0:58:52 lr: 0.000010 loss_cls: 2.6917 (2.6858) grad_norm: 1.6386 (1.6396) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 06:59:11 root] (utils.py 283): INFO Epoch: [9] [1310/2502] eta: 0:58:22 lr: 0.000010 loss_cls: 2.8006 (2.6864) grad_norm: 1.6051 (1.6394) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 06:59:41 root] (utils.py 283): INFO Epoch: [9] [1320/2502] eta: 0:57:53 lr: 0.000010 loss_cls: 2.9021 (2.6876) grad_norm: 1.5565 (1.6394) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 07:00:10 root] (utils.py 283): INFO Epoch: [9] [1330/2502] eta: 0:57:23 lr: 0.000010 loss_cls: 2.8390 (2.6858) grad_norm: 1.5432 (1.6383) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 07:00:40 root] (utils.py 283): INFO Epoch: [9] [1340/2502] eta: 0:56:54 lr: 0.000010 loss_cls: 2.6818 (2.6860) grad_norm: 1.5432 (1.6390) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 07:01:09 root] (utils.py 283): INFO Epoch: [9] [1350/2502] eta: 0:56:25 lr: 0.000010 loss_cls: 3.0275 (2.6885) grad_norm: 1.5673 (1.6405) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 07:01:39 root] (utils.py 283): INFO Epoch: [9] [1360/2502] eta: 0:55:56 lr: 0.000010 loss_cls: 3.0340 (2.6879) grad_norm: 1.5439 (1.6403) time: 2.9543 data: 0.0003 max mem: 29202 +[2024-12-11 07:02:08 root] (utils.py 283): INFO Epoch: [9] [1370/2502] eta: 0:55:26 lr: 0.000010 loss_cls: 2.8388 (2.6886) grad_norm: 1.5370 (1.6407) time: 2.9547 data: 0.0003 max mem: 29202 +[2024-12-11 07:02:38 root] (utils.py 283): INFO Epoch: [9] [1380/2502] eta: 0:54:57 lr: 0.000010 loss_cls: 2.8948 (2.6890) grad_norm: 1.5467 (1.6402) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 07:03:07 root] (utils.py 283): INFO Epoch: [9] [1390/2502] eta: 0:54:28 lr: 0.000010 loss_cls: 2.8259 (2.6893) grad_norm: 1.5696 (1.6407) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 07:03:36 root] (utils.py 283): INFO Epoch: [9] [1400/2502] eta: 0:53:58 lr: 0.000010 loss_cls: 2.8494 (2.6893) grad_norm: 1.5696 (1.6398) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 07:04:06 root] (utils.py 283): INFO Epoch: [9] [1410/2502] eta: 0:53:29 lr: 0.000010 loss_cls: 2.8126 (2.6890) grad_norm: 1.5473 (1.6395) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-11 07:04:35 root] (utils.py 283): INFO Epoch: [9] [1420/2502] eta: 0:53:00 lr: 0.000010 loss_cls: 2.7400 (2.6900) grad_norm: 1.6053 (1.6395) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-11 07:05:05 root] (utils.py 283): INFO Epoch: [9] [1430/2502] eta: 0:52:30 lr: 0.000010 loss_cls: 2.7400 (2.6892) grad_norm: 1.6204 (1.6419) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 07:05:34 root] (utils.py 283): INFO Epoch: [9] [1440/2502] eta: 0:52:01 lr: 0.000010 loss_cls: 2.7042 (2.6898) grad_norm: 1.5846 (1.6419) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 07:06:04 root] (utils.py 283): INFO Epoch: [9] [1450/2502] eta: 0:51:31 lr: 0.000010 loss_cls: 2.8507 (2.6906) grad_norm: 1.5782 (1.6424) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 07:06:33 root] (utils.py 283): INFO Epoch: [9] [1460/2502] eta: 0:51:02 lr: 0.000010 loss_cls: 2.9091 (2.6897) grad_norm: 1.6488 (1.6429) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 07:07:03 root] (utils.py 283): INFO Epoch: [9] [1470/2502] eta: 0:50:33 lr: 0.000010 loss_cls: 2.7280 (2.6903) grad_norm: 1.7086 (1.6435) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 07:07:32 root] (utils.py 283): INFO Epoch: [9] [1480/2502] eta: 0:50:03 lr: 0.000010 loss_cls: 2.6569 (2.6895) grad_norm: 1.5691 (1.6424) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 07:08:01 root] (utils.py 283): INFO Epoch: [9] [1490/2502] eta: 0:49:34 lr: 0.000010 loss_cls: 2.8908 (2.6913) grad_norm: 1.4794 (1.6428) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 07:08:31 root] (utils.py 283): INFO Epoch: [9] [1500/2502] eta: 0:49:05 lr: 0.000010 loss_cls: 2.9412 (2.6921) grad_norm: 1.5793 (1.6425) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-11 07:09:00 root] (utils.py 283): INFO Epoch: [9] [1510/2502] eta: 0:48:35 lr: 0.000010 loss_cls: 2.7506 (2.6912) grad_norm: 1.5781 (1.6420) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-11 07:09:30 root] (utils.py 283): INFO Epoch: [9] [1520/2502] eta: 0:48:06 lr: 0.000010 loss_cls: 2.5091 (2.6902) grad_norm: 1.6265 (1.6423) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 07:09:59 root] (utils.py 283): INFO Epoch: [9] [1530/2502] eta: 0:47:36 lr: 0.000010 loss_cls: 2.6707 (2.6907) grad_norm: 1.6013 (1.6427) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 07:10:28 root] (utils.py 283): INFO Epoch: [9] [1540/2502] eta: 0:47:07 lr: 0.000010 loss_cls: 2.8397 (2.6909) grad_norm: 1.5501 (1.6425) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 07:10:58 root] (utils.py 283): INFO Epoch: [9] [1550/2502] eta: 0:46:38 lr: 0.000010 loss_cls: 2.7599 (2.6908) grad_norm: 1.5253 (1.6419) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 07:11:27 root] (utils.py 283): INFO Epoch: [9] [1560/2502] eta: 0:46:08 lr: 0.000010 loss_cls: 2.6749 (2.6908) grad_norm: 1.5253 (1.6415) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 07:11:57 root] (utils.py 283): INFO Epoch: [9] [1570/2502] eta: 0:45:39 lr: 0.000010 loss_cls: 2.8054 (2.6905) grad_norm: 1.5741 (1.6419) time: 2.9538 data: 0.0003 max mem: 29202 +[2024-12-11 07:12:26 root] (utils.py 283): INFO Epoch: [9] [1580/2502] eta: 0:45:10 lr: 0.000010 loss_cls: 2.8054 (2.6920) grad_norm: 1.5519 (1.6433) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 07:12:56 root] (utils.py 283): INFO Epoch: [9] [1590/2502] eta: 0:44:40 lr: 0.000010 loss_cls: 2.8132 (2.6918) grad_norm: 1.5770 (1.6437) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 07:13:25 root] (utils.py 283): INFO Epoch: [9] [1600/2502] eta: 0:44:11 lr: 0.000010 loss_cls: 2.8177 (2.6912) grad_norm: 1.5770 (1.6431) time: 2.9353 data: 0.0002 max mem: 29202 +[2024-12-11 07:13:54 root] (utils.py 283): INFO Epoch: [9] [1610/2502] eta: 0:43:41 lr: 0.000010 loss_cls: 2.7901 (2.6907) grad_norm: 1.5063 (1.6426) time: 2.9349 data: 0.0002 max mem: 29202 +[2024-12-11 07:14:24 root] (utils.py 283): INFO Epoch: [9] [1620/2502] eta: 0:43:12 lr: 0.000010 loss_cls: 2.7901 (2.6918) grad_norm: 1.5093 (1.6425) time: 2.9356 data: 0.0002 max mem: 29202 +[2024-12-11 07:14:53 root] (utils.py 283): INFO Epoch: [9] [1630/2502] eta: 0:42:43 lr: 0.000010 loss_cls: 2.8845 (2.6924) grad_norm: 1.5775 (1.6426) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 07:15:22 root] (utils.py 283): INFO Epoch: [9] [1640/2502] eta: 0:42:13 lr: 0.000010 loss_cls: 2.8734 (2.6931) grad_norm: 1.5838 (1.6424) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-11 07:15:52 root] (utils.py 283): INFO Epoch: [9] [1650/2502] eta: 0:41:44 lr: 0.000010 loss_cls: 2.8361 (2.6938) grad_norm: 1.5635 (1.6419) time: 2.9500 data: 0.0003 max mem: 29202 +[2024-12-11 07:16:21 root] (utils.py 283): INFO Epoch: [9] [1660/2502] eta: 0:41:14 lr: 0.000010 loss_cls: 2.6976 (2.6928) grad_norm: 1.5350 (1.6413) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-11 07:16:51 root] (utils.py 283): INFO Epoch: [9] [1670/2502] eta: 0:40:45 lr: 0.000010 loss_cls: 2.6976 (2.6931) grad_norm: 1.5680 (1.6410) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 07:17:20 root] (utils.py 283): INFO Epoch: [9] [1680/2502] eta: 0:40:16 lr: 0.000010 loss_cls: 2.8136 (2.6935) grad_norm: 1.5680 (1.6409) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 07:17:50 root] (utils.py 283): INFO Epoch: [9] [1690/2502] eta: 0:39:46 lr: 0.000010 loss_cls: 2.8155 (2.6937) grad_norm: 1.5054 (1.6402) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 07:18:19 root] (utils.py 283): INFO Epoch: [9] [1700/2502] eta: 0:39:17 lr: 0.000010 loss_cls: 2.8214 (2.6935) grad_norm: 1.5054 (1.6398) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 07:18:48 root] (utils.py 283): INFO Epoch: [9] [1710/2502] eta: 0:38:48 lr: 0.000010 loss_cls: 2.8214 (2.6940) grad_norm: 1.5483 (1.6399) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 07:19:18 root] (utils.py 283): INFO Epoch: [9] [1720/2502] eta: 0:38:18 lr: 0.000010 loss_cls: 2.8208 (2.6928) grad_norm: 1.6122 (1.6404) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 07:19:47 root] (utils.py 283): INFO Epoch: [9] [1730/2502] eta: 0:37:49 lr: 0.000010 loss_cls: 2.8052 (2.6934) grad_norm: 1.6007 (1.6401) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 07:20:17 root] (utils.py 283): INFO Epoch: [9] [1740/2502] eta: 0:37:19 lr: 0.000010 loss_cls: 2.7449 (2.6926) grad_norm: 1.5688 (1.6403) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-11 07:20:46 root] (utils.py 283): INFO Epoch: [9] [1750/2502] eta: 0:36:50 lr: 0.000010 loss_cls: 2.8267 (2.6933) grad_norm: 1.6718 (1.6415) time: 2.9500 data: 0.0003 max mem: 29202 +[2024-12-11 07:21:16 root] (utils.py 283): INFO Epoch: [9] [1760/2502] eta: 0:36:21 lr: 0.000010 loss_cls: 2.8679 (2.6942) grad_norm: 1.6886 (1.6411) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-11 07:21:45 root] (utils.py 283): INFO Epoch: [9] [1770/2502] eta: 0:35:51 lr: 0.000010 loss_cls: 2.7553 (2.6933) grad_norm: 1.5858 (1.6414) time: 2.9503 data: 0.0003 max mem: 29202 +[2024-12-11 07:22:15 root] (utils.py 283): INFO Epoch: [9] [1780/2502] eta: 0:35:22 lr: 0.000010 loss_cls: 2.6804 (2.6933) grad_norm: 1.6062 (1.6415) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 07:22:44 root] (utils.py 283): INFO Epoch: [9] [1790/2502] eta: 0:34:53 lr: 0.000010 loss_cls: 2.8277 (2.6956) grad_norm: 1.5870 (1.6416) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 07:23:13 root] (utils.py 283): INFO Epoch: [9] [1800/2502] eta: 0:34:23 lr: 0.000010 loss_cls: 2.9932 (2.6963) grad_norm: 1.5698 (1.6418) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 07:23:43 root] (utils.py 283): INFO Epoch: [9] [1810/2502] eta: 0:33:54 lr: 0.000010 loss_cls: 2.7581 (2.6960) grad_norm: 1.5392 (1.6409) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 07:24:12 root] (utils.py 283): INFO Epoch: [9] [1820/2502] eta: 0:33:24 lr: 0.000010 loss_cls: 2.6644 (2.6957) grad_norm: 1.4702 (1.6411) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-11 07:24:42 root] (utils.py 283): INFO Epoch: [9] [1830/2502] eta: 0:32:55 lr: 0.000010 loss_cls: 2.7352 (2.6957) grad_norm: 1.7695 (1.6426) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 07:25:11 root] (utils.py 283): INFO Epoch: [9] [1840/2502] eta: 0:32:26 lr: 0.000010 loss_cls: 2.7352 (2.6951) grad_norm: 1.6745 (1.6423) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 07:25:41 root] (utils.py 283): INFO Epoch: [9] [1850/2502] eta: 0:31:56 lr: 0.000010 loss_cls: 2.6650 (2.6945) grad_norm: 1.6065 (1.6422) time: 2.9532 data: 0.0003 max mem: 29202 +[2024-12-11 07:26:10 root] (utils.py 283): INFO Epoch: [9] [1860/2502] eta: 0:31:27 lr: 0.000010 loss_cls: 2.6003 (2.6941) grad_norm: 1.6668 (1.6421) time: 2.9559 data: 0.0003 max mem: 29202 +[2024-12-11 07:26:40 root] (utils.py 283): INFO Epoch: [9] [1870/2502] eta: 0:30:58 lr: 0.000010 loss_cls: 2.5283 (2.6930) grad_norm: 1.6668 (1.6423) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 07:27:09 root] (utils.py 283): INFO Epoch: [9] [1880/2502] eta: 0:30:28 lr: 0.000010 loss_cls: 2.6104 (2.6936) grad_norm: 1.5763 (1.6420) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 07:27:39 root] (utils.py 283): INFO Epoch: [9] [1890/2502] eta: 0:29:59 lr: 0.000010 loss_cls: 2.9210 (2.6944) grad_norm: 1.4838 (1.6413) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 07:28:08 root] (utils.py 283): INFO Epoch: [9] [1900/2502] eta: 0:29:29 lr: 0.000010 loss_cls: 2.8701 (2.6942) grad_norm: 1.5515 (1.6410) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 07:28:37 root] (utils.py 283): INFO Epoch: [9] [1910/2502] eta: 0:29:00 lr: 0.000010 loss_cls: 2.7040 (2.6929) grad_norm: 1.5977 (1.6409) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 07:29:07 root] (utils.py 283): INFO Epoch: [9] [1920/2502] eta: 0:28:31 lr: 0.000010 loss_cls: 2.7438 (2.6934) grad_norm: 1.5714 (1.6406) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 07:29:36 root] (utils.py 283): INFO Epoch: [9] [1930/2502] eta: 0:28:01 lr: 0.000010 loss_cls: 2.6035 (2.6912) grad_norm: 1.5077 (1.6400) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 07:30:06 root] (utils.py 283): INFO Epoch: [9] [1940/2502] eta: 0:27:32 lr: 0.000010 loss_cls: 2.4088 (2.6909) grad_norm: 1.5241 (1.6404) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 07:30:35 root] (utils.py 283): INFO Epoch: [9] [1950/2502] eta: 0:27:02 lr: 0.000010 loss_cls: 2.8206 (2.6913) grad_norm: 1.6274 (1.6401) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 07:31:04 root] (utils.py 283): INFO Epoch: [9] [1960/2502] eta: 0:26:33 lr: 0.000010 loss_cls: 2.8668 (2.6922) grad_norm: 1.5588 (1.6400) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 07:31:34 root] (utils.py 283): INFO Epoch: [9] [1970/2502] eta: 0:26:04 lr: 0.000010 loss_cls: 2.9533 (2.6926) grad_norm: 1.5405 (1.6399) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 07:32:03 root] (utils.py 283): INFO Epoch: [9] [1980/2502] eta: 0:25:34 lr: 0.000010 loss_cls: 2.9050 (2.6924) grad_norm: 1.5405 (1.6399) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 07:32:32 root] (utils.py 283): INFO Epoch: [9] [1990/2502] eta: 0:25:05 lr: 0.000010 loss_cls: 2.8419 (2.6928) grad_norm: 1.5125 (1.6393) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 07:33:02 root] (utils.py 283): INFO Epoch: [9] [2000/2502] eta: 0:24:35 lr: 0.000010 loss_cls: 2.7436 (2.6930) grad_norm: 1.5155 (1.6390) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 07:33:31 root] (utils.py 283): INFO Epoch: [9] [2010/2502] eta: 0:24:06 lr: 0.000010 loss_cls: 2.7482 (2.6928) grad_norm: 1.5463 (1.6409) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 07:34:01 root] (utils.py 283): INFO Epoch: [9] [2020/2502] eta: 0:23:37 lr: 0.000010 loss_cls: 2.8535 (2.6939) grad_norm: 1.5463 (1.6409) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 07:34:30 root] (utils.py 283): INFO Epoch: [9] [2030/2502] eta: 0:23:07 lr: 0.000010 loss_cls: 2.9335 (2.6949) grad_norm: 1.5324 (1.6415) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 07:34:59 root] (utils.py 283): INFO Epoch: [9] [2040/2502] eta: 0:22:38 lr: 0.000010 loss_cls: 2.8395 (2.6944) grad_norm: 1.5097 (1.6410) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 07:35:29 root] (utils.py 283): INFO Epoch: [9] [2050/2502] eta: 0:22:08 lr: 0.000010 loss_cls: 2.5624 (2.6942) grad_norm: 1.4647 (1.6408) time: 2.9364 data: 0.0002 max mem: 29202 +[2024-12-11 07:35:58 root] (utils.py 283): INFO Epoch: [9] [2060/2502] eta: 0:21:39 lr: 0.000010 loss_cls: 2.8801 (2.6952) grad_norm: 1.5585 (1.6405) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 07:36:28 root] (utils.py 283): INFO Epoch: [9] [2070/2502] eta: 0:21:10 lr: 0.000010 loss_cls: 2.8574 (2.6951) grad_norm: 1.5787 (1.6400) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 07:36:57 root] (utils.py 283): INFO Epoch: [9] [2080/2502] eta: 0:20:40 lr: 0.000010 loss_cls: 2.7092 (2.6951) grad_norm: 1.5424 (1.6399) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-11 07:37:26 root] (utils.py 283): INFO Epoch: [9] [2090/2502] eta: 0:20:11 lr: 0.000010 loss_cls: 2.6337 (2.6945) grad_norm: 1.5549 (1.6399) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 07:37:56 root] (utils.py 283): INFO Epoch: [9] [2100/2502] eta: 0:19:41 lr: 0.000010 loss_cls: 2.5528 (2.6941) grad_norm: 1.5683 (1.6396) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 07:38:25 root] (utils.py 283): INFO Epoch: [9] [2110/2502] eta: 0:19:12 lr: 0.000010 loss_cls: 2.8088 (2.6954) grad_norm: 1.6542 (1.6419) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 07:38:55 root] (utils.py 283): INFO Epoch: [9] [2120/2502] eta: 0:18:43 lr: 0.000010 loss_cls: 2.8603 (2.6951) grad_norm: 1.5859 (1.6414) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 07:39:24 root] (utils.py 283): INFO Epoch: [9] [2130/2502] eta: 0:18:13 lr: 0.000010 loss_cls: 2.7790 (2.6955) grad_norm: 1.5719 (1.6414) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 07:39:54 root] (utils.py 283): INFO Epoch: [9] [2140/2502] eta: 0:17:44 lr: 0.000010 loss_cls: 2.9036 (2.6958) grad_norm: 1.6779 (1.6413) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 07:40:23 root] (utils.py 283): INFO Epoch: [9] [2150/2502] eta: 0:17:14 lr: 0.000010 loss_cls: 2.9234 (2.6958) grad_norm: 1.5342 (1.6409) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 07:40:52 root] (utils.py 283): INFO Epoch: [9] [2160/2502] eta: 0:16:45 lr: 0.000010 loss_cls: 2.7938 (2.6953) grad_norm: 1.5645 (1.6409) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 07:41:22 root] (utils.py 283): INFO Epoch: [9] [2170/2502] eta: 0:16:16 lr: 0.000010 loss_cls: 2.7001 (2.6955) grad_norm: 1.6332 (1.6408) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 07:41:51 root] (utils.py 283): INFO Epoch: [9] [2180/2502] eta: 0:15:46 lr: 0.000010 loss_cls: 2.7972 (2.6959) grad_norm: 1.6227 (1.6411) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-11 07:42:20 root] (utils.py 283): INFO Epoch: [9] [2190/2502] eta: 0:15:17 lr: 0.000010 loss_cls: 2.7584 (2.6959) grad_norm: 1.5966 (1.6409) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 07:42:50 root] (utils.py 283): INFO Epoch: [9] [2200/2502] eta: 0:14:47 lr: 0.000010 loss_cls: 2.8231 (2.6963) grad_norm: 1.5651 (1.6415) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 07:43:19 root] (utils.py 283): INFO Epoch: [9] [2210/2502] eta: 0:14:18 lr: 0.000010 loss_cls: 2.8990 (2.6965) grad_norm: 1.6962 (1.6416) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 07:43:49 root] (utils.py 283): INFO Epoch: [9] [2220/2502] eta: 0:13:49 lr: 0.000010 loss_cls: 3.0169 (2.6984) grad_norm: 1.7284 (1.6424) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 07:44:18 root] (utils.py 283): INFO Epoch: [9] [2230/2502] eta: 0:13:19 lr: 0.000010 loss_cls: 3.0723 (2.6995) grad_norm: 1.6220 (1.6423) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 07:44:47 root] (utils.py 283): INFO Epoch: [9] [2240/2502] eta: 0:12:50 lr: 0.000010 loss_cls: 2.8660 (2.6996) grad_norm: 1.5493 (1.6422) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 07:45:17 root] (utils.py 283): INFO Epoch: [9] [2250/2502] eta: 0:12:20 lr: 0.000010 loss_cls: 2.7916 (2.6998) grad_norm: 1.5350 (1.6417) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 07:45:46 root] (utils.py 283): INFO Epoch: [9] [2260/2502] eta: 0:11:51 lr: 0.000010 loss_cls: 2.8970 (2.7008) grad_norm: 1.5549 (1.6417) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 07:46:16 root] (utils.py 283): INFO Epoch: [9] [2270/2502] eta: 0:11:22 lr: 0.000010 loss_cls: 2.5811 (2.6984) grad_norm: 1.5539 (1.6411) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 07:46:45 root] (utils.py 283): INFO Epoch: [9] [2280/2502] eta: 0:10:52 lr: 0.000010 loss_cls: 2.4864 (2.6985) grad_norm: 1.5466 (1.6413) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 07:47:14 root] (utils.py 283): INFO Epoch: [9] [2290/2502] eta: 0:10:23 lr: 0.000010 loss_cls: 2.7485 (2.6977) grad_norm: 1.5436 (1.6410) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 07:47:44 root] (utils.py 283): INFO Epoch: [9] [2300/2502] eta: 0:09:53 lr: 0.000010 loss_cls: 2.7485 (2.6982) grad_norm: 1.5217 (1.6409) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 07:48:13 root] (utils.py 283): INFO Epoch: [9] [2310/2502] eta: 0:09:24 lr: 0.000010 loss_cls: 2.8748 (2.6978) grad_norm: 1.5803 (1.6410) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 07:48:42 root] (utils.py 283): INFO Epoch: [9] [2320/2502] eta: 0:08:55 lr: 0.000010 loss_cls: 2.5977 (2.6977) grad_norm: 1.5803 (1.6406) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 07:49:12 root] (utils.py 283): INFO Epoch: [9] [2330/2502] eta: 0:08:25 lr: 0.000010 loss_cls: 2.7312 (2.6976) grad_norm: 1.4914 (1.6402) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 07:49:41 root] (utils.py 283): INFO Epoch: [9] [2340/2502] eta: 0:07:56 lr: 0.000010 loss_cls: 2.7625 (2.6982) grad_norm: 1.5529 (1.6399) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 07:50:10 root] (utils.py 283): INFO Epoch: [9] [2350/2502] eta: 0:07:26 lr: 0.000010 loss_cls: 2.9048 (2.6987) grad_norm: 1.5548 (1.6403) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 07:50:40 root] (utils.py 283): INFO Epoch: [9] [2360/2502] eta: 0:06:57 lr: 0.000010 loss_cls: 2.7930 (2.6988) grad_norm: 1.5829 (1.6403) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-11 07:51:09 root] (utils.py 283): INFO Epoch: [9] [2370/2502] eta: 0:06:28 lr: 0.000010 loss_cls: 2.8492 (2.6996) grad_norm: 1.5829 (1.6400) time: 2.9259 data: 0.0003 max mem: 29202 +[2024-12-11 07:51:38 root] (utils.py 283): INFO Epoch: [9] [2380/2502] eta: 0:05:58 lr: 0.000010 loss_cls: 2.9080 (2.6990) grad_norm: 1.5581 (1.6398) time: 2.9234 data: 0.0003 max mem: 29202 +[2024-12-11 07:52:07 root] (utils.py 283): INFO Epoch: [9] [2390/2502] eta: 0:05:29 lr: 0.000010 loss_cls: 2.6550 (2.6985) grad_norm: 1.5173 (1.6394) time: 2.9223 data: 0.0003 max mem: 29202 +[2024-12-11 07:52:37 root] (utils.py 283): INFO Epoch: [9] [2400/2502] eta: 0:04:59 lr: 0.000010 loss_cls: 2.5637 (2.6977) grad_norm: 1.5028 (1.6389) time: 2.9223 data: 0.0003 max mem: 29202 +[2024-12-11 07:53:06 root] (utils.py 283): INFO Epoch: [9] [2410/2502] eta: 0:04:30 lr: 0.000010 loss_cls: 2.6915 (2.6980) grad_norm: 1.5067 (1.6389) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-11 07:53:35 root] (utils.py 283): INFO Epoch: [9] [2420/2502] eta: 0:04:01 lr: 0.000010 loss_cls: 2.8426 (2.6975) grad_norm: 1.6119 (1.6396) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 07:54:05 root] (utils.py 283): INFO Epoch: [9] [2430/2502] eta: 0:03:31 lr: 0.000010 loss_cls: 2.8613 (2.6979) grad_norm: 1.6918 (1.6397) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 07:54:34 root] (utils.py 283): INFO Epoch: [9] [2440/2502] eta: 0:03:02 lr: 0.000010 loss_cls: 2.9643 (2.6993) grad_norm: 1.6918 (1.6408) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 07:55:04 root] (utils.py 283): INFO Epoch: [9] [2450/2502] eta: 0:02:32 lr: 0.000010 loss_cls: 2.9095 (2.6993) grad_norm: 1.5868 (1.6405) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 07:55:33 root] (utils.py 283): INFO Epoch: [9] [2460/2502] eta: 0:02:03 lr: 0.000010 loss_cls: 2.9159 (2.7006) grad_norm: 1.5509 (1.6408) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-11 07:56:03 root] (utils.py 283): INFO Epoch: [9] [2470/2502] eta: 0:01:34 lr: 0.000010 loss_cls: 2.8881 (2.7001) grad_norm: 1.5560 (1.6404) time: 2.9535 data: 0.0003 max mem: 29202 +[2024-12-11 07:56:32 root] (utils.py 283): INFO Epoch: [9] [2480/2502] eta: 0:01:04 lr: 0.000010 loss_cls: 2.7350 (2.7001) grad_norm: 1.4839 (1.6399) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-11 07:57:02 root] (utils.py 283): INFO Epoch: [9] [2490/2502] eta: 0:00:35 lr: 0.000010 loss_cls: 2.7409 (2.7005) grad_norm: 1.5063 (1.6398) time: 2.9605 data: 0.0250 max mem: 29202 +[2024-12-11 07:57:31 root] (utils.py 283): INFO Epoch: [9] [2500/2502] eta: 0:00:05 lr: 0.000010 loss_cls: 2.9026 (2.7002) grad_norm: 1.5802 (1.6396) time: 2.9609 data: 0.0251 max mem: 29202 +[2024-12-11 07:57:34 root] (utils.py 283): INFO Epoch: [9] [2501/2502] eta: 0:00:02 lr: 0.000010 loss_cls: 2.9026 (2.7004) grad_norm: 1.6242 (1.6396) time: 2.9602 data: 0.0251 max mem: 29202 +[2024-12-11 07:57:34 root] (utils.py 297): INFO Epoch: [9] Total time: 2:02:35 (2.9398 s / it) +[2024-12-11 07:57:34 root] (engine.py 179): INFO Averaged stats:lr: 0.000010 loss_cls: 2.9026 (2.7043) grad_norm: 1.6242 (1.6396) +[2024-12-11 07:57:38 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2751 (0.2751) acc1: 92.9688 (92.9688) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5642 data: 0.0006 max mem: 29202 +[2024-12-11 07:57:44 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5419 (0.5582) acc1: 86.7188 (87.0739) acc3: 97.6562 (96.6619) acc5: 98.4375 (98.2955) time: 0.5520 data: 0.0004 max mem: 29202 +[2024-12-11 07:57:49 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5419 (0.6026) acc1: 85.1562 (85.8631) acc3: 96.8750 (95.9077) acc5: 98.4375 (97.8051) time: 0.5513 data: 0.0004 max mem: 29202 +[2024-12-11 07:57:55 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5906 (0.6356) acc1: 85.1562 (85.3075) acc3: 96.0938 (95.6653) acc5: 97.6562 (97.5050) time: 0.5521 data: 0.0004 max mem: 29202 +[2024-12-11 07:58:00 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6552 (0.6414) acc1: 85.9375 (85.4040) acc3: 96.0938 (95.6745) acc5: 96.8750 (97.4848) time: 0.5522 data: 0.0004 max mem: 29202 +[2024-12-11 07:58:06 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8218 (0.7245) acc1: 78.9062 (83.6091) acc3: 93.7500 (94.5772) acc5: 95.3125 (96.6605) time: 0.5524 data: 0.0004 max mem: 29202 +[2024-12-11 07:58:11 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9244 (0.7561) acc1: 77.3438 (83.0046) acc3: 90.6250 (94.0446) acc5: 93.7500 (96.2474) time: 0.5528 data: 0.0004 max mem: 29202 +[2024-12-11 07:58:17 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9586 (0.7893) acc1: 78.9062 (82.2293) acc3: 91.4062 (93.7610) acc5: 93.7500 (96.0167) time: 0.5525 data: 0.0004 max mem: 29202 +[2024-12-11 07:58:23 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9936 (0.8159) acc1: 78.1250 (81.7708) acc3: 91.4062 (93.3256) acc5: 92.9688 (95.6404) time: 0.5530 data: 0.0007 max mem: 29202 +[2024-12-11 07:58:28 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 1.0210 (0.8440) acc1: 75.7812 (80.9409) acc3: 90.6250 (93.0117) acc5: 92.9688 (95.4155) time: 0.5535 data: 0.0006 max mem: 29202 +[2024-12-11 07:58:32 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9438 (0.8418) acc1: 75.0000 (80.9200) acc3: 90.6250 (93.0320) acc5: 93.7500 (95.4800) time: 0.5440 data: 0.0005 max mem: 29202 +[2024-12-11 07:58:32 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5509 s / it) +[2024-12-11 07:58:32 root] (engine.py 264): INFO * Acc@1 81.176 Acc@3 92.984 Acc@5 95.312 loss 0.837 flops 13.207 layer_flops 13.109 +[2024-12-11 07:58:32 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.2% +[2024-12-11 07:58:34 root] (main.py 576): INFO Max accuracy: 81.18% +[2024-12-11 07:58:37 root] (utils.py 283): INFO Epoch: [10] [ 0/2502] eta: 1:59:53 lr: 0.000008 loss_cls: 2.4130 (2.4130) grad_norm: 1.5651 (1.5651) time: 2.8751 data: 0.0003 max mem: 29202 +[2024-12-11 07:59:06 root] (utils.py 283): INFO Epoch: [10] [ 10/2502] eta: 2:01:45 lr: 0.000008 loss_cls: 2.7806 (2.7191) grad_norm: 1.6631 (1.6289) time: 2.9317 data: 0.0003 max mem: 29202 +[2024-12-11 07:59:36 root] (utils.py 283): INFO Epoch: [10] [ 20/2502] eta: 2:01:27 lr: 0.000008 loss_cls: 2.9505 (2.7389) grad_norm: 1.6078 (1.6864) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 08:00:05 root] (utils.py 283): INFO Epoch: [10] [ 30/2502] eta: 2:01:10 lr: 0.000008 loss_cls: 2.8719 (2.6998) grad_norm: 1.5445 (1.6328) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-11 08:00:35 root] (utils.py 283): INFO Epoch: [10] [ 40/2502] eta: 2:00:48 lr: 0.000008 loss_cls: 2.5629 (2.6376) grad_norm: 1.4722 (1.6295) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-11 08:01:04 root] (utils.py 283): INFO Epoch: [10] [ 50/2502] eta: 2:00:19 lr: 0.000008 loss_cls: 2.4653 (2.6297) grad_norm: 1.5186 (1.6385) time: 2.9491 data: 0.0003 max mem: 29202 +[2024-12-11 08:01:34 root] (utils.py 283): INFO Epoch: [10] [ 60/2502] eta: 1:59:45 lr: 0.000008 loss_cls: 2.7252 (2.6485) grad_norm: 1.5415 (1.6234) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 08:02:03 root] (utils.py 283): INFO Epoch: [10] [ 70/2502] eta: 1:59:16 lr: 0.000008 loss_cls: 2.8355 (2.6629) grad_norm: 1.5415 (1.6222) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 08:02:33 root] (utils.py 283): INFO Epoch: [10] [ 80/2502] eta: 1:58:47 lr: 0.000008 loss_cls: 2.8107 (2.6738) grad_norm: 1.6304 (1.6496) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 08:03:02 root] (utils.py 283): INFO Epoch: [10] [ 90/2502] eta: 1:58:16 lr: 0.000008 loss_cls: 2.8069 (2.6604) grad_norm: 1.6572 (1.6705) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 08:03:31 root] (utils.py 283): INFO Epoch: [10] [ 100/2502] eta: 1:57:46 lr: 0.000008 loss_cls: 2.3543 (2.6335) grad_norm: 1.5927 (1.6615) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 08:04:01 root] (utils.py 283): INFO Epoch: [10] [ 110/2502] eta: 1:57:15 lr: 0.000008 loss_cls: 2.4849 (2.6339) grad_norm: 1.5803 (1.6708) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 08:04:30 root] (utils.py 283): INFO Epoch: [10] [ 120/2502] eta: 1:56:47 lr: 0.000008 loss_cls: 2.7125 (2.6370) grad_norm: 1.5593 (1.6648) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 08:05:00 root] (utils.py 283): INFO Epoch: [10] [ 130/2502] eta: 1:56:17 lr: 0.000008 loss_cls: 2.5915 (2.6358) grad_norm: 1.5231 (1.6561) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-11 08:05:29 root] (utils.py 283): INFO Epoch: [10] [ 140/2502] eta: 1:55:47 lr: 0.000008 loss_cls: 2.5915 (2.6384) grad_norm: 1.5467 (1.6570) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 08:05:58 root] (utils.py 283): INFO Epoch: [10] [ 150/2502] eta: 1:55:18 lr: 0.000008 loss_cls: 2.8447 (2.6492) grad_norm: 1.5888 (1.6571) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 08:06:28 root] (utils.py 283): INFO Epoch: [10] [ 160/2502] eta: 1:54:48 lr: 0.000008 loss_cls: 2.9397 (2.6568) grad_norm: 1.5526 (1.6566) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 08:06:57 root] (utils.py 283): INFO Epoch: [10] [ 170/2502] eta: 1:54:19 lr: 0.000008 loss_cls: 2.7068 (2.6438) grad_norm: 1.6389 (1.6545) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 08:07:27 root] (utils.py 283): INFO Epoch: [10] [ 180/2502] eta: 1:53:50 lr: 0.000008 loss_cls: 2.7100 (2.6568) grad_norm: 1.6026 (1.6551) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 08:07:56 root] (utils.py 283): INFO Epoch: [10] [ 190/2502] eta: 1:53:20 lr: 0.000008 loss_cls: 2.9003 (2.6725) grad_norm: 1.6083 (1.6564) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 08:08:25 root] (utils.py 283): INFO Epoch: [10] [ 200/2502] eta: 1:52:51 lr: 0.000008 loss_cls: 2.9040 (2.6866) grad_norm: 1.5969 (1.6530) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 08:08:55 root] (utils.py 283): INFO Epoch: [10] [ 210/2502] eta: 1:52:22 lr: 0.000008 loss_cls: 3.0632 (2.6966) grad_norm: 1.6456 (1.6572) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 08:09:24 root] (utils.py 283): INFO Epoch: [10] [ 220/2502] eta: 1:51:52 lr: 0.000008 loss_cls: 2.8512 (2.6988) grad_norm: 1.6603 (1.6612) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-11 08:09:54 root] (utils.py 283): INFO Epoch: [10] [ 230/2502] eta: 1:51:22 lr: 0.000008 loss_cls: 2.8512 (2.7006) grad_norm: 1.5016 (1.6590) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 08:10:23 root] (utils.py 283): INFO Epoch: [10] [ 240/2502] eta: 1:50:53 lr: 0.000008 loss_cls: 2.7486 (2.6959) grad_norm: 1.4961 (1.6551) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 08:10:52 root] (utils.py 283): INFO Epoch: [10] [ 250/2502] eta: 1:50:24 lr: 0.000008 loss_cls: 2.6638 (2.6925) grad_norm: 1.5198 (1.6534) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 08:11:22 root] (utils.py 283): INFO Epoch: [10] [ 260/2502] eta: 1:49:55 lr: 0.000008 loss_cls: 2.7528 (2.6916) grad_norm: 1.5451 (1.6509) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-11 08:11:52 root] (utils.py 283): INFO Epoch: [10] [ 270/2502] eta: 1:49:27 lr: 0.000008 loss_cls: 2.7528 (2.6924) grad_norm: 1.5451 (1.6506) time: 2.9532 data: 0.0003 max mem: 29202 +[2024-12-11 08:12:21 root] (utils.py 283): INFO Epoch: [10] [ 280/2502] eta: 1:48:58 lr: 0.000008 loss_cls: 2.9628 (2.6972) grad_norm: 1.5530 (1.6500) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-11 08:12:50 root] (utils.py 283): INFO Epoch: [10] [ 290/2502] eta: 1:48:28 lr: 0.000008 loss_cls: 2.8514 (2.6947) grad_norm: 1.5444 (1.6476) time: 2.9417 data: 0.0002 max mem: 29202 +[2024-12-11 08:13:20 root] (utils.py 283): INFO Epoch: [10] [ 300/2502] eta: 1:47:57 lr: 0.000008 loss_cls: 2.7041 (2.6945) grad_norm: 1.6577 (1.6475) time: 2.9323 data: 0.0002 max mem: 29202 +[2024-12-11 08:13:49 root] (utils.py 283): INFO Epoch: [10] [ 310/2502] eta: 1:47:27 lr: 0.000008 loss_cls: 2.6069 (2.6930) grad_norm: 1.6231 (1.6454) time: 2.9257 data: 0.0003 max mem: 29202 +[2024-12-11 08:14:18 root] (utils.py 283): INFO Epoch: [10] [ 320/2502] eta: 1:46:57 lr: 0.000008 loss_cls: 2.8063 (2.6990) grad_norm: 1.5870 (1.6448) time: 2.9296 data: 0.0003 max mem: 29202 +[2024-12-11 08:14:48 root] (utils.py 283): INFO Epoch: [10] [ 330/2502] eta: 1:46:26 lr: 0.000008 loss_cls: 2.8356 (2.6982) grad_norm: 1.6092 (1.6457) time: 2.9313 data: 0.0003 max mem: 29202 +[2024-12-11 08:15:17 root] (utils.py 283): INFO Epoch: [10] [ 340/2502] eta: 1:45:56 lr: 0.000008 loss_cls: 2.8346 (2.7025) grad_norm: 1.4623 (1.6439) time: 2.9256 data: 0.0003 max mem: 29202 +[2024-12-11 08:15:46 root] (utils.py 283): INFO Epoch: [10] [ 350/2502] eta: 1:45:26 lr: 0.000008 loss_cls: 2.8401 (2.7015) grad_norm: 1.5419 (1.6440) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-11 08:16:16 root] (utils.py 283): INFO Epoch: [10] [ 360/2502] eta: 1:44:57 lr: 0.000008 loss_cls: 2.7580 (2.6952) grad_norm: 1.6007 (1.6434) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 08:16:45 root] (utils.py 283): INFO Epoch: [10] [ 370/2502] eta: 1:44:27 lr: 0.000008 loss_cls: 2.7580 (2.6975) grad_norm: 1.5944 (1.6416) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 08:17:14 root] (utils.py 283): INFO Epoch: [10] [ 380/2502] eta: 1:43:58 lr: 0.000008 loss_cls: 2.8125 (2.6955) grad_norm: 1.5397 (1.6403) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 08:17:44 root] (utils.py 283): INFO Epoch: [10] [ 390/2502] eta: 1:43:29 lr: 0.000008 loss_cls: 2.7693 (2.6947) grad_norm: 1.5958 (1.6425) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 08:18:13 root] (utils.py 283): INFO Epoch: [10] [ 400/2502] eta: 1:42:59 lr: 0.000008 loss_cls: 2.7693 (2.6950) grad_norm: 1.5865 (1.6419) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 08:18:43 root] (utils.py 283): INFO Epoch: [10] [ 410/2502] eta: 1:42:30 lr: 0.000008 loss_cls: 2.6688 (2.6926) grad_norm: 1.5714 (1.6425) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 08:19:12 root] (utils.py 283): INFO Epoch: [10] [ 420/2502] eta: 1:42:01 lr: 0.000008 loss_cls: 2.9251 (2.6999) grad_norm: 1.6088 (1.6426) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 08:19:41 root] (utils.py 283): INFO Epoch: [10] [ 430/2502] eta: 1:41:32 lr: 0.000008 loss_cls: 3.0164 (2.7055) grad_norm: 1.5801 (1.6414) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 08:20:11 root] (utils.py 283): INFO Epoch: [10] [ 440/2502] eta: 1:41:02 lr: 0.000008 loss_cls: 2.8240 (2.6975) grad_norm: 1.5360 (1.6395) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 08:20:40 root] (utils.py 283): INFO Epoch: [10] [ 450/2502] eta: 1:40:33 lr: 0.000008 loss_cls: 2.3159 (2.6929) grad_norm: 1.5451 (1.6408) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 08:21:10 root] (utils.py 283): INFO Epoch: [10] [ 460/2502] eta: 1:40:04 lr: 0.000008 loss_cls: 2.8131 (2.6957) grad_norm: 1.6661 (1.6416) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 08:21:39 root] (utils.py 283): INFO Epoch: [10] [ 470/2502] eta: 1:39:34 lr: 0.000008 loss_cls: 2.9848 (2.6969) grad_norm: 1.6310 (1.6405) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 08:22:09 root] (utils.py 283): INFO Epoch: [10] [ 480/2502] eta: 1:39:05 lr: 0.000008 loss_cls: 2.7713 (2.6936) grad_norm: 1.5893 (1.6417) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-11 08:22:38 root] (utils.py 283): INFO Epoch: [10] [ 490/2502] eta: 1:38:36 lr: 0.000008 loss_cls: 2.3806 (2.6909) grad_norm: 1.5334 (1.6415) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 08:23:07 root] (utils.py 283): INFO Epoch: [10] [ 500/2502] eta: 1:38:06 lr: 0.000008 loss_cls: 2.5839 (2.6874) grad_norm: 1.5094 (1.6390) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 08:23:37 root] (utils.py 283): INFO Epoch: [10] [ 510/2502] eta: 1:37:37 lr: 0.000008 loss_cls: 2.4984 (2.6808) grad_norm: 1.4886 (1.6365) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 08:24:06 root] (utils.py 283): INFO Epoch: [10] [ 520/2502] eta: 1:37:08 lr: 0.000008 loss_cls: 2.3380 (2.6785) grad_norm: 1.5079 (1.6360) time: 2.9509 data: 0.0003 max mem: 29202 +[2024-12-11 08:24:36 root] (utils.py 283): INFO Epoch: [10] [ 530/2502] eta: 1:36:39 lr: 0.000008 loss_cls: 2.7827 (2.6794) grad_norm: 1.5024 (1.6331) time: 2.9575 data: 0.0003 max mem: 29202 +[2024-12-11 08:25:05 root] (utils.py 283): INFO Epoch: [10] [ 540/2502] eta: 1:36:10 lr: 0.000008 loss_cls: 2.7827 (2.6756) grad_norm: 1.5723 (1.6357) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-11 08:25:35 root] (utils.py 283): INFO Epoch: [10] [ 550/2502] eta: 1:35:41 lr: 0.000008 loss_cls: 2.6110 (2.6773) grad_norm: 1.5757 (1.6346) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 08:26:04 root] (utils.py 283): INFO Epoch: [10] [ 560/2502] eta: 1:35:12 lr: 0.000008 loss_cls: 2.6304 (2.6770) grad_norm: 1.5470 (1.6362) time: 2.9569 data: 0.0003 max mem: 29202 +[2024-12-11 08:26:34 root] (utils.py 283): INFO Epoch: [10] [ 570/2502] eta: 1:34:43 lr: 0.000008 loss_cls: 2.7383 (2.6805) grad_norm: 1.5470 (1.6347) time: 2.9548 data: 0.0003 max mem: 29202 +[2024-12-11 08:27:03 root] (utils.py 283): INFO Epoch: [10] [ 580/2502] eta: 1:34:13 lr: 0.000008 loss_cls: 2.6874 (2.6797) grad_norm: 1.4728 (1.6339) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 08:27:33 root] (utils.py 283): INFO Epoch: [10] [ 590/2502] eta: 1:33:44 lr: 0.000008 loss_cls: 2.7200 (2.6806) grad_norm: 1.5501 (1.6331) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 08:28:02 root] (utils.py 283): INFO Epoch: [10] [ 600/2502] eta: 1:33:14 lr: 0.000008 loss_cls: 2.8533 (2.6833) grad_norm: 1.5444 (1.6313) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 08:28:32 root] (utils.py 283): INFO Epoch: [10] [ 610/2502] eta: 1:32:45 lr: 0.000008 loss_cls: 2.7969 (2.6827) grad_norm: 1.5434 (1.6309) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 08:29:01 root] (utils.py 283): INFO Epoch: [10] [ 620/2502] eta: 1:32:16 lr: 0.000008 loss_cls: 2.6746 (2.6818) grad_norm: 1.5540 (1.6299) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 08:29:30 root] (utils.py 283): INFO Epoch: [10] [ 630/2502] eta: 1:31:46 lr: 0.000008 loss_cls: 2.7481 (2.6800) grad_norm: 1.6134 (1.6310) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 08:30:00 root] (utils.py 283): INFO Epoch: [10] [ 640/2502] eta: 1:31:17 lr: 0.000008 loss_cls: 2.7481 (2.6807) grad_norm: 1.7005 (1.6328) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 08:30:29 root] (utils.py 283): INFO Epoch: [10] [ 650/2502] eta: 1:30:47 lr: 0.000008 loss_cls: 2.8531 (2.6793) grad_norm: 1.7005 (1.6414) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 08:30:59 root] (utils.py 283): INFO Epoch: [10] [ 660/2502] eta: 1:30:18 lr: 0.000008 loss_cls: 2.7776 (2.6773) grad_norm: 1.7092 (1.6422) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 08:31:28 root] (utils.py 283): INFO Epoch: [10] [ 670/2502] eta: 1:29:48 lr: 0.000008 loss_cls: 2.5164 (2.6749) grad_norm: 1.7092 (1.6442) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 08:31:57 root] (utils.py 283): INFO Epoch: [10] [ 680/2502] eta: 1:29:19 lr: 0.000008 loss_cls: 2.4685 (2.6728) grad_norm: 1.6978 (1.6450) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 08:32:27 root] (utils.py 283): INFO Epoch: [10] [ 690/2502] eta: 1:28:50 lr: 0.000008 loss_cls: 2.7515 (2.6756) grad_norm: 1.6166 (1.6449) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 08:32:56 root] (utils.py 283): INFO Epoch: [10] [ 700/2502] eta: 1:28:20 lr: 0.000008 loss_cls: 2.9873 (2.6793) grad_norm: 1.5586 (1.6449) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-11 08:33:26 root] (utils.py 283): INFO Epoch: [10] [ 710/2502] eta: 1:27:51 lr: 0.000008 loss_cls: 2.9690 (2.6792) grad_norm: 1.6267 (1.6465) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 08:33:55 root] (utils.py 283): INFO Epoch: [10] [ 720/2502] eta: 1:27:21 lr: 0.000008 loss_cls: 2.7460 (2.6809) grad_norm: 1.5568 (1.6446) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 08:34:24 root] (utils.py 283): INFO Epoch: [10] [ 730/2502] eta: 1:26:52 lr: 0.000008 loss_cls: 2.7475 (2.6793) grad_norm: 1.5270 (1.6445) time: 2.9354 data: 0.0003 max mem: 29202 +[2024-12-11 08:34:54 root] (utils.py 283): INFO Epoch: [10] [ 740/2502] eta: 1:26:22 lr: 0.000008 loss_cls: 2.8051 (2.6808) grad_norm: 1.5262 (1.6434) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 08:35:23 root] (utils.py 283): INFO Epoch: [10] [ 750/2502] eta: 1:25:53 lr: 0.000008 loss_cls: 2.5760 (2.6765) grad_norm: 1.6109 (1.6466) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 08:35:53 root] (utils.py 283): INFO Epoch: [10] [ 760/2502] eta: 1:25:23 lr: 0.000008 loss_cls: 2.5517 (2.6748) grad_norm: 1.6068 (1.6448) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 08:36:22 root] (utils.py 283): INFO Epoch: [10] [ 770/2502] eta: 1:24:54 lr: 0.000008 loss_cls: 2.6686 (2.6753) grad_norm: 1.5534 (1.6455) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 08:36:51 root] (utils.py 283): INFO Epoch: [10] [ 780/2502] eta: 1:24:25 lr: 0.000008 loss_cls: 2.6767 (2.6759) grad_norm: 1.5534 (1.6441) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 08:37:21 root] (utils.py 283): INFO Epoch: [10] [ 790/2502] eta: 1:23:55 lr: 0.000008 loss_cls: 2.6508 (2.6776) grad_norm: 1.5571 (1.6454) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 08:37:50 root] (utils.py 283): INFO Epoch: [10] [ 800/2502] eta: 1:23:26 lr: 0.000008 loss_cls: 2.6850 (2.6782) grad_norm: 1.6263 (1.6456) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 08:38:20 root] (utils.py 283): INFO Epoch: [10] [ 810/2502] eta: 1:22:56 lr: 0.000008 loss_cls: 2.6850 (2.6784) grad_norm: 1.6053 (1.6458) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 08:38:49 root] (utils.py 283): INFO Epoch: [10] [ 820/2502] eta: 1:22:27 lr: 0.000008 loss_cls: 2.5972 (2.6786) grad_norm: 1.5743 (1.6464) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 08:39:19 root] (utils.py 283): INFO Epoch: [10] [ 830/2502] eta: 1:21:57 lr: 0.000008 loss_cls: 2.8894 (2.6817) grad_norm: 1.5498 (1.6456) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 08:39:48 root] (utils.py 283): INFO Epoch: [10] [ 840/2502] eta: 1:21:28 lr: 0.000008 loss_cls: 2.9608 (2.6828) grad_norm: 1.4829 (1.6438) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 08:40:17 root] (utils.py 283): INFO Epoch: [10] [ 850/2502] eta: 1:20:58 lr: 0.000008 loss_cls: 2.9637 (2.6840) grad_norm: 1.4577 (1.6430) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 08:40:47 root] (utils.py 283): INFO Epoch: [10] [ 860/2502] eta: 1:20:29 lr: 0.000008 loss_cls: 2.7155 (2.6813) grad_norm: 1.5395 (1.6421) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 08:41:16 root] (utils.py 283): INFO Epoch: [10] [ 870/2502] eta: 1:20:00 lr: 0.000008 loss_cls: 2.3352 (2.6775) grad_norm: 1.5202 (1.6404) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 08:41:46 root] (utils.py 283): INFO Epoch: [10] [ 880/2502] eta: 1:19:31 lr: 0.000008 loss_cls: 2.5080 (2.6776) grad_norm: 1.5202 (1.6393) time: 2.9551 data: 0.0003 max mem: 29202 +[2024-12-11 08:42:15 root] (utils.py 283): INFO Epoch: [10] [ 890/2502] eta: 1:19:01 lr: 0.000008 loss_cls: 2.6287 (2.6749) grad_norm: 1.4574 (1.6371) time: 2.9522 data: 0.0003 max mem: 29202 +[2024-12-11 08:42:45 root] (utils.py 283): INFO Epoch: [10] [ 900/2502] eta: 1:18:32 lr: 0.000008 loss_cls: 2.7567 (2.6758) grad_norm: 1.4811 (1.6368) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 08:43:14 root] (utils.py 283): INFO Epoch: [10] [ 910/2502] eta: 1:18:02 lr: 0.000008 loss_cls: 2.8692 (2.6773) grad_norm: 1.5027 (1.6357) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 08:43:43 root] (utils.py 283): INFO Epoch: [10] [ 920/2502] eta: 1:17:33 lr: 0.000008 loss_cls: 2.7810 (2.6765) grad_norm: 1.5688 (1.6354) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 08:44:13 root] (utils.py 283): INFO Epoch: [10] [ 930/2502] eta: 1:17:03 lr: 0.000008 loss_cls: 2.4648 (2.6744) grad_norm: 1.5688 (1.6350) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 08:44:42 root] (utils.py 283): INFO Epoch: [10] [ 940/2502] eta: 1:16:34 lr: 0.000008 loss_cls: 2.4648 (2.6748) grad_norm: 1.5184 (1.6338) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 08:45:11 root] (utils.py 283): INFO Epoch: [10] [ 950/2502] eta: 1:16:04 lr: 0.000008 loss_cls: 2.7877 (2.6747) grad_norm: 1.5103 (1.6341) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 08:45:41 root] (utils.py 283): INFO Epoch: [10] [ 960/2502] eta: 1:15:35 lr: 0.000008 loss_cls: 2.6421 (2.6737) grad_norm: 1.5147 (1.6340) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 08:46:10 root] (utils.py 283): INFO Epoch: [10] [ 970/2502] eta: 1:15:06 lr: 0.000008 loss_cls: 2.6420 (2.6723) grad_norm: 1.5838 (1.6380) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 08:46:40 root] (utils.py 283): INFO Epoch: [10] [ 980/2502] eta: 1:14:36 lr: 0.000008 loss_cls: 2.5737 (2.6713) grad_norm: 1.6251 (1.6377) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 08:47:09 root] (utils.py 283): INFO Epoch: [10] [ 990/2502] eta: 1:14:07 lr: 0.000008 loss_cls: 2.6383 (2.6717) grad_norm: 1.6071 (1.6371) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 08:47:38 root] (utils.py 283): INFO Epoch: [10] [1000/2502] eta: 1:13:37 lr: 0.000008 loss_cls: 2.6978 (2.6715) grad_norm: 1.5686 (1.6364) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 08:48:08 root] (utils.py 283): INFO Epoch: [10] [1010/2502] eta: 1:13:08 lr: 0.000008 loss_cls: 2.7983 (2.6720) grad_norm: 1.4919 (1.6361) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 08:48:37 root] (utils.py 283): INFO Epoch: [10] [1020/2502] eta: 1:12:38 lr: 0.000008 loss_cls: 2.7983 (2.6720) grad_norm: 1.5642 (1.6422) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 08:49:07 root] (utils.py 283): INFO Epoch: [10] [1030/2502] eta: 1:12:09 lr: 0.000008 loss_cls: 2.6120 (2.6712) grad_norm: 1.6680 (1.6418) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 08:49:36 root] (utils.py 283): INFO Epoch: [10] [1040/2502] eta: 1:11:40 lr: 0.000008 loss_cls: 2.6141 (2.6711) grad_norm: 1.5318 (1.6421) time: 2.9503 data: 0.0003 max mem: 29202 +[2024-12-11 08:50:06 root] (utils.py 283): INFO Epoch: [10] [1050/2502] eta: 1:11:10 lr: 0.000008 loss_cls: 2.6352 (2.6700) grad_norm: 1.6601 (1.6432) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 08:50:35 root] (utils.py 283): INFO Epoch: [10] [1060/2502] eta: 1:10:41 lr: 0.000008 loss_cls: 2.5672 (2.6687) grad_norm: 1.6549 (1.6426) time: 2.9386 data: 0.0002 max mem: 29202 +[2024-12-11 08:51:04 root] (utils.py 283): INFO Epoch: [10] [1070/2502] eta: 1:10:12 lr: 0.000008 loss_cls: 2.7288 (2.6707) grad_norm: 1.6085 (1.6428) time: 2.9399 data: 0.0002 max mem: 29202 +[2024-12-11 08:51:34 root] (utils.py 283): INFO Epoch: [10] [1080/2502] eta: 1:09:42 lr: 0.000008 loss_cls: 2.8272 (2.6707) grad_norm: 1.6610 (1.6438) time: 2.9408 data: 0.0002 max mem: 29202 +[2024-12-11 08:52:03 root] (utils.py 283): INFO Epoch: [10] [1090/2502] eta: 1:09:13 lr: 0.000008 loss_cls: 2.7302 (2.6715) grad_norm: 1.6960 (1.6459) time: 2.9402 data: 0.0002 max mem: 29202 +[2024-12-11 08:52:33 root] (utils.py 283): INFO Epoch: [10] [1100/2502] eta: 1:08:43 lr: 0.000008 loss_cls: 2.7302 (2.6693) grad_norm: 1.5279 (1.6448) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 08:53:02 root] (utils.py 283): INFO Epoch: [10] [1110/2502] eta: 1:08:14 lr: 0.000008 loss_cls: 2.4522 (2.6693) grad_norm: 1.5028 (1.6446) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 08:53:32 root] (utils.py 283): INFO Epoch: [10] [1120/2502] eta: 1:07:44 lr: 0.000008 loss_cls: 2.8494 (2.6704) grad_norm: 1.5676 (1.6439) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 08:54:01 root] (utils.py 283): INFO Epoch: [10] [1130/2502] eta: 1:07:15 lr: 0.000008 loss_cls: 2.6263 (2.6696) grad_norm: 1.5844 (1.6434) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 08:54:30 root] (utils.py 283): INFO Epoch: [10] [1140/2502] eta: 1:06:46 lr: 0.000008 loss_cls: 2.5675 (2.6683) grad_norm: 1.5668 (1.6431) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 08:55:00 root] (utils.py 283): INFO Epoch: [10] [1150/2502] eta: 1:06:16 lr: 0.000008 loss_cls: 2.8036 (2.6689) grad_norm: 1.6385 (1.6449) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 08:55:29 root] (utils.py 283): INFO Epoch: [10] [1160/2502] eta: 1:05:47 lr: 0.000008 loss_cls: 2.8267 (2.6706) grad_norm: 1.6809 (1.6447) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 08:55:59 root] (utils.py 283): INFO Epoch: [10] [1170/2502] eta: 1:05:17 lr: 0.000008 loss_cls: 2.8140 (2.6704) grad_norm: 1.6316 (1.6444) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 08:56:28 root] (utils.py 283): INFO Epoch: [10] [1180/2502] eta: 1:04:48 lr: 0.000008 loss_cls: 2.8611 (2.6712) grad_norm: 1.6171 (1.6435) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 08:56:57 root] (utils.py 283): INFO Epoch: [10] [1190/2502] eta: 1:04:18 lr: 0.000008 loss_cls: 2.7604 (2.6700) grad_norm: 1.5330 (1.6426) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 08:57:27 root] (utils.py 283): INFO Epoch: [10] [1200/2502] eta: 1:03:49 lr: 0.000008 loss_cls: 2.8278 (2.6716) grad_norm: 1.5638 (1.6442) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 08:57:56 root] (utils.py 283): INFO Epoch: [10] [1210/2502] eta: 1:03:20 lr: 0.000008 loss_cls: 2.8746 (2.6705) grad_norm: 1.4418 (1.6425) time: 2.9353 data: 0.0003 max mem: 29202 +[2024-12-11 08:58:25 root] (utils.py 283): INFO Epoch: [10] [1220/2502] eta: 1:02:50 lr: 0.000008 loss_cls: 2.8531 (2.6703) grad_norm: 1.4191 (1.6412) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-11 08:58:55 root] (utils.py 283): INFO Epoch: [10] [1230/2502] eta: 1:02:21 lr: 0.000008 loss_cls: 2.9030 (2.6717) grad_norm: 1.5424 (1.6425) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 08:59:24 root] (utils.py 283): INFO Epoch: [10] [1240/2502] eta: 1:01:51 lr: 0.000008 loss_cls: 2.9448 (2.6731) grad_norm: 1.7353 (1.6432) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 08:59:54 root] (utils.py 283): INFO Epoch: [10] [1250/2502] eta: 1:01:22 lr: 0.000008 loss_cls: 2.9261 (2.6740) grad_norm: 1.5651 (1.6428) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 09:00:23 root] (utils.py 283): INFO Epoch: [10] [1260/2502] eta: 1:00:52 lr: 0.000008 loss_cls: 2.8532 (2.6750) grad_norm: 1.5651 (1.6435) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 09:00:52 root] (utils.py 283): INFO Epoch: [10] [1270/2502] eta: 1:00:23 lr: 0.000008 loss_cls: 2.8461 (2.6758) grad_norm: 1.5195 (1.6424) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 09:01:22 root] (utils.py 283): INFO Epoch: [10] [1280/2502] eta: 0:59:53 lr: 0.000008 loss_cls: 2.8092 (2.6759) grad_norm: 1.5133 (1.6428) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 09:01:51 root] (utils.py 283): INFO Epoch: [10] [1290/2502] eta: 0:59:24 lr: 0.000008 loss_cls: 2.8092 (2.6755) grad_norm: 1.6007 (1.6425) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 09:02:21 root] (utils.py 283): INFO Epoch: [10] [1300/2502] eta: 0:58:55 lr: 0.000008 loss_cls: 2.6936 (2.6760) grad_norm: 1.5800 (1.6421) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 09:02:50 root] (utils.py 283): INFO Epoch: [10] [1310/2502] eta: 0:58:25 lr: 0.000008 loss_cls: 2.8031 (2.6760) grad_norm: 1.5491 (1.6416) time: 2.9353 data: 0.0003 max mem: 29202 +[2024-12-11 09:03:19 root] (utils.py 283): INFO Epoch: [10] [1320/2502] eta: 0:57:56 lr: 0.000008 loss_cls: 2.8031 (2.6767) grad_norm: 1.4862 (1.6419) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 09:03:49 root] (utils.py 283): INFO Epoch: [10] [1330/2502] eta: 0:57:26 lr: 0.000008 loss_cls: 2.6782 (2.6761) grad_norm: 1.5644 (1.6418) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 09:04:18 root] (utils.py 283): INFO Epoch: [10] [1340/2502] eta: 0:56:57 lr: 0.000008 loss_cls: 2.7450 (2.6771) grad_norm: 1.6709 (1.6421) time: 2.9333 data: 0.0003 max mem: 29202 +[2024-12-11 09:04:47 root] (utils.py 283): INFO Epoch: [10] [1350/2502] eta: 0:56:27 lr: 0.000008 loss_cls: 2.8528 (2.6771) grad_norm: 1.6284 (1.6414) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 09:05:17 root] (utils.py 283): INFO Epoch: [10] [1360/2502] eta: 0:55:58 lr: 0.000008 loss_cls: 2.9271 (2.6784) grad_norm: 1.5735 (1.6413) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 09:05:46 root] (utils.py 283): INFO Epoch: [10] [1370/2502] eta: 0:55:28 lr: 0.000008 loss_cls: 2.8555 (2.6796) grad_norm: 1.6275 (1.6419) time: 2.9316 data: 0.0003 max mem: 29202 +[2024-12-11 09:06:15 root] (utils.py 283): INFO Epoch: [10] [1380/2502] eta: 0:54:59 lr: 0.000008 loss_cls: 2.8200 (2.6803) grad_norm: 1.5863 (1.6413) time: 2.9336 data: 0.0003 max mem: 29202 +[2024-12-11 09:06:45 root] (utils.py 283): INFO Epoch: [10] [1390/2502] eta: 0:54:30 lr: 0.000008 loss_cls: 2.5964 (2.6796) grad_norm: 1.5861 (1.6411) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 09:07:14 root] (utils.py 283): INFO Epoch: [10] [1400/2502] eta: 0:54:00 lr: 0.000008 loss_cls: 2.8527 (2.6806) grad_norm: 1.5822 (1.6407) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 09:07:44 root] (utils.py 283): INFO Epoch: [10] [1410/2502] eta: 0:53:31 lr: 0.000008 loss_cls: 2.8679 (2.6806) grad_norm: 1.6044 (1.6405) time: 2.9489 data: 0.0003 max mem: 29202 +[2024-12-11 09:08:13 root] (utils.py 283): INFO Epoch: [10] [1420/2502] eta: 0:53:01 lr: 0.000008 loss_cls: 2.7718 (2.6809) grad_norm: 1.5124 (1.6397) time: 2.9457 data: 0.0003 max mem: 29202 +[2024-12-11 09:08:43 root] (utils.py 283): INFO Epoch: [10] [1430/2502] eta: 0:52:32 lr: 0.000008 loss_cls: 2.7718 (2.6811) grad_norm: 1.5079 (1.6402) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 09:09:12 root] (utils.py 283): INFO Epoch: [10] [1440/2502] eta: 0:52:03 lr: 0.000008 loss_cls: 2.8602 (2.6816) grad_norm: 1.5125 (1.6393) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 09:09:41 root] (utils.py 283): INFO Epoch: [10] [1450/2502] eta: 0:51:33 lr: 0.000008 loss_cls: 2.8704 (2.6815) grad_norm: 1.5446 (1.6396) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 09:10:11 root] (utils.py 283): INFO Epoch: [10] [1460/2502] eta: 0:51:04 lr: 0.000008 loss_cls: 2.5168 (2.6797) grad_norm: 1.5910 (1.6396) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 09:10:40 root] (utils.py 283): INFO Epoch: [10] [1470/2502] eta: 0:50:34 lr: 0.000008 loss_cls: 2.5168 (2.6796) grad_norm: 1.5038 (1.6385) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-11 09:11:09 root] (utils.py 283): INFO Epoch: [10] [1480/2502] eta: 0:50:05 lr: 0.000008 loss_cls: 2.6963 (2.6807) grad_norm: 1.5335 (1.6387) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 09:11:39 root] (utils.py 283): INFO Epoch: [10] [1490/2502] eta: 0:49:35 lr: 0.000008 loss_cls: 2.9147 (2.6807) grad_norm: 1.4915 (1.6382) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 09:12:08 root] (utils.py 283): INFO Epoch: [10] [1500/2502] eta: 0:49:06 lr: 0.000008 loss_cls: 2.9147 (2.6806) grad_norm: 1.4915 (1.6376) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 09:12:37 root] (utils.py 283): INFO Epoch: [10] [1510/2502] eta: 0:48:37 lr: 0.000008 loss_cls: 2.7006 (2.6811) grad_norm: 1.5659 (1.6374) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 09:13:07 root] (utils.py 283): INFO Epoch: [10] [1520/2502] eta: 0:48:07 lr: 0.000008 loss_cls: 2.8554 (2.6834) grad_norm: 1.5601 (1.6378) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 09:13:36 root] (utils.py 283): INFO Epoch: [10] [1530/2502] eta: 0:47:38 lr: 0.000008 loss_cls: 2.9794 (2.6842) grad_norm: 1.6217 (1.6377) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 09:14:06 root] (utils.py 283): INFO Epoch: [10] [1540/2502] eta: 0:47:08 lr: 0.000008 loss_cls: 2.8754 (2.6845) grad_norm: 1.6217 (1.6378) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 09:14:35 root] (utils.py 283): INFO Epoch: [10] [1550/2502] eta: 0:46:39 lr: 0.000008 loss_cls: 2.8754 (2.6849) grad_norm: 1.5254 (1.6373) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 09:15:04 root] (utils.py 283): INFO Epoch: [10] [1560/2502] eta: 0:46:09 lr: 0.000008 loss_cls: 2.8051 (2.6853) grad_norm: 1.5599 (1.6375) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 09:15:34 root] (utils.py 283): INFO Epoch: [10] [1570/2502] eta: 0:45:40 lr: 0.000008 loss_cls: 2.8728 (2.6859) grad_norm: 1.6320 (1.6373) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 09:16:03 root] (utils.py 283): INFO Epoch: [10] [1580/2502] eta: 0:45:11 lr: 0.000008 loss_cls: 2.8783 (2.6853) grad_norm: 1.6056 (1.6370) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 09:16:33 root] (utils.py 283): INFO Epoch: [10] [1590/2502] eta: 0:44:41 lr: 0.000008 loss_cls: 2.7959 (2.6852) grad_norm: 1.5718 (1.6371) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 09:17:02 root] (utils.py 283): INFO Epoch: [10] [1600/2502] eta: 0:44:12 lr: 0.000008 loss_cls: 2.6813 (2.6849) grad_norm: 1.5611 (1.6367) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 09:17:31 root] (utils.py 283): INFO Epoch: [10] [1610/2502] eta: 0:43:42 lr: 0.000008 loss_cls: 2.6463 (2.6841) grad_norm: 1.5611 (1.6376) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 09:18:01 root] (utils.py 283): INFO Epoch: [10] [1620/2502] eta: 0:43:13 lr: 0.000008 loss_cls: 2.4924 (2.6823) grad_norm: 1.5655 (1.6371) time: 2.9353 data: 0.0003 max mem: 29202 +[2024-12-11 09:18:30 root] (utils.py 283): INFO Epoch: [10] [1630/2502] eta: 0:42:44 lr: 0.000008 loss_cls: 2.7051 (2.6830) grad_norm: 1.5370 (1.6371) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 09:19:00 root] (utils.py 283): INFO Epoch: [10] [1640/2502] eta: 0:42:14 lr: 0.000008 loss_cls: 2.9515 (2.6846) grad_norm: 1.5370 (1.6370) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-11 09:19:29 root] (utils.py 283): INFO Epoch: [10] [1650/2502] eta: 0:41:45 lr: 0.000008 loss_cls: 2.9630 (2.6856) grad_norm: 1.6225 (1.6379) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-11 09:19:58 root] (utils.py 283): INFO Epoch: [10] [1660/2502] eta: 0:41:15 lr: 0.000008 loss_cls: 2.8877 (2.6862) grad_norm: 1.6924 (1.6380) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 09:20:28 root] (utils.py 283): INFO Epoch: [10] [1670/2502] eta: 0:40:46 lr: 0.000008 loss_cls: 2.7752 (2.6860) grad_norm: 1.6924 (1.6398) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 09:20:57 root] (utils.py 283): INFO Epoch: [10] [1680/2502] eta: 0:40:17 lr: 0.000008 loss_cls: 2.7606 (2.6860) grad_norm: 1.6453 (1.6406) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 09:21:27 root] (utils.py 283): INFO Epoch: [10] [1690/2502] eta: 0:39:47 lr: 0.000008 loss_cls: 2.8038 (2.6869) grad_norm: 1.6146 (1.6408) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 09:21:56 root] (utils.py 283): INFO Epoch: [10] [1700/2502] eta: 0:39:18 lr: 0.000008 loss_cls: 2.8923 (2.6875) grad_norm: 1.5880 (1.6405) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 09:22:25 root] (utils.py 283): INFO Epoch: [10] [1710/2502] eta: 0:38:48 lr: 0.000008 loss_cls: 2.8857 (2.6881) grad_norm: 1.5851 (1.6406) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 09:22:55 root] (utils.py 283): INFO Epoch: [10] [1720/2502] eta: 0:38:19 lr: 0.000008 loss_cls: 2.8286 (2.6877) grad_norm: 1.5851 (1.6402) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 09:23:24 root] (utils.py 283): INFO Epoch: [10] [1730/2502] eta: 0:37:49 lr: 0.000008 loss_cls: 2.5457 (2.6877) grad_norm: 1.5766 (1.6406) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 09:23:53 root] (utils.py 283): INFO Epoch: [10] [1740/2502] eta: 0:37:20 lr: 0.000008 loss_cls: 2.6338 (2.6869) grad_norm: 1.5326 (1.6399) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 09:24:23 root] (utils.py 283): INFO Epoch: [10] [1750/2502] eta: 0:36:51 lr: 0.000008 loss_cls: 2.6144 (2.6873) grad_norm: 1.5243 (1.6397) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 09:24:52 root] (utils.py 283): INFO Epoch: [10] [1760/2502] eta: 0:36:21 lr: 0.000008 loss_cls: 2.7561 (2.6875) grad_norm: 1.5243 (1.6412) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-11 09:25:22 root] (utils.py 283): INFO Epoch: [10] [1770/2502] eta: 0:35:52 lr: 0.000008 loss_cls: 2.9736 (2.6886) grad_norm: 1.5449 (1.6414) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-11 09:25:51 root] (utils.py 283): INFO Epoch: [10] [1780/2502] eta: 0:35:22 lr: 0.000008 loss_cls: 2.8973 (2.6882) grad_norm: 1.6625 (1.6410) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 09:26:20 root] (utils.py 283): INFO Epoch: [10] [1790/2502] eta: 0:34:53 lr: 0.000008 loss_cls: 2.8452 (2.6889) grad_norm: 1.6171 (1.6410) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 09:26:50 root] (utils.py 283): INFO Epoch: [10] [1800/2502] eta: 0:34:24 lr: 0.000008 loss_cls: 2.8803 (2.6891) grad_norm: 1.5360 (1.6408) time: 2.9324 data: 0.0003 max mem: 29202 +[2024-12-11 09:27:19 root] (utils.py 283): INFO Epoch: [10] [1810/2502] eta: 0:33:54 lr: 0.000008 loss_cls: 2.8443 (2.6899) grad_norm: 1.6082 (1.6411) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 09:27:49 root] (utils.py 283): INFO Epoch: [10] [1820/2502] eta: 0:33:25 lr: 0.000008 loss_cls: 2.7766 (2.6884) grad_norm: 1.6082 (1.6408) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 09:28:18 root] (utils.py 283): INFO Epoch: [10] [1830/2502] eta: 0:32:55 lr: 0.000008 loss_cls: 2.7433 (2.6888) grad_norm: 1.4977 (1.6402) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 09:28:47 root] (utils.py 283): INFO Epoch: [10] [1840/2502] eta: 0:32:26 lr: 0.000008 loss_cls: 2.8338 (2.6896) grad_norm: 1.4521 (1.6396) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 09:29:17 root] (utils.py 283): INFO Epoch: [10] [1850/2502] eta: 0:31:57 lr: 0.000008 loss_cls: 2.8145 (2.6895) grad_norm: 1.5298 (1.6399) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 09:29:46 root] (utils.py 283): INFO Epoch: [10] [1860/2502] eta: 0:31:27 lr: 0.000008 loss_cls: 2.8720 (2.6905) grad_norm: 1.5635 (1.6394) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 09:30:16 root] (utils.py 283): INFO Epoch: [10] [1870/2502] eta: 0:30:58 lr: 0.000008 loss_cls: 2.7696 (2.6894) grad_norm: 1.5290 (1.6388) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 09:30:45 root] (utils.py 283): INFO Epoch: [10] [1880/2502] eta: 0:30:28 lr: 0.000008 loss_cls: 2.7696 (2.6899) grad_norm: 1.5290 (1.6391) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-11 09:31:15 root] (utils.py 283): INFO Epoch: [10] [1890/2502] eta: 0:29:59 lr: 0.000008 loss_cls: 2.7900 (2.6900) grad_norm: 1.5774 (1.6392) time: 2.9499 data: 0.0003 max mem: 29202 +[2024-12-11 09:31:44 root] (utils.py 283): INFO Epoch: [10] [1900/2502] eta: 0:29:30 lr: 0.000008 loss_cls: 2.7008 (2.6902) grad_norm: 1.5326 (1.6394) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 09:32:13 root] (utils.py 283): INFO Epoch: [10] [1910/2502] eta: 0:29:00 lr: 0.000008 loss_cls: 2.7356 (2.6896) grad_norm: 1.5890 (1.6392) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 09:32:43 root] (utils.py 283): INFO Epoch: [10] [1920/2502] eta: 0:28:31 lr: 0.000008 loss_cls: 2.7460 (2.6904) grad_norm: 1.5260 (1.6388) time: 2.9328 data: 0.0003 max mem: 29202 +[2024-12-11 09:33:12 root] (utils.py 283): INFO Epoch: [10] [1930/2502] eta: 0:28:01 lr: 0.000008 loss_cls: 2.8880 (2.6908) grad_norm: 1.4911 (1.6383) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 09:33:41 root] (utils.py 283): INFO Epoch: [10] [1940/2502] eta: 0:27:32 lr: 0.000008 loss_cls: 2.8846 (2.6916) grad_norm: 1.6306 (1.6385) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 09:34:11 root] (utils.py 283): INFO Epoch: [10] [1950/2502] eta: 0:27:02 lr: 0.000008 loss_cls: 2.8356 (2.6917) grad_norm: 1.5991 (1.6381) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 09:34:40 root] (utils.py 283): INFO Epoch: [10] [1960/2502] eta: 0:26:33 lr: 0.000008 loss_cls: 2.7405 (2.6914) grad_norm: 1.6208 (1.6380) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 09:35:09 root] (utils.py 283): INFO Epoch: [10] [1970/2502] eta: 0:26:04 lr: 0.000008 loss_cls: 2.5645 (2.6907) grad_norm: 1.5646 (1.6373) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 09:35:39 root] (utils.py 283): INFO Epoch: [10] [1980/2502] eta: 0:25:34 lr: 0.000008 loss_cls: 2.7658 (2.6915) grad_norm: 1.5109 (1.6370) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 09:36:08 root] (utils.py 283): INFO Epoch: [10] [1990/2502] eta: 0:25:05 lr: 0.000008 loss_cls: 2.7348 (2.6907) grad_norm: 1.5440 (1.6375) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 09:36:38 root] (utils.py 283): INFO Epoch: [10] [2000/2502] eta: 0:24:35 lr: 0.000008 loss_cls: 2.5769 (2.6906) grad_norm: 1.5255 (1.6371) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 09:37:07 root] (utils.py 283): INFO Epoch: [10] [2010/2502] eta: 0:24:06 lr: 0.000008 loss_cls: 2.8345 (2.6913) grad_norm: 1.5181 (1.6365) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 09:37:36 root] (utils.py 283): INFO Epoch: [10] [2020/2502] eta: 0:23:37 lr: 0.000008 loss_cls: 2.7453 (2.6908) grad_norm: 1.5663 (1.6365) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 09:38:06 root] (utils.py 283): INFO Epoch: [10] [2030/2502] eta: 0:23:07 lr: 0.000008 loss_cls: 2.8328 (2.6922) grad_norm: 1.5716 (1.6365) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 09:38:35 root] (utils.py 283): INFO Epoch: [10] [2040/2502] eta: 0:22:38 lr: 0.000008 loss_cls: 2.8328 (2.6918) grad_norm: 1.4848 (1.6361) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 09:39:05 root] (utils.py 283): INFO Epoch: [10] [2050/2502] eta: 0:22:08 lr: 0.000008 loss_cls: 2.6476 (2.6916) grad_norm: 1.4668 (1.6353) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 09:39:34 root] (utils.py 283): INFO Epoch: [10] [2060/2502] eta: 0:21:39 lr: 0.000008 loss_cls: 2.6889 (2.6909) grad_norm: 1.4668 (1.6350) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 09:40:03 root] (utils.py 283): INFO Epoch: [10] [2070/2502] eta: 0:21:10 lr: 0.000008 loss_cls: 2.8514 (2.6925) grad_norm: 1.5397 (1.6349) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 09:40:33 root] (utils.py 283): INFO Epoch: [10] [2080/2502] eta: 0:20:40 lr: 0.000008 loss_cls: 3.0903 (2.6933) grad_norm: 1.5513 (1.6348) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 09:41:02 root] (utils.py 283): INFO Epoch: [10] [2090/2502] eta: 0:20:11 lr: 0.000008 loss_cls: 2.8407 (2.6942) grad_norm: 1.5725 (1.6356) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 09:41:31 root] (utils.py 283): INFO Epoch: [10] [2100/2502] eta: 0:19:41 lr: 0.000008 loss_cls: 2.8407 (2.6948) grad_norm: 1.5673 (1.6353) time: 2.9283 data: 0.0003 max mem: 29202 +[2024-12-11 09:42:01 root] (utils.py 283): INFO Epoch: [10] [2110/2502] eta: 0:19:12 lr: 0.000008 loss_cls: 2.9175 (2.6952) grad_norm: 1.5351 (1.6351) time: 2.9231 data: 0.0003 max mem: 29202 +[2024-12-11 09:42:30 root] (utils.py 283): INFO Epoch: [10] [2120/2502] eta: 0:18:43 lr: 0.000008 loss_cls: 2.8918 (2.6950) grad_norm: 1.5945 (1.6364) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-11 09:42:59 root] (utils.py 283): INFO Epoch: [10] [2130/2502] eta: 0:18:13 lr: 0.000008 loss_cls: 2.8918 (2.6956) grad_norm: 1.5945 (1.6368) time: 2.9336 data: 0.0003 max mem: 29202 +[2024-12-11 09:43:28 root] (utils.py 283): INFO Epoch: [10] [2140/2502] eta: 0:17:44 lr: 0.000008 loss_cls: 2.8040 (2.6964) grad_norm: 1.5808 (1.6367) time: 2.9225 data: 0.0003 max mem: 29202 +[2024-12-11 09:43:58 root] (utils.py 283): INFO Epoch: [10] [2150/2502] eta: 0:17:14 lr: 0.000008 loss_cls: 2.8026 (2.6961) grad_norm: 1.5238 (1.6360) time: 2.9234 data: 0.0003 max mem: 29202 +[2024-12-11 09:44:27 root] (utils.py 283): INFO Epoch: [10] [2160/2502] eta: 0:16:45 lr: 0.000008 loss_cls: 2.6747 (2.6963) grad_norm: 1.5662 (1.6364) time: 2.9246 data: 0.0003 max mem: 29202 +[2024-12-11 09:44:56 root] (utils.py 283): INFO Epoch: [10] [2170/2502] eta: 0:16:15 lr: 0.000008 loss_cls: 2.6747 (2.6958) grad_norm: 1.6492 (1.6366) time: 2.9215 data: 0.0003 max mem: 29202 +[2024-12-11 09:45:25 root] (utils.py 283): INFO Epoch: [10] [2180/2502] eta: 0:15:46 lr: 0.000008 loss_cls: 2.7950 (2.6961) grad_norm: 1.6001 (1.6367) time: 2.9202 data: 0.0003 max mem: 29202 +[2024-12-11 09:45:55 root] (utils.py 283): INFO Epoch: [10] [2190/2502] eta: 0:15:17 lr: 0.000008 loss_cls: 2.7546 (2.6954) grad_norm: 1.5648 (1.6366) time: 2.9205 data: 0.0003 max mem: 29202 +[2024-12-11 09:46:24 root] (utils.py 283): INFO Epoch: [10] [2200/2502] eta: 0:14:47 lr: 0.000008 loss_cls: 2.7336 (2.6954) grad_norm: 1.6133 (1.6364) time: 2.9236 data: 0.0003 max mem: 29202 +[2024-12-11 09:46:53 root] (utils.py 283): INFO Epoch: [10] [2210/2502] eta: 0:14:18 lr: 0.000008 loss_cls: 2.4552 (2.6935) grad_norm: 1.5452 (1.6361) time: 2.9251 data: 0.0003 max mem: 29202 +[2024-12-11 09:47:22 root] (utils.py 283): INFO Epoch: [10] [2220/2502] eta: 0:13:48 lr: 0.000008 loss_cls: 2.4025 (2.6937) grad_norm: 1.4991 (1.6361) time: 2.9254 data: 0.0003 max mem: 29202 +[2024-12-11 09:47:52 root] (utils.py 283): INFO Epoch: [10] [2230/2502] eta: 0:13:19 lr: 0.000008 loss_cls: 2.7599 (2.6932) grad_norm: 1.4706 (1.6356) time: 2.9256 data: 0.0003 max mem: 29202 +[2024-12-11 09:48:21 root] (utils.py 283): INFO Epoch: [10] [2240/2502] eta: 0:12:50 lr: 0.000008 loss_cls: 2.7980 (2.6941) grad_norm: 1.4071 (1.6347) time: 2.9268 data: 0.0003 max mem: 29202 +[2024-12-11 09:48:50 root] (utils.py 283): INFO Epoch: [10] [2250/2502] eta: 0:12:20 lr: 0.000008 loss_cls: 2.8091 (2.6947) grad_norm: 1.5318 (1.6355) time: 2.9270 data: 0.0003 max mem: 29202 +[2024-12-11 09:49:19 root] (utils.py 283): INFO Epoch: [10] [2260/2502] eta: 0:11:51 lr: 0.000008 loss_cls: 2.8376 (2.6945) grad_norm: 1.5415 (1.6349) time: 2.9246 data: 0.0003 max mem: 29202 +[2024-12-11 09:49:49 root] (utils.py 283): INFO Epoch: [10] [2270/2502] eta: 0:11:21 lr: 0.000008 loss_cls: 2.9361 (2.6949) grad_norm: 1.5024 (1.6349) time: 2.9225 data: 0.0003 max mem: 29202 +[2024-12-11 09:50:18 root] (utils.py 283): INFO Epoch: [10] [2280/2502] eta: 0:10:52 lr: 0.000008 loss_cls: 2.7083 (2.6941) grad_norm: 1.6169 (1.6354) time: 2.9219 data: 0.0003 max mem: 29202 +[2024-12-11 09:50:47 root] (utils.py 283): INFO Epoch: [10] [2290/2502] eta: 0:10:23 lr: 0.000008 loss_cls: 2.7083 (2.6937) grad_norm: 1.6016 (1.6351) time: 2.9221 data: 0.0003 max mem: 29202 +[2024-12-11 09:51:16 root] (utils.py 283): INFO Epoch: [10] [2300/2502] eta: 0:09:53 lr: 0.000008 loss_cls: 2.7978 (2.6939) grad_norm: 1.5138 (1.6349) time: 2.9228 data: 0.0003 max mem: 29202 +[2024-12-11 09:51:46 root] (utils.py 283): INFO Epoch: [10] [2310/2502] eta: 0:09:24 lr: 0.000008 loss_cls: 2.9345 (2.6945) grad_norm: 1.5409 (1.6348) time: 2.9230 data: 0.0003 max mem: 29202 +[2024-12-11 09:52:15 root] (utils.py 283): INFO Epoch: [10] [2320/2502] eta: 0:08:54 lr: 0.000008 loss_cls: 2.8193 (2.6938) grad_norm: 1.6601 (1.6352) time: 2.9237 data: 0.0003 max mem: 29202 +[2024-12-11 09:52:44 root] (utils.py 283): INFO Epoch: [10] [2330/2502] eta: 0:08:25 lr: 0.000008 loss_cls: 2.6400 (2.6938) grad_norm: 1.7036 (1.6353) time: 2.9241 data: 0.0003 max mem: 29202 +[2024-12-11 09:53:13 root] (utils.py 283): INFO Epoch: [10] [2340/2502] eta: 0:07:56 lr: 0.000008 loss_cls: 2.9041 (2.6933) grad_norm: 1.5898 (1.6351) time: 2.9254 data: 0.0003 max mem: 29202 +[2024-12-11 09:53:43 root] (utils.py 283): INFO Epoch: [10] [2350/2502] eta: 0:07:26 lr: 0.000008 loss_cls: 2.5391 (2.6925) grad_norm: 1.5050 (1.6350) time: 2.9288 data: 0.0003 max mem: 29202 +[2024-12-11 09:54:12 root] (utils.py 283): INFO Epoch: [10] [2360/2502] eta: 0:06:57 lr: 0.000008 loss_cls: 2.5852 (2.6931) grad_norm: 1.5121 (1.6347) time: 2.9321 data: 0.0003 max mem: 29202 +[2024-12-11 09:54:41 root] (utils.py 283): INFO Epoch: [10] [2370/2502] eta: 0:06:27 lr: 0.000008 loss_cls: 2.8504 (2.6935) grad_norm: 1.5477 (1.6346) time: 2.9308 data: 0.0003 max mem: 29202 +[2024-12-11 09:55:11 root] (utils.py 283): INFO Epoch: [10] [2380/2502] eta: 0:05:58 lr: 0.000008 loss_cls: 2.9203 (2.6938) grad_norm: 1.5385 (1.6342) time: 2.9295 data: 0.0003 max mem: 29202 +[2024-12-11 09:55:40 root] (utils.py 283): INFO Epoch: [10] [2390/2502] eta: 0:05:29 lr: 0.000008 loss_cls: 2.8385 (2.6935) grad_norm: 1.5198 (1.6341) time: 2.9277 data: 0.0003 max mem: 29202 +[2024-12-11 09:56:09 root] (utils.py 283): INFO Epoch: [10] [2400/2502] eta: 0:04:59 lr: 0.000008 loss_cls: 2.8143 (2.6934) grad_norm: 1.5355 (1.6339) time: 2.9262 data: 0.0003 max mem: 29202 +[2024-12-11 09:56:38 root] (utils.py 283): INFO Epoch: [10] [2410/2502] eta: 0:04:30 lr: 0.000008 loss_cls: 2.7452 (2.6927) grad_norm: 1.5544 (1.6339) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-11 09:57:08 root] (utils.py 283): INFO Epoch: [10] [2420/2502] eta: 0:04:00 lr: 0.000008 loss_cls: 2.6600 (2.6931) grad_norm: 1.5987 (1.6341) time: 2.9304 data: 0.0003 max mem: 29202 +[2024-12-11 09:57:37 root] (utils.py 283): INFO Epoch: [10] [2430/2502] eta: 0:03:31 lr: 0.000008 loss_cls: 2.7069 (2.6926) grad_norm: 1.6091 (1.6342) time: 2.9311 data: 0.0003 max mem: 29202 +[2024-12-11 09:58:06 root] (utils.py 283): INFO Epoch: [10] [2440/2502] eta: 0:03:02 lr: 0.000008 loss_cls: 2.6911 (2.6926) grad_norm: 1.5697 (1.6340) time: 2.9277 data: 0.0003 max mem: 29202 +[2024-12-11 09:58:35 root] (utils.py 283): INFO Epoch: [10] [2450/2502] eta: 0:02:32 lr: 0.000008 loss_cls: 2.6926 (2.6921) grad_norm: 1.5616 (1.6339) time: 2.9263 data: 0.0003 max mem: 29202 +[2024-12-11 09:59:05 root] (utils.py 283): INFO Epoch: [10] [2460/2502] eta: 0:02:03 lr: 0.000008 loss_cls: 2.7870 (2.6924) grad_norm: 1.5790 (1.6340) time: 2.9236 data: 0.0003 max mem: 29202 +[2024-12-11 09:59:34 root] (utils.py 283): INFO Epoch: [10] [2470/2502] eta: 0:01:34 lr: 0.000008 loss_cls: 2.7684 (2.6922) grad_norm: 1.5197 (1.6335) time: 2.9315 data: 0.0002 max mem: 29202 +[2024-12-11 10:00:03 root] (utils.py 283): INFO Epoch: [10] [2480/2502] eta: 0:01:04 lr: 0.000008 loss_cls: 2.8093 (2.6925) grad_norm: 1.5197 (1.6338) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-11 10:00:33 root] (utils.py 283): INFO Epoch: [10] [2490/2502] eta: 0:00:35 lr: 0.000008 loss_cls: 2.8093 (2.6918) grad_norm: 1.6088 (1.6340) time: 2.9484 data: 0.0243 max mem: 29202 +[2024-12-11 10:01:02 root] (utils.py 283): INFO Epoch: [10] [2500/2502] eta: 0:00:05 lr: 0.000008 loss_cls: 2.4768 (2.6913) grad_norm: 1.6364 (1.6340) time: 2.9469 data: 0.0243 max mem: 29202 +[2024-12-11 10:01:05 root] (utils.py 283): INFO Epoch: [10] [2501/2502] eta: 0:00:02 lr: 0.000008 loss_cls: 2.4768 (2.6911) grad_norm: 1.6028 (1.6340) time: 2.9471 data: 0.0243 max mem: 29202 +[2024-12-11 10:01:05 root] (utils.py 297): INFO Epoch: [10] Total time: 2:02:31 (2.9381 s / it) +[2024-12-11 10:01:05 root] (engine.py 179): INFO Averaged stats:lr: 0.000008 loss_cls: 2.4768 (2.6964) grad_norm: 1.6028 (1.6340) +[2024-12-11 10:01:09 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2749 (0.2749) acc1: 92.9688 (92.9688) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5640 data: 0.0003 max mem: 29202 +[2024-12-11 10:01:15 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5520 (0.5576) acc1: 86.7188 (86.6477) acc3: 97.6562 (96.9460) acc5: 98.4375 (98.1534) time: 0.5524 data: 0.0004 max mem: 29202 +[2024-12-11 10:01:20 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5520 (0.6002) acc1: 85.9375 (85.6027) acc3: 96.8750 (96.0938) acc5: 97.6562 (97.8051) time: 0.5521 data: 0.0004 max mem: 29202 +[2024-12-11 10:01:26 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6265 (0.6300) acc1: 85.1562 (85.1562) acc3: 96.0938 (95.8669) acc5: 96.8750 (97.4798) time: 0.5527 data: 0.0004 max mem: 29202 +[2024-12-11 10:01:31 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6549 (0.6323) acc1: 85.1562 (85.2706) acc3: 96.0938 (95.8841) acc5: 97.6562 (97.5610) time: 0.5528 data: 0.0004 max mem: 29202 +[2024-12-11 10:01:37 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8346 (0.7138) acc1: 80.4688 (83.6244) acc3: 92.9688 (94.7610) acc5: 95.3125 (96.6452) time: 0.5532 data: 0.0004 max mem: 29202 +[2024-12-11 10:01:42 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:21 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9071 (0.7458) acc1: 77.3438 (83.1583) acc3: 89.8438 (94.1342) acc5: 92.9688 (96.1578) time: 0.5537 data: 0.0004 max mem: 29202 +[2024-12-11 10:01:48 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9529 (0.7776) acc1: 80.4688 (82.3063) acc3: 91.4062 (93.8820) acc5: 93.7500 (95.9067) time: 0.5539 data: 0.0004 max mem: 29202 +[2024-12-11 10:01:53 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9209 (0.8013) acc1: 78.9062 (81.9252) acc3: 91.4062 (93.4799) acc5: 92.9688 (95.5343) time: 0.5534 data: 0.0006 max mem: 29202 +[2024-12-11 10:01:59 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9454 (0.8311) acc1: 76.5625 (81.1384) acc3: 89.8438 (93.1405) acc5: 92.9688 (95.3297) time: 0.5536 data: 0.0006 max mem: 29202 +[2024-12-11 10:02:03 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9112 (0.8292) acc1: 75.7812 (81.1040) acc3: 90.6250 (93.2080) acc5: 94.5312 (95.4080) time: 0.5445 data: 0.0005 max mem: 29202 +[2024-12-11 10:02:03 root] (utils.py 297): INFO Test: Total time: 0:00:54 (0.5515 s / it) +[2024-12-11 10:02:03 root] (engine.py 264): INFO * Acc@1 81.272 Acc@3 93.030 Acc@5 95.352 loss 0.827 flops 13.207 layer_flops 13.109 +[2024-12-11 10:02:03 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.3% +[2024-12-11 10:02:05 root] (main.py 576): INFO Max accuracy: 81.27% +[2024-12-11 10:02:08 root] (utils.py 283): INFO Epoch: [11] [ 0/2502] eta: 2:00:04 lr: 0.000006 loss_cls: 3.1857 (3.1857) grad_norm: 1.8735 (1.8735) time: 2.8796 data: 0.0003 max mem: 29202 +[2024-12-11 10:02:37 root] (utils.py 283): INFO Epoch: [11] [ 10/2502] eta: 2:01:05 lr: 0.000006 loss_cls: 3.0646 (2.8889) grad_norm: 1.6181 (1.6395) time: 2.9157 data: 0.0003 max mem: 29202 +[2024-12-11 10:03:07 root] (utils.py 283): INFO Epoch: [11] [ 20/2502] eta: 2:00:50 lr: 0.000006 loss_cls: 3.0155 (2.8685) grad_norm: 1.6181 (1.6899) time: 2.9233 data: 0.0003 max mem: 29202 +[2024-12-11 10:03:36 root] (utils.py 283): INFO Epoch: [11] [ 30/2502] eta: 2:00:25 lr: 0.000006 loss_cls: 3.0155 (2.8487) grad_norm: 1.7538 (1.7241) time: 2.9269 data: 0.0003 max mem: 29202 +[2024-12-11 10:04:05 root] (utils.py 283): INFO Epoch: [11] [ 40/2502] eta: 1:59:58 lr: 0.000006 loss_cls: 2.8179 (2.8091) grad_norm: 1.6006 (1.6846) time: 2.9270 data: 0.0003 max mem: 29202 +[2024-12-11 10:04:34 root] (utils.py 283): INFO Epoch: [11] [ 50/2502] eta: 1:59:30 lr: 0.000006 loss_cls: 2.7524 (2.7687) grad_norm: 1.4582 (1.6621) time: 2.9264 data: 0.0003 max mem: 29202 +[2024-12-11 10:05:04 root] (utils.py 283): INFO Epoch: [11] [ 60/2502] eta: 1:59:01 lr: 0.000006 loss_cls: 2.6635 (2.7439) grad_norm: 1.6196 (1.6757) time: 2.9255 data: 0.0003 max mem: 29202 +[2024-12-11 10:05:33 root] (utils.py 283): INFO Epoch: [11] [ 70/2502] eta: 1:58:33 lr: 0.000006 loss_cls: 2.6783 (2.7390) grad_norm: 1.6360 (1.6865) time: 2.9270 data: 0.0003 max mem: 29202 +[2024-12-11 10:06:02 root] (utils.py 283): INFO Epoch: [11] [ 80/2502] eta: 1:58:05 lr: 0.000006 loss_cls: 2.7872 (2.7375) grad_norm: 1.5315 (1.6657) time: 2.9282 data: 0.0003 max mem: 29202 +[2024-12-11 10:06:31 root] (utils.py 283): INFO Epoch: [11] [ 90/2502] eta: 1:57:36 lr: 0.000006 loss_cls: 2.8019 (2.7293) grad_norm: 1.5315 (1.6574) time: 2.9280 data: 0.0003 max mem: 29202 +[2024-12-11 10:07:01 root] (utils.py 283): INFO Epoch: [11] [ 100/2502] eta: 1:57:07 lr: 0.000006 loss_cls: 2.5326 (2.7050) grad_norm: 1.5949 (1.6560) time: 2.9270 data: 0.0003 max mem: 29202 +[2024-12-11 10:07:30 root] (utils.py 283): INFO Epoch: [11] [ 110/2502] eta: 1:56:38 lr: 0.000006 loss_cls: 2.6492 (2.7148) grad_norm: 1.5736 (1.6533) time: 2.9266 data: 0.0003 max mem: 29202 +[2024-12-11 10:07:59 root] (utils.py 283): INFO Epoch: [11] [ 120/2502] eta: 1:56:09 lr: 0.000006 loss_cls: 2.8477 (2.7049) grad_norm: 1.6208 (1.6604) time: 2.9259 data: 0.0003 max mem: 29202 +[2024-12-11 10:08:28 root] (utils.py 283): INFO Epoch: [11] [ 130/2502] eta: 1:55:39 lr: 0.000006 loss_cls: 2.5526 (2.6938) grad_norm: 1.6106 (1.6568) time: 2.9252 data: 0.0003 max mem: 29202 +[2024-12-11 10:08:58 root] (utils.py 283): INFO Epoch: [11] [ 140/2502] eta: 1:55:11 lr: 0.000006 loss_cls: 2.8469 (2.7126) grad_norm: 1.5747 (1.6571) time: 2.9269 data: 0.0003 max mem: 29202 +[2024-12-11 10:09:27 root] (utils.py 283): INFO Epoch: [11] [ 150/2502] eta: 1:54:41 lr: 0.000006 loss_cls: 2.9551 (2.7121) grad_norm: 1.5248 (1.6500) time: 2.9265 data: 0.0003 max mem: 29202 +[2024-12-11 10:09:56 root] (utils.py 283): INFO Epoch: [11] [ 160/2502] eta: 1:54:12 lr: 0.000006 loss_cls: 2.7915 (2.6916) grad_norm: 1.5198 (1.6474) time: 2.9257 data: 0.0003 max mem: 29202 +[2024-12-11 10:10:26 root] (utils.py 283): INFO Epoch: [11] [ 170/2502] eta: 1:53:43 lr: 0.000006 loss_cls: 2.6491 (2.6899) grad_norm: 1.5002 (1.6391) time: 2.9256 data: 0.0003 max mem: 29202 +[2024-12-11 10:10:55 root] (utils.py 283): INFO Epoch: [11] [ 180/2502] eta: 1:53:14 lr: 0.000006 loss_cls: 2.7617 (2.6897) grad_norm: 1.5002 (1.6346) time: 2.9263 data: 0.0003 max mem: 29202 +[2024-12-11 10:11:24 root] (utils.py 283): INFO Epoch: [11] [ 190/2502] eta: 1:52:44 lr: 0.000006 loss_cls: 2.6986 (2.6818) grad_norm: 1.5524 (1.6358) time: 2.9264 data: 0.0003 max mem: 29202 +[2024-12-11 10:11:54 root] (utils.py 283): INFO Epoch: [11] [ 200/2502] eta: 1:52:18 lr: 0.000006 loss_cls: 2.6958 (2.6850) grad_norm: 1.5524 (1.6333) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 10:12:23 root] (utils.py 283): INFO Epoch: [11] [ 210/2502] eta: 1:51:51 lr: 0.000006 loss_cls: 2.7766 (2.6894) grad_norm: 1.4943 (1.6317) time: 2.9509 data: 0.0003 max mem: 29202 +[2024-12-11 10:12:53 root] (utils.py 283): INFO Epoch: [11] [ 220/2502] eta: 1:51:24 lr: 0.000006 loss_cls: 2.5862 (2.6802) grad_norm: 1.4943 (1.6317) time: 2.9489 data: 0.0003 max mem: 29202 +[2024-12-11 10:13:22 root] (utils.py 283): INFO Epoch: [11] [ 230/2502] eta: 1:50:58 lr: 0.000006 loss_cls: 2.6341 (2.6801) grad_norm: 1.5473 (1.6333) time: 2.9571 data: 0.0003 max mem: 29202 +[2024-12-11 10:13:52 root] (utils.py 283): INFO Epoch: [11] [ 240/2502] eta: 1:50:30 lr: 0.000006 loss_cls: 2.8040 (2.6829) grad_norm: 1.5394 (1.6275) time: 2.9562 data: 0.0003 max mem: 29202 +[2024-12-11 10:14:21 root] (utils.py 283): INFO Epoch: [11] [ 250/2502] eta: 1:50:02 lr: 0.000006 loss_cls: 2.7479 (2.6852) grad_norm: 1.5902 (1.6353) time: 2.9449 data: 0.0003 max mem: 29202 +[2024-12-11 10:14:50 root] (utils.py 283): INFO Epoch: [11] [ 260/2502] eta: 1:49:33 lr: 0.000006 loss_cls: 2.7720 (2.6838) grad_norm: 1.6448 (1.6345) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 10:15:20 root] (utils.py 283): INFO Epoch: [11] [ 270/2502] eta: 1:49:06 lr: 0.000006 loss_cls: 2.6733 (2.6823) grad_norm: 1.5934 (1.6342) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 10:15:49 root] (utils.py 283): INFO Epoch: [11] [ 280/2502] eta: 1:48:37 lr: 0.000006 loss_cls: 2.7621 (2.6879) grad_norm: 1.6248 (1.6348) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 10:16:19 root] (utils.py 283): INFO Epoch: [11] [ 290/2502] eta: 1:48:08 lr: 0.000006 loss_cls: 2.8702 (2.6894) grad_norm: 1.7056 (1.6389) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 10:16:48 root] (utils.py 283): INFO Epoch: [11] [ 300/2502] eta: 1:47:39 lr: 0.000006 loss_cls: 3.0241 (2.6928) grad_norm: 1.6159 (1.6366) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 10:17:18 root] (utils.py 283): INFO Epoch: [11] [ 310/2502] eta: 1:47:10 lr: 0.000006 loss_cls: 2.9011 (2.6911) grad_norm: 1.6024 (1.6406) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 10:17:47 root] (utils.py 283): INFO Epoch: [11] [ 320/2502] eta: 1:46:42 lr: 0.000006 loss_cls: 2.8729 (2.6962) grad_norm: 1.5240 (1.6383) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-11 10:18:17 root] (utils.py 283): INFO Epoch: [11] [ 330/2502] eta: 1:46:13 lr: 0.000006 loss_cls: 2.9727 (2.7047) grad_norm: 1.5240 (1.6389) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 10:18:46 root] (utils.py 283): INFO Epoch: [11] [ 340/2502] eta: 1:45:44 lr: 0.000006 loss_cls: 3.0163 (2.7067) grad_norm: 1.5631 (1.6381) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 10:19:15 root] (utils.py 283): INFO Epoch: [11] [ 350/2502] eta: 1:45:14 lr: 0.000006 loss_cls: 3.0187 (2.7111) grad_norm: 1.5941 (1.6410) time: 2.9339 data: 0.0002 max mem: 29202 +[2024-12-11 10:19:45 root] (utils.py 283): INFO Epoch: [11] [ 360/2502] eta: 1:44:45 lr: 0.000006 loss_cls: 2.9879 (2.7112) grad_norm: 1.6072 (1.6415) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 10:20:14 root] (utils.py 283): INFO Epoch: [11] [ 370/2502] eta: 1:44:16 lr: 0.000006 loss_cls: 2.7411 (2.7095) grad_norm: 1.6072 (1.6437) time: 2.9369 data: 0.0004 max mem: 29202 +[2024-12-11 10:20:43 root] (utils.py 283): INFO Epoch: [11] [ 380/2502] eta: 1:43:47 lr: 0.000006 loss_cls: 2.7051 (2.7070) grad_norm: 1.5792 (1.6422) time: 2.9352 data: 0.0004 max mem: 29202 +[2024-12-11 10:21:13 root] (utils.py 283): INFO Epoch: [11] [ 390/2502] eta: 1:43:17 lr: 0.000006 loss_cls: 2.5375 (2.7027) grad_norm: 1.5126 (1.6400) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 10:21:42 root] (utils.py 283): INFO Epoch: [11] [ 400/2502] eta: 1:42:48 lr: 0.000006 loss_cls: 2.6149 (2.7031) grad_norm: 1.5126 (1.6371) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 10:22:11 root] (utils.py 283): INFO Epoch: [11] [ 410/2502] eta: 1:42:19 lr: 0.000006 loss_cls: 2.8899 (2.7059) grad_norm: 1.5092 (1.6400) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 10:22:41 root] (utils.py 283): INFO Epoch: [11] [ 420/2502] eta: 1:41:50 lr: 0.000006 loss_cls: 2.6727 (2.7010) grad_norm: 1.5178 (1.6401) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 10:23:10 root] (utils.py 283): INFO Epoch: [11] [ 430/2502] eta: 1:41:20 lr: 0.000006 loss_cls: 2.6727 (2.7023) grad_norm: 1.5849 (1.6409) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 10:23:40 root] (utils.py 283): INFO Epoch: [11] [ 440/2502] eta: 1:40:51 lr: 0.000006 loss_cls: 2.8371 (2.7009) grad_norm: 1.6222 (1.6403) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 10:24:09 root] (utils.py 283): INFO Epoch: [11] [ 450/2502] eta: 1:40:23 lr: 0.000006 loss_cls: 2.8736 (2.7055) grad_norm: 1.5950 (1.6385) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-11 10:24:39 root] (utils.py 283): INFO Epoch: [11] [ 460/2502] eta: 1:39:54 lr: 0.000006 loss_cls: 2.9160 (2.7022) grad_norm: 1.5494 (1.6358) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-11 10:25:08 root] (utils.py 283): INFO Epoch: [11] [ 470/2502] eta: 1:39:25 lr: 0.000006 loss_cls: 2.6591 (2.7020) grad_norm: 1.5547 (1.6355) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-11 10:25:38 root] (utils.py 283): INFO Epoch: [11] [ 480/2502] eta: 1:38:56 lr: 0.000006 loss_cls: 2.7964 (2.7026) grad_norm: 1.5746 (1.6365) time: 2.9486 data: 0.0003 max mem: 29202 +[2024-12-11 10:26:07 root] (utils.py 283): INFO Epoch: [11] [ 490/2502] eta: 1:38:27 lr: 0.000006 loss_cls: 2.7164 (2.7002) grad_norm: 1.5984 (1.6368) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 10:26:36 root] (utils.py 283): INFO Epoch: [11] [ 500/2502] eta: 1:37:58 lr: 0.000006 loss_cls: 2.7454 (2.6997) grad_norm: 1.5449 (1.6390) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 10:27:06 root] (utils.py 283): INFO Epoch: [11] [ 510/2502] eta: 1:37:28 lr: 0.000006 loss_cls: 2.7454 (2.6974) grad_norm: 1.5428 (1.6397) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 10:27:35 root] (utils.py 283): INFO Epoch: [11] [ 520/2502] eta: 1:37:00 lr: 0.000006 loss_cls: 2.7769 (2.6988) grad_norm: 1.5716 (1.6381) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-11 10:28:05 root] (utils.py 283): INFO Epoch: [11] [ 530/2502] eta: 1:36:30 lr: 0.000006 loss_cls: 2.8087 (2.6985) grad_norm: 1.5674 (1.6379) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-11 10:28:34 root] (utils.py 283): INFO Epoch: [11] [ 540/2502] eta: 1:36:01 lr: 0.000006 loss_cls: 2.7092 (2.6965) grad_norm: 1.5674 (1.6376) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 10:29:03 root] (utils.py 283): INFO Epoch: [11] [ 550/2502] eta: 1:35:31 lr: 0.000006 loss_cls: 2.7092 (2.6937) grad_norm: 1.5929 (1.6386) time: 2.9338 data: 0.0003 max mem: 29202 +[2024-12-11 10:29:33 root] (utils.py 283): INFO Epoch: [11] [ 560/2502] eta: 1:35:03 lr: 0.000006 loss_cls: 2.8980 (2.6976) grad_norm: 1.5929 (1.6370) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-11 10:30:02 root] (utils.py 283): INFO Epoch: [11] [ 570/2502] eta: 1:34:34 lr: 0.000006 loss_cls: 2.9288 (2.7014) grad_norm: 1.5453 (1.6381) time: 2.9602 data: 0.0003 max mem: 29202 +[2024-12-11 10:30:32 root] (utils.py 283): INFO Epoch: [11] [ 580/2502] eta: 1:34:05 lr: 0.000006 loss_cls: 2.8015 (2.6950) grad_norm: 1.5339 (1.6356) time: 2.9499 data: 0.0003 max mem: 29202 +[2024-12-11 10:31:01 root] (utils.py 283): INFO Epoch: [11] [ 590/2502] eta: 1:33:35 lr: 0.000006 loss_cls: 2.6365 (2.6987) grad_norm: 1.5339 (1.6354) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 10:31:30 root] (utils.py 283): INFO Epoch: [11] [ 600/2502] eta: 1:33:06 lr: 0.000006 loss_cls: 2.8175 (2.6983) grad_norm: 1.5364 (1.6345) time: 2.9320 data: 0.0003 max mem: 29202 +[2024-12-11 10:32:00 root] (utils.py 283): INFO Epoch: [11] [ 610/2502] eta: 1:32:37 lr: 0.000006 loss_cls: 2.5438 (2.6938) grad_norm: 1.4985 (1.6325) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 10:32:29 root] (utils.py 283): INFO Epoch: [11] [ 620/2502] eta: 1:32:07 lr: 0.000006 loss_cls: 2.5438 (2.6916) grad_norm: 1.5306 (1.6321) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 10:32:58 root] (utils.py 283): INFO Epoch: [11] [ 630/2502] eta: 1:31:37 lr: 0.000006 loss_cls: 2.8613 (2.6957) grad_norm: 1.5096 (1.6304) time: 2.9291 data: 0.0003 max mem: 29202 +[2024-12-11 10:33:28 root] (utils.py 283): INFO Epoch: [11] [ 640/2502] eta: 1:31:08 lr: 0.000006 loss_cls: 2.8613 (2.6934) grad_norm: 1.5610 (1.6299) time: 2.9333 data: 0.0003 max mem: 29202 +[2024-12-11 10:33:57 root] (utils.py 283): INFO Epoch: [11] [ 650/2502] eta: 1:30:38 lr: 0.000006 loss_cls: 2.7986 (2.6951) grad_norm: 1.5364 (1.6297) time: 2.9323 data: 0.0003 max mem: 29202 +[2024-12-11 10:34:26 root] (utils.py 283): INFO Epoch: [11] [ 660/2502] eta: 1:30:09 lr: 0.000006 loss_cls: 2.6955 (2.6947) grad_norm: 1.5154 (1.6283) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 10:34:56 root] (utils.py 283): INFO Epoch: [11] [ 670/2502] eta: 1:29:40 lr: 0.000006 loss_cls: 2.5383 (2.6921) grad_norm: 1.5589 (1.6290) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 10:35:25 root] (utils.py 283): INFO Epoch: [11] [ 680/2502] eta: 1:29:10 lr: 0.000006 loss_cls: 2.7936 (2.6933) grad_norm: 1.6834 (1.6297) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 10:35:55 root] (utils.py 283): INFO Epoch: [11] [ 690/2502] eta: 1:28:42 lr: 0.000006 loss_cls: 2.7248 (2.6912) grad_norm: 1.6559 (1.6294) time: 2.9474 data: 0.0003 max mem: 29202 +[2024-12-11 10:36:24 root] (utils.py 283): INFO Epoch: [11] [ 700/2502] eta: 1:28:12 lr: 0.000006 loss_cls: 2.5420 (2.6915) grad_norm: 1.4887 (1.6271) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 10:36:53 root] (utils.py 283): INFO Epoch: [11] [ 710/2502] eta: 1:27:42 lr: 0.000006 loss_cls: 2.8298 (2.6937) grad_norm: 1.5272 (1.6277) time: 2.9295 data: 0.0003 max mem: 29202 +[2024-12-11 10:37:23 root] (utils.py 283): INFO Epoch: [11] [ 720/2502] eta: 1:27:13 lr: 0.000006 loss_cls: 2.7865 (2.6884) grad_norm: 1.5815 (1.6269) time: 2.9338 data: 0.0003 max mem: 29202 +[2024-12-11 10:37:52 root] (utils.py 283): INFO Epoch: [11] [ 730/2502] eta: 1:26:44 lr: 0.000006 loss_cls: 2.6176 (2.6892) grad_norm: 1.5005 (1.6251) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 10:38:22 root] (utils.py 283): INFO Epoch: [11] [ 740/2502] eta: 1:26:15 lr: 0.000006 loss_cls: 2.8392 (2.6925) grad_norm: 1.4800 (1.6240) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 10:38:51 root] (utils.py 283): INFO Epoch: [11] [ 750/2502] eta: 1:25:45 lr: 0.000006 loss_cls: 2.8093 (2.6928) grad_norm: 1.4981 (1.6234) time: 2.9334 data: 0.0003 max mem: 29202 +[2024-12-11 10:39:20 root] (utils.py 283): INFO Epoch: [11] [ 760/2502] eta: 1:25:16 lr: 0.000006 loss_cls: 2.7247 (2.6924) grad_norm: 1.5311 (1.6236) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 10:39:50 root] (utils.py 283): INFO Epoch: [11] [ 770/2502] eta: 1:24:46 lr: 0.000006 loss_cls: 2.7384 (2.6921) grad_norm: 1.5311 (1.6222) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 10:40:19 root] (utils.py 283): INFO Epoch: [11] [ 780/2502] eta: 1:24:17 lr: 0.000006 loss_cls: 2.7197 (2.6910) grad_norm: 1.5151 (1.6220) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 10:40:48 root] (utils.py 283): INFO Epoch: [11] [ 790/2502] eta: 1:23:47 lr: 0.000006 loss_cls: 2.7197 (2.6924) grad_norm: 1.5153 (1.6207) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 10:41:18 root] (utils.py 283): INFO Epoch: [11] [ 800/2502] eta: 1:23:18 lr: 0.000006 loss_cls: 2.7100 (2.6908) grad_norm: 1.4993 (1.6203) time: 2.9333 data: 0.0003 max mem: 29202 +[2024-12-11 10:41:47 root] (utils.py 283): INFO Epoch: [11] [ 810/2502] eta: 1:22:48 lr: 0.000006 loss_cls: 2.6417 (2.6891) grad_norm: 1.6143 (1.6205) time: 2.9308 data: 0.0003 max mem: 29202 +[2024-12-11 10:42:16 root] (utils.py 283): INFO Epoch: [11] [ 820/2502] eta: 1:22:19 lr: 0.000006 loss_cls: 2.6417 (2.6882) grad_norm: 1.5369 (1.6199) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 10:42:46 root] (utils.py 283): INFO Epoch: [11] [ 830/2502] eta: 1:21:50 lr: 0.000006 loss_cls: 2.5163 (2.6853) grad_norm: 1.5044 (1.6188) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 10:43:15 root] (utils.py 283): INFO Epoch: [11] [ 840/2502] eta: 1:21:20 lr: 0.000006 loss_cls: 2.7119 (2.6876) grad_norm: 1.5127 (1.6185) time: 2.9319 data: 0.0002 max mem: 29202 +[2024-12-11 10:43:44 root] (utils.py 283): INFO Epoch: [11] [ 850/2502] eta: 1:20:51 lr: 0.000006 loss_cls: 2.8789 (2.6879) grad_norm: 1.5668 (1.6185) time: 2.9312 data: 0.0002 max mem: 29202 +[2024-12-11 10:44:14 root] (utils.py 283): INFO Epoch: [11] [ 860/2502] eta: 1:20:21 lr: 0.000006 loss_cls: 2.6906 (2.6872) grad_norm: 1.4909 (1.6173) time: 2.9312 data: 0.0003 max mem: 29202 +[2024-12-11 10:44:43 root] (utils.py 283): INFO Epoch: [11] [ 870/2502] eta: 1:19:52 lr: 0.000006 loss_cls: 2.5803 (2.6857) grad_norm: 1.4909 (1.6171) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 10:45:12 root] (utils.py 283): INFO Epoch: [11] [ 880/2502] eta: 1:19:22 lr: 0.000006 loss_cls: 2.8039 (2.6854) grad_norm: 1.5197 (1.6171) time: 2.9344 data: 0.0002 max mem: 29202 +[2024-12-11 10:45:42 root] (utils.py 283): INFO Epoch: [11] [ 890/2502] eta: 1:18:53 lr: 0.000006 loss_cls: 2.8039 (2.6848) grad_norm: 1.5492 (1.6178) time: 2.9320 data: 0.0002 max mem: 29202 +[2024-12-11 10:46:11 root] (utils.py 283): INFO Epoch: [11] [ 900/2502] eta: 1:18:24 lr: 0.000006 loss_cls: 2.7990 (2.6841) grad_norm: 1.5525 (1.6173) time: 2.9320 data: 0.0003 max mem: 29202 +[2024-12-11 10:46:40 root] (utils.py 283): INFO Epoch: [11] [ 910/2502] eta: 1:17:54 lr: 0.000006 loss_cls: 2.7284 (2.6840) grad_norm: 1.5511 (1.6166) time: 2.9330 data: 0.0003 max mem: 29202 +[2024-12-11 10:47:10 root] (utils.py 283): INFO Epoch: [11] [ 920/2502] eta: 1:17:25 lr: 0.000006 loss_cls: 2.6794 (2.6837) grad_norm: 1.5434 (1.6163) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 10:47:39 root] (utils.py 283): INFO Epoch: [11] [ 930/2502] eta: 1:16:55 lr: 0.000006 loss_cls: 2.5746 (2.6838) grad_norm: 1.6206 (1.6167) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 10:48:08 root] (utils.py 283): INFO Epoch: [11] [ 940/2502] eta: 1:16:26 lr: 0.000006 loss_cls: 2.7708 (2.6843) grad_norm: 1.6301 (1.6161) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 10:48:38 root] (utils.py 283): INFO Epoch: [11] [ 950/2502] eta: 1:15:57 lr: 0.000006 loss_cls: 2.9218 (2.6858) grad_norm: 1.4891 (1.6143) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 10:49:07 root] (utils.py 283): INFO Epoch: [11] [ 960/2502] eta: 1:15:27 lr: 0.000006 loss_cls: 2.8349 (2.6850) grad_norm: 1.4655 (1.6138) time: 2.9315 data: 0.0003 max mem: 29202 +[2024-12-11 10:49:36 root] (utils.py 283): INFO Epoch: [11] [ 970/2502] eta: 1:14:58 lr: 0.000006 loss_cls: 2.8127 (2.6858) grad_norm: 1.6311 (1.6150) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 10:50:06 root] (utils.py 283): INFO Epoch: [11] [ 980/2502] eta: 1:14:28 lr: 0.000006 loss_cls: 2.9140 (2.6855) grad_norm: 1.5831 (1.6140) time: 2.9321 data: 0.0003 max mem: 29202 +[2024-12-11 10:50:35 root] (utils.py 283): INFO Epoch: [11] [ 990/2502] eta: 1:13:59 lr: 0.000006 loss_cls: 2.9140 (2.6873) grad_norm: 1.5434 (1.6140) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-11 10:51:04 root] (utils.py 283): INFO Epoch: [11] [1000/2502] eta: 1:13:30 lr: 0.000006 loss_cls: 2.7965 (2.6864) grad_norm: 1.5494 (1.6164) time: 2.9331 data: 0.0002 max mem: 29202 +[2024-12-11 10:51:34 root] (utils.py 283): INFO Epoch: [11] [1010/2502] eta: 1:13:00 lr: 0.000006 loss_cls: 2.6490 (2.6866) grad_norm: 1.5208 (1.6154) time: 2.9319 data: 0.0003 max mem: 29202 +[2024-12-11 10:52:03 root] (utils.py 283): INFO Epoch: [11] [1020/2502] eta: 1:12:31 lr: 0.000006 loss_cls: 2.7579 (2.6879) grad_norm: 1.4853 (1.6149) time: 2.9336 data: 0.0003 max mem: 29202 +[2024-12-11 10:52:33 root] (utils.py 283): INFO Epoch: [11] [1030/2502] eta: 1:12:02 lr: 0.000006 loss_cls: 2.8037 (2.6875) grad_norm: 1.5620 (1.6158) time: 2.9472 data: 0.0003 max mem: 29202 +[2024-12-11 10:53:02 root] (utils.py 283): INFO Epoch: [11] [1040/2502] eta: 1:11:33 lr: 0.000006 loss_cls: 2.8037 (2.6888) grad_norm: 1.6207 (1.6158) time: 2.9553 data: 0.0003 max mem: 29202 +[2024-12-11 10:53:32 root] (utils.py 283): INFO Epoch: [11] [1050/2502] eta: 1:11:03 lr: 0.000006 loss_cls: 2.7972 (2.6864) grad_norm: 1.6207 (1.6160) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 10:54:01 root] (utils.py 283): INFO Epoch: [11] [1060/2502] eta: 1:10:34 lr: 0.000006 loss_cls: 2.7247 (2.6875) grad_norm: 1.4909 (1.6149) time: 2.9307 data: 0.0003 max mem: 29202 +[2024-12-11 10:54:30 root] (utils.py 283): INFO Epoch: [11] [1070/2502] eta: 1:10:04 lr: 0.000006 loss_cls: 2.7957 (2.6878) grad_norm: 1.4448 (1.6147) time: 2.9294 data: 0.0003 max mem: 29202 +[2024-12-11 10:54:59 root] (utils.py 283): INFO Epoch: [11] [1080/2502] eta: 1:09:35 lr: 0.000006 loss_cls: 2.8058 (2.6881) grad_norm: 1.6550 (1.6174) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-11 10:55:29 root] (utils.py 283): INFO Epoch: [11] [1090/2502] eta: 1:09:06 lr: 0.000006 loss_cls: 2.7577 (2.6893) grad_norm: 1.7181 (1.6188) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 10:55:58 root] (utils.py 283): INFO Epoch: [11] [1100/2502] eta: 1:08:36 lr: 0.000006 loss_cls: 2.9043 (2.6915) grad_norm: 1.6475 (1.6184) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 10:56:28 root] (utils.py 283): INFO Epoch: [11] [1110/2502] eta: 1:08:07 lr: 0.000006 loss_cls: 2.9237 (2.6908) grad_norm: 1.5856 (1.6179) time: 2.9488 data: 0.0003 max mem: 29202 +[2024-12-11 10:56:57 root] (utils.py 283): INFO Epoch: [11] [1120/2502] eta: 1:07:38 lr: 0.000006 loss_cls: 2.8619 (2.6899) grad_norm: 1.4593 (1.6171) time: 2.9519 data: 0.0003 max mem: 29202 +[2024-12-11 10:57:27 root] (utils.py 283): INFO Epoch: [11] [1130/2502] eta: 1:07:09 lr: 0.000006 loss_cls: 2.7817 (2.6905) grad_norm: 1.4569 (1.6174) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 10:57:56 root] (utils.py 283): INFO Epoch: [11] [1140/2502] eta: 1:06:39 lr: 0.000006 loss_cls: 2.7741 (2.6914) grad_norm: 1.5946 (1.6177) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 10:58:25 root] (utils.py 283): INFO Epoch: [11] [1150/2502] eta: 1:06:10 lr: 0.000006 loss_cls: 2.6478 (2.6896) grad_norm: 1.5946 (1.6179) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 10:58:55 root] (utils.py 283): INFO Epoch: [11] [1160/2502] eta: 1:05:41 lr: 0.000006 loss_cls: 2.6478 (2.6906) grad_norm: 1.6713 (1.6184) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 10:59:24 root] (utils.py 283): INFO Epoch: [11] [1170/2502] eta: 1:05:11 lr: 0.000006 loss_cls: 2.7818 (2.6918) grad_norm: 1.6183 (1.6196) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 10:59:54 root] (utils.py 283): INFO Epoch: [11] [1180/2502] eta: 1:04:42 lr: 0.000006 loss_cls: 2.9251 (2.6929) grad_norm: 1.6170 (1.6201) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 11:00:23 root] (utils.py 283): INFO Epoch: [11] [1190/2502] eta: 1:04:12 lr: 0.000006 loss_cls: 2.9235 (2.6933) grad_norm: 1.6449 (1.6205) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-11 11:00:52 root] (utils.py 283): INFO Epoch: [11] [1200/2502] eta: 1:03:43 lr: 0.000006 loss_cls: 2.8188 (2.6935) grad_norm: 1.6257 (1.6208) time: 2.9329 data: 0.0003 max mem: 29202 +[2024-12-11 11:01:22 root] (utils.py 283): INFO Epoch: [11] [1210/2502] eta: 1:03:14 lr: 0.000006 loss_cls: 2.9063 (2.6950) grad_norm: 1.6458 (1.6215) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-11 11:01:51 root] (utils.py 283): INFO Epoch: [11] [1220/2502] eta: 1:02:44 lr: 0.000006 loss_cls: 2.7739 (2.6933) grad_norm: 1.5693 (1.6208) time: 2.9336 data: 0.0003 max mem: 29202 +[2024-12-11 11:02:20 root] (utils.py 283): INFO Epoch: [11] [1230/2502] eta: 1:02:15 lr: 0.000006 loss_cls: 2.7739 (2.6941) grad_norm: 1.4826 (1.6201) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 11:02:50 root] (utils.py 283): INFO Epoch: [11] [1240/2502] eta: 1:01:46 lr: 0.000006 loss_cls: 2.8414 (2.6938) grad_norm: 1.4751 (1.6198) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 11:03:19 root] (utils.py 283): INFO Epoch: [11] [1250/2502] eta: 1:01:16 lr: 0.000006 loss_cls: 2.7574 (2.6918) grad_norm: 1.4741 (1.6195) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-11 11:03:48 root] (utils.py 283): INFO Epoch: [11] [1260/2502] eta: 1:00:47 lr: 0.000006 loss_cls: 2.5935 (2.6910) grad_norm: 1.6149 (1.6200) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 11:04:18 root] (utils.py 283): INFO Epoch: [11] [1270/2502] eta: 1:00:17 lr: 0.000006 loss_cls: 2.7863 (2.6920) grad_norm: 1.5798 (1.6191) time: 2.9328 data: 0.0003 max mem: 29202 +[2024-12-11 11:04:47 root] (utils.py 283): INFO Epoch: [11] [1280/2502] eta: 0:59:48 lr: 0.000006 loss_cls: 2.8547 (2.6917) grad_norm: 1.5456 (1.6186) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 11:05:17 root] (utils.py 283): INFO Epoch: [11] [1290/2502] eta: 0:59:19 lr: 0.000006 loss_cls: 2.7643 (2.6911) grad_norm: 1.5608 (1.6199) time: 2.9513 data: 0.0003 max mem: 29202 +[2024-12-11 11:05:46 root] (utils.py 283): INFO Epoch: [11] [1300/2502] eta: 0:58:50 lr: 0.000006 loss_cls: 2.7896 (2.6909) grad_norm: 1.6514 (1.6208) time: 2.9546 data: 0.0003 max mem: 29202 +[2024-12-11 11:06:16 root] (utils.py 283): INFO Epoch: [11] [1310/2502] eta: 0:58:20 lr: 0.000006 loss_cls: 2.8092 (2.6917) grad_norm: 1.6636 (1.6217) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-11 11:06:45 root] (utils.py 283): INFO Epoch: [11] [1320/2502] eta: 0:57:51 lr: 0.000006 loss_cls: 2.7435 (2.6903) grad_norm: 1.6526 (1.6216) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 11:07:14 root] (utils.py 283): INFO Epoch: [11] [1330/2502] eta: 0:57:22 lr: 0.000006 loss_cls: 2.6428 (2.6907) grad_norm: 1.5191 (1.6213) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-11 11:07:44 root] (utils.py 283): INFO Epoch: [11] [1340/2502] eta: 0:56:52 lr: 0.000006 loss_cls: 2.9150 (2.6921) grad_norm: 1.5547 (1.6212) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-11 11:08:13 root] (utils.py 283): INFO Epoch: [11] [1350/2502] eta: 0:56:23 lr: 0.000006 loss_cls: 3.0052 (2.6935) grad_norm: 1.5547 (1.6207) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-11 11:08:42 root] (utils.py 283): INFO Epoch: [11] [1360/2502] eta: 0:55:53 lr: 0.000006 loss_cls: 2.7952 (2.6935) grad_norm: 1.5393 (1.6211) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 11:09:12 root] (utils.py 283): INFO Epoch: [11] [1370/2502] eta: 0:55:24 lr: 0.000006 loss_cls: 2.6107 (2.6922) grad_norm: 1.5849 (1.6213) time: 2.9329 data: 0.0003 max mem: 29202 +[2024-12-11 11:09:41 root] (utils.py 283): INFO Epoch: [11] [1380/2502] eta: 0:54:54 lr: 0.000006 loss_cls: 2.6372 (2.6919) grad_norm: 1.6417 (1.6220) time: 2.9257 data: 0.0003 max mem: 29202 +[2024-12-11 11:10:10 root] (utils.py 283): INFO Epoch: [11] [1390/2502] eta: 0:54:25 lr: 0.000006 loss_cls: 2.6716 (2.6908) grad_norm: 1.5367 (1.6216) time: 2.9224 data: 0.0003 max mem: 29202 +[2024-12-11 11:10:39 root] (utils.py 283): INFO Epoch: [11] [1400/2502] eta: 0:53:55 lr: 0.000006 loss_cls: 2.6734 (2.6904) grad_norm: 1.5643 (1.6220) time: 2.9204 data: 0.0003 max mem: 29202 +[2024-12-11 11:11:09 root] (utils.py 283): INFO Epoch: [11] [1410/2502] eta: 0:53:26 lr: 0.000006 loss_cls: 2.6387 (2.6892) grad_norm: 1.5720 (1.6215) time: 2.9230 data: 0.0003 max mem: 29202 +[2024-12-11 11:11:38 root] (utils.py 283): INFO Epoch: [11] [1420/2502] eta: 0:52:57 lr: 0.000006 loss_cls: 2.5233 (2.6884) grad_norm: 1.5720 (1.6213) time: 2.9256 data: 0.0003 max mem: 29202 +[2024-12-11 11:12:07 root] (utils.py 283): INFO Epoch: [11] [1430/2502] eta: 0:52:27 lr: 0.000006 loss_cls: 2.5119 (2.6870) grad_norm: 1.6728 (1.6220) time: 2.9221 data: 0.0003 max mem: 29202 +[2024-12-11 11:12:36 root] (utils.py 283): INFO Epoch: [11] [1440/2502] eta: 0:51:58 lr: 0.000006 loss_cls: 2.6508 (2.6880) grad_norm: 1.5886 (1.6213) time: 2.9221 data: 0.0003 max mem: 29202 +[2024-12-11 11:13:06 root] (utils.py 283): INFO Epoch: [11] [1450/2502] eta: 0:51:28 lr: 0.000006 loss_cls: 2.7893 (2.6880) grad_norm: 1.5594 (1.6204) time: 2.9221 data: 0.0003 max mem: 29202 +[2024-12-11 11:13:35 root] (utils.py 283): INFO Epoch: [11] [1460/2502] eta: 0:50:59 lr: 0.000006 loss_cls: 2.7718 (2.6886) grad_norm: 1.5023 (1.6197) time: 2.9244 data: 0.0003 max mem: 29202 +[2024-12-11 11:14:04 root] (utils.py 283): INFO Epoch: [11] [1470/2502] eta: 0:50:29 lr: 0.000006 loss_cls: 2.7718 (2.6883) grad_norm: 1.4663 (1.6189) time: 2.9262 data: 0.0003 max mem: 29202 +[2024-12-11 11:14:33 root] (utils.py 283): INFO Epoch: [11] [1480/2502] eta: 0:50:00 lr: 0.000006 loss_cls: 2.7359 (2.6880) grad_norm: 1.5987 (1.6193) time: 2.9245 data: 0.0003 max mem: 29202 +[2024-12-11 11:15:02 root] (utils.py 283): INFO Epoch: [11] [1490/2502] eta: 0:49:30 lr: 0.000006 loss_cls: 2.8638 (2.6881) grad_norm: 1.5315 (1.6188) time: 2.9229 data: 0.0003 max mem: 29202 +[2024-12-11 11:15:32 root] (utils.py 283): INFO Epoch: [11] [1500/2502] eta: 0:49:01 lr: 0.000006 loss_cls: 2.6808 (2.6880) grad_norm: 1.5250 (1.6185) time: 2.9225 data: 0.0003 max mem: 29202 +[2024-12-11 11:16:01 root] (utils.py 283): INFO Epoch: [11] [1510/2502] eta: 0:48:32 lr: 0.000006 loss_cls: 2.7426 (2.6890) grad_norm: 1.4778 (1.6178) time: 2.9223 data: 0.0003 max mem: 29202 +[2024-12-11 11:16:30 root] (utils.py 283): INFO Epoch: [11] [1520/2502] eta: 0:48:02 lr: 0.000006 loss_cls: 2.7717 (2.6883) grad_norm: 1.4504 (1.6174) time: 2.9231 data: 0.0003 max mem: 29202 +[2024-12-11 11:16:59 root] (utils.py 283): INFO Epoch: [11] [1530/2502] eta: 0:47:33 lr: 0.000006 loss_cls: 2.6387 (2.6881) grad_norm: 1.5552 (1.6171) time: 2.9243 data: 0.0003 max mem: 29202 +[2024-12-11 11:17:29 root] (utils.py 283): INFO Epoch: [11] [1540/2502] eta: 0:47:03 lr: 0.000006 loss_cls: 2.7254 (2.6886) grad_norm: 1.5993 (1.6170) time: 2.9278 data: 0.0003 max mem: 29202 +[2024-12-11 11:17:58 root] (utils.py 283): INFO Epoch: [11] [1550/2502] eta: 0:46:34 lr: 0.000006 loss_cls: 2.7130 (2.6876) grad_norm: 1.5658 (1.6164) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 11:18:28 root] (utils.py 283): INFO Epoch: [11] [1560/2502] eta: 0:46:05 lr: 0.000006 loss_cls: 2.7037 (2.6889) grad_norm: 1.5511 (1.6163) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 11:18:57 root] (utils.py 283): INFO Epoch: [11] [1570/2502] eta: 0:45:35 lr: 0.000006 loss_cls: 2.7269 (2.6885) grad_norm: 1.5952 (1.6173) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 11:19:26 root] (utils.py 283): INFO Epoch: [11] [1580/2502] eta: 0:45:06 lr: 0.000006 loss_cls: 2.7639 (2.6892) grad_norm: 1.5822 (1.6171) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 11:19:56 root] (utils.py 283): INFO Epoch: [11] [1590/2502] eta: 0:44:37 lr: 0.000006 loss_cls: 2.7372 (2.6888) grad_norm: 1.5702 (1.6176) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 11:20:25 root] (utils.py 283): INFO Epoch: [11] [1600/2502] eta: 0:44:07 lr: 0.000006 loss_cls: 2.7458 (2.6885) grad_norm: 1.5583 (1.6183) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 11:20:54 root] (utils.py 283): INFO Epoch: [11] [1610/2502] eta: 0:43:38 lr: 0.000006 loss_cls: 2.6714 (2.6866) grad_norm: 1.5466 (1.6179) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 11:21:24 root] (utils.py 283): INFO Epoch: [11] [1620/2502] eta: 0:43:09 lr: 0.000006 loss_cls: 2.7012 (2.6876) grad_norm: 1.5762 (1.6191) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 11:21:53 root] (utils.py 283): INFO Epoch: [11] [1630/2502] eta: 0:42:39 lr: 0.000006 loss_cls: 2.9103 (2.6880) grad_norm: 1.6127 (1.6187) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 11:22:23 root] (utils.py 283): INFO Epoch: [11] [1640/2502] eta: 0:42:10 lr: 0.000006 loss_cls: 2.7973 (2.6880) grad_norm: 1.6121 (1.6188) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 11:22:52 root] (utils.py 283): INFO Epoch: [11] [1650/2502] eta: 0:41:41 lr: 0.000006 loss_cls: 2.7146 (2.6875) grad_norm: 1.5293 (1.6186) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 11:23:21 root] (utils.py 283): INFO Epoch: [11] [1660/2502] eta: 0:41:11 lr: 0.000006 loss_cls: 2.5966 (2.6866) grad_norm: 1.5170 (1.6181) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 11:23:51 root] (utils.py 283): INFO Epoch: [11] [1670/2502] eta: 0:40:42 lr: 0.000006 loss_cls: 2.5223 (2.6859) grad_norm: 1.5115 (1.6177) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-11 11:24:20 root] (utils.py 283): INFO Epoch: [11] [1680/2502] eta: 0:40:13 lr: 0.000006 loss_cls: 2.8150 (2.6857) grad_norm: 1.6439 (1.6183) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 11:24:50 root] (utils.py 283): INFO Epoch: [11] [1690/2502] eta: 0:39:43 lr: 0.000006 loss_cls: 2.8165 (2.6857) grad_norm: 1.6766 (1.6185) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-11 11:25:19 root] (utils.py 283): INFO Epoch: [11] [1700/2502] eta: 0:39:14 lr: 0.000006 loss_cls: 2.8734 (2.6858) grad_norm: 1.6258 (1.6189) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 11:25:48 root] (utils.py 283): INFO Epoch: [11] [1710/2502] eta: 0:38:45 lr: 0.000006 loss_cls: 2.7297 (2.6849) grad_norm: 1.6138 (1.6196) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-11 11:26:18 root] (utils.py 283): INFO Epoch: [11] [1720/2502] eta: 0:38:15 lr: 0.000006 loss_cls: 2.7297 (2.6845) grad_norm: 1.5195 (1.6191) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 11:26:47 root] (utils.py 283): INFO Epoch: [11] [1730/2502] eta: 0:37:46 lr: 0.000006 loss_cls: 2.8325 (2.6845) grad_norm: 1.4871 (1.6188) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 11:27:16 root] (utils.py 283): INFO Epoch: [11] [1740/2502] eta: 0:37:17 lr: 0.000006 loss_cls: 2.8216 (2.6846) grad_norm: 1.4871 (1.6183) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 11:27:46 root] (utils.py 283): INFO Epoch: [11] [1750/2502] eta: 0:36:47 lr: 0.000006 loss_cls: 2.7517 (2.6842) grad_norm: 1.5585 (1.6186) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 11:28:15 root] (utils.py 283): INFO Epoch: [11] [1760/2502] eta: 0:36:18 lr: 0.000006 loss_cls: 2.8558 (2.6859) grad_norm: 1.5743 (1.6187) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-11 11:28:45 root] (utils.py 283): INFO Epoch: [11] [1770/2502] eta: 0:35:49 lr: 0.000006 loss_cls: 2.9500 (2.6875) grad_norm: 1.5743 (1.6186) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-11 11:29:14 root] (utils.py 283): INFO Epoch: [11] [1780/2502] eta: 0:35:19 lr: 0.000006 loss_cls: 2.8435 (2.6877) grad_norm: 1.5748 (1.6187) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 11:29:43 root] (utils.py 283): INFO Epoch: [11] [1790/2502] eta: 0:34:50 lr: 0.000006 loss_cls: 2.8081 (2.6879) grad_norm: 1.5742 (1.6183) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 11:30:13 root] (utils.py 283): INFO Epoch: [11] [1800/2502] eta: 0:34:20 lr: 0.000006 loss_cls: 2.6686 (2.6878) grad_norm: 1.6166 (1.6197) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-11 11:30:42 root] (utils.py 283): INFO Epoch: [11] [1810/2502] eta: 0:33:51 lr: 0.000006 loss_cls: 2.7827 (2.6883) grad_norm: 1.6166 (1.6202) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-11 11:31:11 root] (utils.py 283): INFO Epoch: [11] [1820/2502] eta: 0:33:22 lr: 0.000006 loss_cls: 2.8798 (2.6890) grad_norm: 1.5723 (1.6205) time: 2.9354 data: 0.0003 max mem: 29202 +[2024-12-11 11:31:41 root] (utils.py 283): INFO Epoch: [11] [1830/2502] eta: 0:32:52 lr: 0.000006 loss_cls: 2.8798 (2.6902) grad_norm: 1.5646 (1.6211) time: 2.9334 data: 0.0003 max mem: 29202 +[2024-12-11 11:32:10 root] (utils.py 283): INFO Epoch: [11] [1840/2502] eta: 0:32:23 lr: 0.000006 loss_cls: 2.7503 (2.6886) grad_norm: 1.6841 (1.6218) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 11:32:40 root] (utils.py 283): INFO Epoch: [11] [1850/2502] eta: 0:31:54 lr: 0.000006 loss_cls: 2.5620 (2.6886) grad_norm: 1.5773 (1.6212) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 11:33:09 root] (utils.py 283): INFO Epoch: [11] [1860/2502] eta: 0:31:24 lr: 0.000006 loss_cls: 2.5620 (2.6880) grad_norm: 1.5620 (1.6224) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 11:33:38 root] (utils.py 283): INFO Epoch: [11] [1870/2502] eta: 0:30:55 lr: 0.000006 loss_cls: 2.6141 (2.6878) grad_norm: 1.6432 (1.6221) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 11:34:08 root] (utils.py 283): INFO Epoch: [11] [1880/2502] eta: 0:30:26 lr: 0.000006 loss_cls: 2.8288 (2.6879) grad_norm: 1.5856 (1.6219) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 11:34:37 root] (utils.py 283): INFO Epoch: [11] [1890/2502] eta: 0:29:56 lr: 0.000006 loss_cls: 2.7455 (2.6880) grad_norm: 1.4664 (1.6214) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 11:35:06 root] (utils.py 283): INFO Epoch: [11] [1900/2502] eta: 0:29:27 lr: 0.000006 loss_cls: 2.7345 (2.6879) grad_norm: 1.5216 (1.6216) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 11:35:36 root] (utils.py 283): INFO Epoch: [11] [1910/2502] eta: 0:28:57 lr: 0.000006 loss_cls: 2.8674 (2.6877) grad_norm: 1.6185 (1.6222) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 11:36:05 root] (utils.py 283): INFO Epoch: [11] [1920/2502] eta: 0:28:28 lr: 0.000006 loss_cls: 2.8272 (2.6877) grad_norm: 1.5199 (1.6221) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 11:36:34 root] (utils.py 283): INFO Epoch: [11] [1930/2502] eta: 0:27:59 lr: 0.000006 loss_cls: 2.6412 (2.6866) grad_norm: 1.4766 (1.6222) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 11:37:04 root] (utils.py 283): INFO Epoch: [11] [1940/2502] eta: 0:27:29 lr: 0.000006 loss_cls: 2.6412 (2.6868) grad_norm: 1.5438 (1.6220) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-11 11:37:33 root] (utils.py 283): INFO Epoch: [11] [1950/2502] eta: 0:27:00 lr: 0.000006 loss_cls: 2.8193 (2.6866) grad_norm: 1.5622 (1.6217) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-11 11:38:03 root] (utils.py 283): INFO Epoch: [11] [1960/2502] eta: 0:26:31 lr: 0.000006 loss_cls: 2.8327 (2.6877) grad_norm: 1.5043 (1.6215) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 11:38:32 root] (utils.py 283): INFO Epoch: [11] [1970/2502] eta: 0:26:01 lr: 0.000006 loss_cls: 2.8609 (2.6873) grad_norm: 1.5043 (1.6213) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 11:39:01 root] (utils.py 283): INFO Epoch: [11] [1980/2502] eta: 0:25:32 lr: 0.000006 loss_cls: 2.8889 (2.6881) grad_norm: 1.5246 (1.6217) time: 2.9337 data: 0.0003 max mem: 29202 +[2024-12-11 11:39:31 root] (utils.py 283): INFO Epoch: [11] [1990/2502] eta: 0:25:03 lr: 0.000006 loss_cls: 2.8889 (2.6891) grad_norm: 1.6623 (1.6222) time: 2.9347 data: 0.0003 max mem: 29202 +[2024-12-11 11:40:00 root] (utils.py 283): INFO Epoch: [11] [2000/2502] eta: 0:24:33 lr: 0.000006 loss_cls: 2.8127 (2.6878) grad_norm: 1.5933 (1.6217) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 11:40:29 root] (utils.py 283): INFO Epoch: [11] [2010/2502] eta: 0:24:04 lr: 0.000006 loss_cls: 2.4967 (2.6870) grad_norm: 1.5127 (1.6212) time: 2.9327 data: 0.0003 max mem: 29202 +[2024-12-11 11:40:58 root] (utils.py 283): INFO Epoch: [11] [2020/2502] eta: 0:23:35 lr: 0.000006 loss_cls: 2.7322 (2.6878) grad_norm: 1.5127 (1.6208) time: 2.9299 data: 0.0003 max mem: 29202 +[2024-12-11 11:41:28 root] (utils.py 283): INFO Epoch: [11] [2030/2502] eta: 0:23:05 lr: 0.000006 loss_cls: 2.7427 (2.6872) grad_norm: 1.4946 (1.6201) time: 2.9302 data: 0.0003 max mem: 29202 +[2024-12-11 11:41:57 root] (utils.py 283): INFO Epoch: [11] [2040/2502] eta: 0:22:36 lr: 0.000006 loss_cls: 2.6795 (2.6879) grad_norm: 1.5074 (1.6199) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-11 11:42:27 root] (utils.py 283): INFO Epoch: [11] [2050/2502] eta: 0:22:06 lr: 0.000006 loss_cls: 2.7467 (2.6883) grad_norm: 1.5825 (1.6197) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 11:42:56 root] (utils.py 283): INFO Epoch: [11] [2060/2502] eta: 0:21:37 lr: 0.000006 loss_cls: 2.8358 (2.6889) grad_norm: 1.5570 (1.6195) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 11:43:25 root] (utils.py 283): INFO Epoch: [11] [2070/2502] eta: 0:21:08 lr: 0.000006 loss_cls: 2.8097 (2.6879) grad_norm: 1.5112 (1.6193) time: 2.9316 data: 0.0003 max mem: 29202 +[2024-12-11 11:43:54 root] (utils.py 283): INFO Epoch: [11] [2080/2502] eta: 0:20:38 lr: 0.000006 loss_cls: 2.6183 (2.6874) grad_norm: 1.5112 (1.6192) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 11:44:24 root] (utils.py 283): INFO Epoch: [11] [2090/2502] eta: 0:20:09 lr: 0.000006 loss_cls: 2.6000 (2.6869) grad_norm: 1.5163 (1.6188) time: 2.9352 data: 0.0003 max mem: 29202 +[2024-12-11 11:44:53 root] (utils.py 283): INFO Epoch: [11] [2100/2502] eta: 0:19:40 lr: 0.000006 loss_cls: 2.5727 (2.6865) grad_norm: 1.5274 (1.6191) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 11:45:23 root] (utils.py 283): INFO Epoch: [11] [2110/2502] eta: 0:19:10 lr: 0.000006 loss_cls: 2.6179 (2.6860) grad_norm: 1.5495 (1.6190) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 11:45:52 root] (utils.py 283): INFO Epoch: [11] [2120/2502] eta: 0:18:41 lr: 0.000006 loss_cls: 2.8296 (2.6868) grad_norm: 1.5713 (1.6190) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-11 11:46:22 root] (utils.py 283): INFO Epoch: [11] [2130/2502] eta: 0:18:12 lr: 0.000006 loss_cls: 2.8296 (2.6872) grad_norm: 1.5369 (1.6188) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 11:46:51 root] (utils.py 283): INFO Epoch: [11] [2140/2502] eta: 0:17:42 lr: 0.000006 loss_cls: 2.7821 (2.6870) grad_norm: 1.5453 (1.6189) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 11:47:20 root] (utils.py 283): INFO Epoch: [11] [2150/2502] eta: 0:17:13 lr: 0.000006 loss_cls: 2.7498 (2.6874) grad_norm: 1.5820 (1.6191) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 11:47:50 root] (utils.py 283): INFO Epoch: [11] [2160/2502] eta: 0:16:44 lr: 0.000006 loss_cls: 2.7754 (2.6881) grad_norm: 1.5820 (1.6194) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 11:48:19 root] (utils.py 283): INFO Epoch: [11] [2170/2502] eta: 0:16:14 lr: 0.000006 loss_cls: 2.8378 (2.6884) grad_norm: 1.5444 (1.6193) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 11:48:49 root] (utils.py 283): INFO Epoch: [11] [2180/2502] eta: 0:15:45 lr: 0.000006 loss_cls: 2.8384 (2.6886) grad_norm: 1.5274 (1.6199) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 11:49:18 root] (utils.py 283): INFO Epoch: [11] [2190/2502] eta: 0:15:15 lr: 0.000006 loss_cls: 2.7974 (2.6884) grad_norm: 1.6341 (1.6201) time: 2.9290 data: 0.0003 max mem: 29202 +[2024-12-11 11:49:47 root] (utils.py 283): INFO Epoch: [11] [2200/2502] eta: 0:14:46 lr: 0.000006 loss_cls: 2.7974 (2.6887) grad_norm: 1.6245 (1.6201) time: 2.9287 data: 0.0003 max mem: 29202 +[2024-12-11 11:50:16 root] (utils.py 283): INFO Epoch: [11] [2210/2502] eta: 0:14:17 lr: 0.000006 loss_cls: 2.7182 (2.6884) grad_norm: 1.5822 (1.6201) time: 2.9289 data: 0.0003 max mem: 29202 +[2024-12-11 11:50:46 root] (utils.py 283): INFO Epoch: [11] [2220/2502] eta: 0:13:47 lr: 0.000006 loss_cls: 2.5928 (2.6876) grad_norm: 1.5770 (1.6201) time: 2.9328 data: 0.0003 max mem: 29202 +[2024-12-11 11:51:15 root] (utils.py 283): INFO Epoch: [11] [2230/2502] eta: 0:13:18 lr: 0.000006 loss_cls: 2.6704 (2.6879) grad_norm: 1.5674 (1.6202) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 11:51:45 root] (utils.py 283): INFO Epoch: [11] [2240/2502] eta: 0:12:49 lr: 0.000006 loss_cls: 2.6341 (2.6872) grad_norm: 1.5674 (1.6200) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 11:52:14 root] (utils.py 283): INFO Epoch: [11] [2250/2502] eta: 0:12:19 lr: 0.000006 loss_cls: 2.6572 (2.6873) grad_norm: 1.4546 (1.6192) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 11:52:43 root] (utils.py 283): INFO Epoch: [11] [2260/2502] eta: 0:11:50 lr: 0.000006 loss_cls: 2.6572 (2.6862) grad_norm: 1.4464 (1.6190) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 11:53:13 root] (utils.py 283): INFO Epoch: [11] [2270/2502] eta: 0:11:21 lr: 0.000006 loss_cls: 2.7057 (2.6865) grad_norm: 1.5524 (1.6194) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 11:53:42 root] (utils.py 283): INFO Epoch: [11] [2280/2502] eta: 0:10:51 lr: 0.000006 loss_cls: 2.7401 (2.6870) grad_norm: 1.4763 (1.6191) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 11:54:12 root] (utils.py 283): INFO Epoch: [11] [2290/2502] eta: 0:10:22 lr: 0.000006 loss_cls: 2.8680 (2.6874) grad_norm: 1.5265 (1.6195) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 11:54:41 root] (utils.py 283): INFO Epoch: [11] [2300/2502] eta: 0:09:53 lr: 0.000006 loss_cls: 2.8098 (2.6877) grad_norm: 1.5785 (1.6200) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 11:55:10 root] (utils.py 283): INFO Epoch: [11] [2310/2502] eta: 0:09:23 lr: 0.000006 loss_cls: 2.8067 (2.6878) grad_norm: 1.5257 (1.6196) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 11:55:40 root] (utils.py 283): INFO Epoch: [11] [2320/2502] eta: 0:08:54 lr: 0.000006 loss_cls: 2.8312 (2.6877) grad_norm: 1.5560 (1.6194) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 11:56:09 root] (utils.py 283): INFO Epoch: [11] [2330/2502] eta: 0:08:24 lr: 0.000006 loss_cls: 2.8655 (2.6879) grad_norm: 1.5807 (1.6198) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 11:56:39 root] (utils.py 283): INFO Epoch: [11] [2340/2502] eta: 0:07:55 lr: 0.000006 loss_cls: 2.7139 (2.6878) grad_norm: 1.6679 (1.6206) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 11:57:08 root] (utils.py 283): INFO Epoch: [11] [2350/2502] eta: 0:07:26 lr: 0.000006 loss_cls: 2.7911 (2.6879) grad_norm: 1.6378 (1.6206) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 11:57:37 root] (utils.py 283): INFO Epoch: [11] [2360/2502] eta: 0:06:56 lr: 0.000006 loss_cls: 2.8087 (2.6876) grad_norm: 1.5490 (1.6204) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 11:58:07 root] (utils.py 283): INFO Epoch: [11] [2370/2502] eta: 0:06:27 lr: 0.000006 loss_cls: 2.6698 (2.6871) grad_norm: 1.5692 (1.6204) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 11:58:36 root] (utils.py 283): INFO Epoch: [11] [2380/2502] eta: 0:05:58 lr: 0.000006 loss_cls: 2.6593 (2.6867) grad_norm: 1.5692 (1.6202) time: 2.9539 data: 0.0004 max mem: 29202 +[2024-12-11 11:59:06 root] (utils.py 283): INFO Epoch: [11] [2390/2502] eta: 0:05:28 lr: 0.000006 loss_cls: 2.6593 (2.6864) grad_norm: 1.5090 (1.6198) time: 2.9576 data: 0.0004 max mem: 29202 +[2024-12-11 11:59:35 root] (utils.py 283): INFO Epoch: [11] [2400/2502] eta: 0:04:59 lr: 0.000006 loss_cls: 2.6632 (2.6857) grad_norm: 1.5090 (1.6199) time: 2.9467 data: 0.0003 max mem: 29202 +[2024-12-11 12:00:05 root] (utils.py 283): INFO Epoch: [11] [2410/2502] eta: 0:04:30 lr: 0.000006 loss_cls: 2.5877 (2.6855) grad_norm: 1.5259 (1.6197) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 12:00:34 root] (utils.py 283): INFO Epoch: [11] [2420/2502] eta: 0:04:00 lr: 0.000006 loss_cls: 2.7753 (2.6861) grad_norm: 1.5927 (1.6197) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 12:01:04 root] (utils.py 283): INFO Epoch: [11] [2430/2502] eta: 0:03:31 lr: 0.000006 loss_cls: 2.9102 (2.6869) grad_norm: 1.5270 (1.6194) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 12:01:33 root] (utils.py 283): INFO Epoch: [11] [2440/2502] eta: 0:03:02 lr: 0.000006 loss_cls: 2.8902 (2.6870) grad_norm: 1.4865 (1.6190) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 12:02:02 root] (utils.py 283): INFO Epoch: [11] [2450/2502] eta: 0:02:32 lr: 0.000006 loss_cls: 2.9205 (2.6877) grad_norm: 1.5425 (1.6191) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 12:02:32 root] (utils.py 283): INFO Epoch: [11] [2460/2502] eta: 0:02:03 lr: 0.000006 loss_cls: 2.9035 (2.6869) grad_norm: 1.6212 (1.6191) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 12:03:02 root] (utils.py 283): INFO Epoch: [11] [2470/2502] eta: 0:01:33 lr: 0.000006 loss_cls: 2.8799 (2.6874) grad_norm: 1.5823 (1.6194) time: 2.9591 data: 0.0003 max mem: 29202 +[2024-12-11 12:03:31 root] (utils.py 283): INFO Epoch: [11] [2480/2502] eta: 0:01:04 lr: 0.000006 loss_cls: 2.9169 (2.6876) grad_norm: 1.5823 (1.6210) time: 2.9579 data: 0.0003 max mem: 29202 +[2024-12-11 12:04:01 root] (utils.py 283): INFO Epoch: [11] [2490/2502] eta: 0:00:35 lr: 0.000006 loss_cls: 2.7043 (2.6877) grad_norm: 1.5784 (1.6212) time: 2.9733 data: 0.0244 max mem: 29202 +[2024-12-11 12:04:30 root] (utils.py 283): INFO Epoch: [11] [2500/2502] eta: 0:00:05 lr: 0.000006 loss_cls: 2.8200 (2.6876) grad_norm: 1.6770 (1.6218) time: 2.9765 data: 0.0244 max mem: 29202 +[2024-12-11 12:04:33 root] (utils.py 283): INFO Epoch: [11] [2501/2502] eta: 0:00:02 lr: 0.000006 loss_cls: 2.7447 (2.6874) grad_norm: 1.6770 (1.6217) time: 2.9742 data: 0.0244 max mem: 29202 +[2024-12-11 12:04:33 root] (utils.py 297): INFO Epoch: [11] Total time: 2:02:28 (2.9369 s / it) +[2024-12-11 12:04:33 root] (engine.py 179): INFO Averaged stats:lr: 0.000006 loss_cls: 2.7447 (2.6865) grad_norm: 1.6770 (1.6217) +[2024-12-11 12:04:37 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2907 (0.2907) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5690 data: 0.0003 max mem: 29202 +[2024-12-11 12:04:43 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5560 (0.5564) acc1: 86.7188 (86.5767) acc3: 97.6562 (96.6619) acc5: 98.4375 (98.2244) time: 0.5537 data: 0.0004 max mem: 29202 +[2024-12-11 12:04:49 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5560 (0.5992) acc1: 85.9375 (85.7515) acc3: 96.8750 (96.1310) acc5: 97.6562 (97.8423) time: 0.5525 data: 0.0004 max mem: 29202 +[2024-12-11 12:04:54 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6132 (0.6243) acc1: 85.1562 (85.1562) acc3: 96.0938 (95.8921) acc5: 97.6562 (97.6058) time: 0.5532 data: 0.0004 max mem: 29202 +[2024-12-11 12:05:00 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6482 (0.6290) acc1: 85.1562 (85.3087) acc3: 96.8750 (95.9413) acc5: 97.6562 (97.6753) time: 0.5535 data: 0.0004 max mem: 29202 +[2024-12-11 12:05:05 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7973 (0.7101) acc1: 80.4688 (83.5938) acc3: 92.1875 (94.7917) acc5: 95.3125 (96.7984) time: 0.5537 data: 0.0004 max mem: 29202 +[2024-12-11 12:05:11 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:21 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9014 (0.7416) acc1: 78.1250 (83.0558) acc3: 89.8438 (94.1342) acc5: 93.7500 (96.3627) time: 0.5540 data: 0.0004 max mem: 29202 +[2024-12-11 12:05:16 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9504 (0.7750) acc1: 78.9062 (82.3724) acc3: 90.6250 (93.8160) acc5: 93.7500 (96.1158) time: 0.5535 data: 0.0004 max mem: 29202 +[2024-12-11 12:05:22 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9358 (0.7998) acc1: 78.1250 (81.9348) acc3: 91.4062 (93.4028) acc5: 93.7500 (95.7272) time: 0.5534 data: 0.0007 max mem: 29202 +[2024-12-11 12:05:27 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9419 (0.8288) acc1: 76.5625 (81.1384) acc3: 89.8438 (93.0975) acc5: 92.9688 (95.4928) time: 0.5541 data: 0.0006 max mem: 29202 +[2024-12-11 12:05:31 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9184 (0.8268) acc1: 76.5625 (81.1040) acc3: 91.4062 (93.0960) acc5: 93.7500 (95.5600) time: 0.5445 data: 0.0005 max mem: 29202 +[2024-12-11 12:05:31 root] (utils.py 297): INFO Test: Total time: 0:00:54 (0.5519 s / it) +[2024-12-11 12:05:31 root] (engine.py 264): INFO * Acc@1 81.218 Acc@3 93.076 Acc@5 95.390 loss 0.823 flops 13.207 layer_flops 13.109 +[2024-12-11 12:05:31 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.2% +[2024-12-11 12:05:31 root] (main.py 576): INFO Max accuracy: 81.27% +[2024-12-11 12:05:34 root] (utils.py 283): INFO Epoch: [12] [ 0/2502] eta: 2:00:34 lr: 0.000004 loss_cls: 3.0542 (3.0542) grad_norm: 1.7156 (1.7156) time: 2.8916 data: 0.0003 max mem: 29202 +[2024-12-11 12:06:03 root] (utils.py 283): INFO Epoch: [12] [ 10/2502] eta: 2:02:04 lr: 0.000004 loss_cls: 2.8230 (2.6198) grad_norm: 1.6188 (1.6915) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 12:06:33 root] (utils.py 283): INFO Epoch: [12] [ 20/2502] eta: 2:01:44 lr: 0.000004 loss_cls: 2.6520 (2.6097) grad_norm: 1.6188 (1.6839) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-11 12:07:02 root] (utils.py 283): INFO Epoch: [12] [ 30/2502] eta: 2:01:12 lr: 0.000004 loss_cls: 2.7435 (2.6667) grad_norm: 1.6321 (1.6821) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 12:07:32 root] (utils.py 283): INFO Epoch: [12] [ 40/2502] eta: 2:00:48 lr: 0.000004 loss_cls: 2.8171 (2.6715) grad_norm: 1.5648 (1.6524) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 12:08:01 root] (utils.py 283): INFO Epoch: [12] [ 50/2502] eta: 2:00:19 lr: 0.000004 loss_cls: 2.8171 (2.6912) grad_norm: 1.5648 (1.6456) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-11 12:08:31 root] (utils.py 283): INFO Epoch: [12] [ 60/2502] eta: 1:59:51 lr: 0.000004 loss_cls: 2.6348 (2.6431) grad_norm: 1.4847 (1.6623) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-11 12:09:00 root] (utils.py 283): INFO Epoch: [12] [ 70/2502] eta: 1:59:21 lr: 0.000004 loss_cls: 2.5931 (2.6445) grad_norm: 1.5328 (1.6607) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-11 12:09:30 root] (utils.py 283): INFO Epoch: [12] [ 80/2502] eta: 1:58:52 lr: 0.000004 loss_cls: 2.9490 (2.6804) grad_norm: 1.6165 (1.6536) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 12:09:59 root] (utils.py 283): INFO Epoch: [12] [ 90/2502] eta: 1:58:21 lr: 0.000004 loss_cls: 3.0144 (2.6934) grad_norm: 1.5576 (1.6455) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 12:10:28 root] (utils.py 283): INFO Epoch: [12] [ 100/2502] eta: 1:57:53 lr: 0.000004 loss_cls: 2.9331 (2.7019) grad_norm: 1.4327 (1.6331) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-11 12:10:58 root] (utils.py 283): INFO Epoch: [12] [ 110/2502] eta: 1:57:23 lr: 0.000004 loss_cls: 2.9236 (2.7234) grad_norm: 1.5362 (1.6388) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-11 12:11:27 root] (utils.py 283): INFO Epoch: [12] [ 120/2502] eta: 1:56:54 lr: 0.000004 loss_cls: 2.8311 (2.7178) grad_norm: 1.6718 (1.6320) time: 2.9439 data: 0.0003 max mem: 29202 +[2024-12-11 12:11:57 root] (utils.py 283): INFO Epoch: [12] [ 130/2502] eta: 1:56:24 lr: 0.000004 loss_cls: 2.7997 (2.7270) grad_norm: 1.5852 (1.6361) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-11 12:12:26 root] (utils.py 283): INFO Epoch: [12] [ 140/2502] eta: 1:55:54 lr: 0.000004 loss_cls: 2.7690 (2.7156) grad_norm: 1.5852 (1.6308) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 12:12:56 root] (utils.py 283): INFO Epoch: [12] [ 150/2502] eta: 1:55:24 lr: 0.000004 loss_cls: 2.6666 (2.7202) grad_norm: 1.5186 (1.6264) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 12:13:25 root] (utils.py 283): INFO Epoch: [12] [ 160/2502] eta: 1:54:54 lr: 0.000004 loss_cls: 2.6666 (2.7105) grad_norm: 1.4966 (1.6340) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 12:13:54 root] (utils.py 283): INFO Epoch: [12] [ 170/2502] eta: 1:54:24 lr: 0.000004 loss_cls: 2.8149 (2.7201) grad_norm: 1.6781 (1.6507) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 12:14:24 root] (utils.py 283): INFO Epoch: [12] [ 180/2502] eta: 1:53:55 lr: 0.000004 loss_cls: 2.8297 (2.7218) grad_norm: 1.7091 (1.6643) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 12:14:53 root] (utils.py 283): INFO Epoch: [12] [ 190/2502] eta: 1:53:25 lr: 0.000004 loss_cls: 2.5713 (2.7181) grad_norm: 1.6209 (1.6699) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 12:15:23 root] (utils.py 283): INFO Epoch: [12] [ 200/2502] eta: 1:52:56 lr: 0.000004 loss_cls: 2.4807 (2.7044) grad_norm: 1.5092 (1.6596) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 12:15:52 root] (utils.py 283): INFO Epoch: [12] [ 210/2502] eta: 1:52:26 lr: 0.000004 loss_cls: 2.4594 (2.6946) grad_norm: 1.5289 (1.6602) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 12:16:22 root] (utils.py 283): INFO Epoch: [12] [ 220/2502] eta: 1:51:57 lr: 0.000004 loss_cls: 2.6356 (2.6958) grad_norm: 1.5772 (1.6573) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 12:16:51 root] (utils.py 283): INFO Epoch: [12] [ 230/2502] eta: 1:51:27 lr: 0.000004 loss_cls: 2.9094 (2.7060) grad_norm: 1.6063 (1.6568) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-11 12:17:21 root] (utils.py 283): INFO Epoch: [12] [ 240/2502] eta: 1:50:59 lr: 0.000004 loss_cls: 2.9094 (2.7112) grad_norm: 1.5638 (1.6522) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-11 12:17:50 root] (utils.py 283): INFO Epoch: [12] [ 250/2502] eta: 1:50:30 lr: 0.000004 loss_cls: 2.7913 (2.7083) grad_norm: 1.4849 (1.6465) time: 2.9509 data: 0.0003 max mem: 29202 +[2024-12-11 12:18:20 root] (utils.py 283): INFO Epoch: [12] [ 260/2502] eta: 1:50:01 lr: 0.000004 loss_cls: 2.7113 (2.7039) grad_norm: 1.5014 (1.6469) time: 2.9508 data: 0.0003 max mem: 29202 +[2024-12-11 12:18:49 root] (utils.py 283): INFO Epoch: [12] [ 270/2502] eta: 1:49:33 lr: 0.000004 loss_cls: 2.6557 (2.7002) grad_norm: 1.5567 (1.6511) time: 2.9580 data: 0.0003 max mem: 29202 +[2024-12-11 12:19:19 root] (utils.py 283): INFO Epoch: [12] [ 280/2502] eta: 1:49:04 lr: 0.000004 loss_cls: 2.8477 (2.7037) grad_norm: 1.6108 (1.6481) time: 2.9604 data: 0.0003 max mem: 29202 +[2024-12-11 12:19:48 root] (utils.py 283): INFO Epoch: [12] [ 290/2502] eta: 1:48:35 lr: 0.000004 loss_cls: 2.9129 (2.7042) grad_norm: 1.4806 (1.6446) time: 2.9507 data: 0.0003 max mem: 29202 +[2024-12-11 12:20:18 root] (utils.py 283): INFO Epoch: [12] [ 300/2502] eta: 1:48:06 lr: 0.000004 loss_cls: 2.6239 (2.7034) grad_norm: 1.6089 (1.6475) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-11 12:20:47 root] (utils.py 283): INFO Epoch: [12] [ 310/2502] eta: 1:47:36 lr: 0.000004 loss_cls: 2.6239 (2.7022) grad_norm: 1.6089 (1.6517) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 12:21:17 root] (utils.py 283): INFO Epoch: [12] [ 320/2502] eta: 1:47:07 lr: 0.000004 loss_cls: 2.7841 (2.7041) grad_norm: 1.5885 (1.6542) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-11 12:21:46 root] (utils.py 283): INFO Epoch: [12] [ 330/2502] eta: 1:46:38 lr: 0.000004 loss_cls: 2.6811 (2.6989) grad_norm: 1.6198 (1.6571) time: 2.9514 data: 0.0003 max mem: 29202 +[2024-12-11 12:22:16 root] (utils.py 283): INFO Epoch: [12] [ 340/2502] eta: 1:46:08 lr: 0.000004 loss_cls: 2.6811 (2.7042) grad_norm: 1.6468 (1.6596) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-11 12:22:45 root] (utils.py 283): INFO Epoch: [12] [ 350/2502] eta: 1:45:38 lr: 0.000004 loss_cls: 2.6962 (2.7023) grad_norm: 1.6468 (1.6596) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 12:23:14 root] (utils.py 283): INFO Epoch: [12] [ 360/2502] eta: 1:45:09 lr: 0.000004 loss_cls: 2.6707 (2.7013) grad_norm: 1.6413 (1.6599) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 12:23:44 root] (utils.py 283): INFO Epoch: [12] [ 370/2502] eta: 1:44:39 lr: 0.000004 loss_cls: 2.8182 (2.7022) grad_norm: 1.5601 (1.6577) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-11 12:24:13 root] (utils.py 283): INFO Epoch: [12] [ 380/2502] eta: 1:44:10 lr: 0.000004 loss_cls: 2.8261 (2.6967) grad_norm: 1.5459 (1.6590) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-11 12:24:43 root] (utils.py 283): INFO Epoch: [12] [ 390/2502] eta: 1:43:40 lr: 0.000004 loss_cls: 2.2473 (2.6900) grad_norm: 1.6560 (1.6586) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 12:25:12 root] (utils.py 283): INFO Epoch: [12] [ 400/2502] eta: 1:43:09 lr: 0.000004 loss_cls: 2.6314 (2.6923) grad_norm: 1.6290 (1.6603) time: 2.9303 data: 0.0003 max mem: 29202 +[2024-12-11 12:25:41 root] (utils.py 283): INFO Epoch: [12] [ 410/2502] eta: 1:42:40 lr: 0.000004 loss_cls: 2.9005 (2.6938) grad_norm: 1.6290 (1.6602) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 12:26:11 root] (utils.py 283): INFO Epoch: [12] [ 420/2502] eta: 1:42:10 lr: 0.000004 loss_cls: 2.8690 (2.6981) grad_norm: 1.5678 (1.6570) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 12:26:40 root] (utils.py 283): INFO Epoch: [12] [ 430/2502] eta: 1:41:41 lr: 0.000004 loss_cls: 2.8422 (2.6958) grad_norm: 1.5660 (1.6579) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 12:27:10 root] (utils.py 283): INFO Epoch: [12] [ 440/2502] eta: 1:41:11 lr: 0.000004 loss_cls: 2.9373 (2.6986) grad_norm: 1.6297 (1.6560) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 12:27:39 root] (utils.py 283): INFO Epoch: [12] [ 450/2502] eta: 1:40:42 lr: 0.000004 loss_cls: 2.9128 (2.6992) grad_norm: 1.5308 (1.6537) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 12:28:08 root] (utils.py 283): INFO Epoch: [12] [ 460/2502] eta: 1:40:12 lr: 0.000004 loss_cls: 2.9128 (2.7045) grad_norm: 1.5381 (1.6532) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 12:28:38 root] (utils.py 283): INFO Epoch: [12] [ 470/2502] eta: 1:39:43 lr: 0.000004 loss_cls: 2.8042 (2.7034) grad_norm: 1.5449 (1.6512) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 12:29:07 root] (utils.py 283): INFO Epoch: [12] [ 480/2502] eta: 1:39:13 lr: 0.000004 loss_cls: 2.6614 (2.7030) grad_norm: 1.5546 (1.6507) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 12:29:37 root] (utils.py 283): INFO Epoch: [12] [ 490/2502] eta: 1:38:44 lr: 0.000004 loss_cls: 2.5131 (2.6968) grad_norm: 1.5722 (1.6513) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-11 12:30:06 root] (utils.py 283): INFO Epoch: [12] [ 500/2502] eta: 1:38:14 lr: 0.000004 loss_cls: 2.3521 (2.6932) grad_norm: 1.5264 (1.6501) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 12:30:36 root] (utils.py 283): INFO Epoch: [12] [ 510/2502] eta: 1:37:45 lr: 0.000004 loss_cls: 2.3521 (2.6887) grad_norm: 1.4964 (1.6481) time: 2.9502 data: 0.0004 max mem: 29202 +[2024-12-11 12:31:05 root] (utils.py 283): INFO Epoch: [12] [ 520/2502] eta: 1:37:16 lr: 0.000004 loss_cls: 2.5469 (2.6889) grad_norm: 1.5096 (1.6470) time: 2.9556 data: 0.0003 max mem: 29202 +[2024-12-11 12:31:35 root] (utils.py 283): INFO Epoch: [12] [ 530/2502] eta: 1:36:47 lr: 0.000004 loss_cls: 2.7132 (2.6891) grad_norm: 1.5482 (1.6466) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-11 12:32:04 root] (utils.py 283): INFO Epoch: [12] [ 540/2502] eta: 1:36:17 lr: 0.000004 loss_cls: 2.5699 (2.6839) grad_norm: 1.5482 (1.6443) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 12:32:34 root] (utils.py 283): INFO Epoch: [12] [ 550/2502] eta: 1:35:48 lr: 0.000004 loss_cls: 2.5018 (2.6806) grad_norm: 1.5114 (1.6420) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 12:33:03 root] (utils.py 283): INFO Epoch: [12] [ 560/2502] eta: 1:35:18 lr: 0.000004 loss_cls: 2.6921 (2.6813) grad_norm: 1.5066 (1.6387) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 12:33:33 root] (utils.py 283): INFO Epoch: [12] [ 570/2502] eta: 1:34:49 lr: 0.000004 loss_cls: 2.8210 (2.6827) grad_norm: 1.4355 (1.6358) time: 2.9424 data: 0.0004 max mem: 29202 +[2024-12-11 12:34:02 root] (utils.py 283): INFO Epoch: [12] [ 580/2502] eta: 1:34:19 lr: 0.000004 loss_cls: 2.8663 (2.6813) grad_norm: 1.5546 (1.6364) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 12:34:31 root] (utils.py 283): INFO Epoch: [12] [ 590/2502] eta: 1:33:50 lr: 0.000004 loss_cls: 2.8031 (2.6819) grad_norm: 1.5590 (1.6348) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 12:35:01 root] (utils.py 283): INFO Epoch: [12] [ 600/2502] eta: 1:33:20 lr: 0.000004 loss_cls: 2.8031 (2.6831) grad_norm: 1.5814 (1.6347) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 12:35:30 root] (utils.py 283): INFO Epoch: [12] [ 610/2502] eta: 1:32:51 lr: 0.000004 loss_cls: 2.7699 (2.6826) grad_norm: 1.5910 (1.6353) time: 2.9419 data: 0.0003 max mem: 29202 +[2024-12-11 12:36:00 root] (utils.py 283): INFO Epoch: [12] [ 620/2502] eta: 1:32:21 lr: 0.000004 loss_cls: 2.7460 (2.6829) grad_norm: 1.5948 (1.6371) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 12:36:29 root] (utils.py 283): INFO Epoch: [12] [ 630/2502] eta: 1:31:51 lr: 0.000004 loss_cls: 2.7929 (2.6855) grad_norm: 1.5675 (1.6365) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 12:36:58 root] (utils.py 283): INFO Epoch: [12] [ 640/2502] eta: 1:31:22 lr: 0.000004 loss_cls: 2.9376 (2.6886) grad_norm: 1.5585 (1.6361) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 12:37:28 root] (utils.py 283): INFO Epoch: [12] [ 650/2502] eta: 1:30:52 lr: 0.000004 loss_cls: 2.8951 (2.6862) grad_norm: 1.5212 (1.6349) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 12:37:57 root] (utils.py 283): INFO Epoch: [12] [ 660/2502] eta: 1:30:23 lr: 0.000004 loss_cls: 2.5219 (2.6823) grad_norm: 1.5783 (1.6356) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 12:38:27 root] (utils.py 283): INFO Epoch: [12] [ 670/2502] eta: 1:29:54 lr: 0.000004 loss_cls: 2.5211 (2.6811) grad_norm: 1.6227 (1.6348) time: 2.9521 data: 0.0003 max mem: 29202 +[2024-12-11 12:38:56 root] (utils.py 283): INFO Epoch: [12] [ 680/2502] eta: 1:29:25 lr: 0.000004 loss_cls: 2.8034 (2.6816) grad_norm: 1.5117 (1.6351) time: 2.9581 data: 0.0003 max mem: 29202 +[2024-12-11 12:39:26 root] (utils.py 283): INFO Epoch: [12] [ 690/2502] eta: 1:28:55 lr: 0.000004 loss_cls: 2.7031 (2.6815) grad_norm: 1.6080 (1.6357) time: 2.9527 data: 0.0003 max mem: 29202 +[2024-12-11 12:39:55 root] (utils.py 283): INFO Epoch: [12] [ 700/2502] eta: 1:28:26 lr: 0.000004 loss_cls: 2.7159 (2.6823) grad_norm: 1.5200 (1.6342) time: 2.9498 data: 0.0003 max mem: 29202 +[2024-12-11 12:40:25 root] (utils.py 283): INFO Epoch: [12] [ 710/2502] eta: 1:27:57 lr: 0.000004 loss_cls: 2.8061 (2.6845) grad_norm: 1.5047 (1.6323) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-11 12:40:54 root] (utils.py 283): INFO Epoch: [12] [ 720/2502] eta: 1:27:27 lr: 0.000004 loss_cls: 2.8077 (2.6828) grad_norm: 1.4451 (1.6309) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 12:41:24 root] (utils.py 283): INFO Epoch: [12] [ 730/2502] eta: 1:26:57 lr: 0.000004 loss_cls: 2.8077 (2.6843) grad_norm: 1.4916 (1.6312) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 12:41:53 root] (utils.py 283): INFO Epoch: [12] [ 740/2502] eta: 1:26:28 lr: 0.000004 loss_cls: 2.7792 (2.6853) grad_norm: 1.5783 (1.6301) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 12:42:22 root] (utils.py 283): INFO Epoch: [12] [ 750/2502] eta: 1:25:58 lr: 0.000004 loss_cls: 2.8080 (2.6878) grad_norm: 1.5208 (1.6296) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 12:42:52 root] (utils.py 283): INFO Epoch: [12] [ 760/2502] eta: 1:25:29 lr: 0.000004 loss_cls: 2.8080 (2.6883) grad_norm: 1.4920 (1.6279) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 12:43:21 root] (utils.py 283): INFO Epoch: [12] [ 770/2502] eta: 1:24:59 lr: 0.000004 loss_cls: 2.7997 (2.6898) grad_norm: 1.5824 (1.6277) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 12:43:51 root] (utils.py 283): INFO Epoch: [12] [ 780/2502] eta: 1:24:30 lr: 0.000004 loss_cls: 2.7847 (2.6905) grad_norm: 1.6184 (1.6284) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 12:44:20 root] (utils.py 283): INFO Epoch: [12] [ 790/2502] eta: 1:24:00 lr: 0.000004 loss_cls: 2.8921 (2.6943) grad_norm: 1.6021 (1.6289) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 12:44:49 root] (utils.py 283): INFO Epoch: [12] [ 800/2502] eta: 1:23:30 lr: 0.000004 loss_cls: 2.9944 (2.6971) grad_norm: 1.5608 (1.6285) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 12:45:19 root] (utils.py 283): INFO Epoch: [12] [ 810/2502] eta: 1:23:01 lr: 0.000004 loss_cls: 2.8363 (2.6963) grad_norm: 1.4937 (1.6270) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 12:45:48 root] (utils.py 283): INFO Epoch: [12] [ 820/2502] eta: 1:22:31 lr: 0.000004 loss_cls: 2.7917 (2.6974) grad_norm: 1.5502 (1.6273) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 12:46:18 root] (utils.py 283): INFO Epoch: [12] [ 830/2502] eta: 1:22:02 lr: 0.000004 loss_cls: 2.8149 (2.6983) grad_norm: 1.5592 (1.6255) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 12:46:47 root] (utils.py 283): INFO Epoch: [12] [ 840/2502] eta: 1:21:33 lr: 0.000004 loss_cls: 2.8410 (2.7006) grad_norm: 1.4988 (1.6259) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 12:47:16 root] (utils.py 283): INFO Epoch: [12] [ 850/2502] eta: 1:21:03 lr: 0.000004 loss_cls: 2.8410 (2.7019) grad_norm: 1.5640 (1.6256) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 12:47:46 root] (utils.py 283): INFO Epoch: [12] [ 860/2502] eta: 1:20:33 lr: 0.000004 loss_cls: 2.7900 (2.7016) grad_norm: 1.5753 (1.6245) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 12:48:16 root] (utils.py 283): INFO Epoch: [12] [ 870/2502] eta: 1:20:04 lr: 0.000004 loss_cls: 2.7303 (2.7007) grad_norm: 1.5120 (1.6262) time: 2.9527 data: 0.0003 max mem: 29202 +[2024-12-11 12:48:45 root] (utils.py 283): INFO Epoch: [12] [ 880/2502] eta: 1:19:35 lr: 0.000004 loss_cls: 2.7005 (2.6996) grad_norm: 1.5780 (1.6315) time: 2.9529 data: 0.0003 max mem: 29202 +[2024-12-11 12:49:14 root] (utils.py 283): INFO Epoch: [12] [ 890/2502] eta: 1:19:05 lr: 0.000004 loss_cls: 2.7845 (2.7011) grad_norm: 1.5573 (1.6325) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 12:49:44 root] (utils.py 283): INFO Epoch: [12] [ 900/2502] eta: 1:18:36 lr: 0.000004 loss_cls: 2.8127 (2.6998) grad_norm: 1.4673 (1.6333) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 12:50:13 root] (utils.py 283): INFO Epoch: [12] [ 910/2502] eta: 1:18:06 lr: 0.000004 loss_cls: 2.8008 (2.7019) grad_norm: 1.5233 (1.6347) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 12:50:43 root] (utils.py 283): INFO Epoch: [12] [ 920/2502] eta: 1:17:37 lr: 0.000004 loss_cls: 2.8008 (2.6979) grad_norm: 1.4879 (1.6333) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 12:51:12 root] (utils.py 283): INFO Epoch: [12] [ 930/2502] eta: 1:17:08 lr: 0.000004 loss_cls: 2.5389 (2.6961) grad_norm: 1.4879 (1.6341) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-11 12:51:42 root] (utils.py 283): INFO Epoch: [12] [ 940/2502] eta: 1:16:38 lr: 0.000004 loss_cls: 2.8063 (2.6963) grad_norm: 1.5778 (1.6337) time: 2.9564 data: 0.0003 max mem: 29202 +[2024-12-11 12:52:11 root] (utils.py 283): INFO Epoch: [12] [ 950/2502] eta: 1:16:09 lr: 0.000004 loss_cls: 2.7830 (2.6956) grad_norm: 1.5778 (1.6325) time: 2.9559 data: 0.0003 max mem: 29202 +[2024-12-11 12:52:41 root] (utils.py 283): INFO Epoch: [12] [ 960/2502] eta: 1:15:39 lr: 0.000004 loss_cls: 2.7505 (2.6963) grad_norm: 1.5008 (1.6315) time: 2.9430 data: 0.0003 max mem: 29202 +[2024-12-11 12:53:10 root] (utils.py 283): INFO Epoch: [12] [ 970/2502] eta: 1:15:10 lr: 0.000004 loss_cls: 2.7576 (2.6951) grad_norm: 1.5008 (1.6313) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 12:53:39 root] (utils.py 283): INFO Epoch: [12] [ 980/2502] eta: 1:14:41 lr: 0.000004 loss_cls: 2.7438 (2.6938) grad_norm: 1.4753 (1.6298) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-11 12:54:09 root] (utils.py 283): INFO Epoch: [12] [ 990/2502] eta: 1:14:11 lr: 0.000004 loss_cls: 2.7707 (2.6947) grad_norm: 1.4851 (1.6297) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 12:54:38 root] (utils.py 283): INFO Epoch: [12] [1000/2502] eta: 1:13:42 lr: 0.000004 loss_cls: 2.7459 (2.6936) grad_norm: 1.6237 (1.6301) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 12:55:08 root] (utils.py 283): INFO Epoch: [12] [1010/2502] eta: 1:13:12 lr: 0.000004 loss_cls: 2.7030 (2.6948) grad_norm: 1.5682 (1.6288) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 12:55:37 root] (utils.py 283): INFO Epoch: [12] [1020/2502] eta: 1:12:43 lr: 0.000004 loss_cls: 2.9190 (2.6971) grad_norm: 1.5682 (1.6340) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 12:56:06 root] (utils.py 283): INFO Epoch: [12] [1030/2502] eta: 1:12:13 lr: 0.000004 loss_cls: 2.9970 (2.6981) grad_norm: 1.6805 (1.6343) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 12:56:36 root] (utils.py 283): INFO Epoch: [12] [1040/2502] eta: 1:11:44 lr: 0.000004 loss_cls: 3.0112 (2.6991) grad_norm: 1.6975 (1.6375) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 12:57:05 root] (utils.py 283): INFO Epoch: [12] [1050/2502] eta: 1:11:14 lr: 0.000004 loss_cls: 2.7235 (2.6986) grad_norm: 1.6560 (1.6376) time: 2.9490 data: 0.0003 max mem: 29202 +[2024-12-11 12:57:35 root] (utils.py 283): INFO Epoch: [12] [1060/2502] eta: 1:10:45 lr: 0.000004 loss_cls: 2.7928 (2.7003) grad_norm: 1.5852 (1.6388) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 12:58:04 root] (utils.py 283): INFO Epoch: [12] [1070/2502] eta: 1:10:15 lr: 0.000004 loss_cls: 2.8307 (2.7013) grad_norm: 1.7230 (1.6418) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 12:58:33 root] (utils.py 283): INFO Epoch: [12] [1080/2502] eta: 1:09:46 lr: 0.000004 loss_cls: 2.8307 (2.7029) grad_norm: 1.7230 (1.6416) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 12:59:03 root] (utils.py 283): INFO Epoch: [12] [1090/2502] eta: 1:09:16 lr: 0.000004 loss_cls: 2.9053 (2.7037) grad_norm: 1.5570 (1.6409) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 12:59:32 root] (utils.py 283): INFO Epoch: [12] [1100/2502] eta: 1:08:47 lr: 0.000004 loss_cls: 2.7765 (2.7037) grad_norm: 1.5570 (1.6407) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 13:00:02 root] (utils.py 283): INFO Epoch: [12] [1110/2502] eta: 1:08:17 lr: 0.000004 loss_cls: 2.9456 (2.7062) grad_norm: 1.5605 (1.6408) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 13:00:31 root] (utils.py 283): INFO Epoch: [12] [1120/2502] eta: 1:07:48 lr: 0.000004 loss_cls: 2.9456 (2.7070) grad_norm: 1.5482 (1.6406) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 13:01:00 root] (utils.py 283): INFO Epoch: [12] [1130/2502] eta: 1:07:18 lr: 0.000004 loss_cls: 2.8361 (2.7073) grad_norm: 1.5418 (1.6411) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 13:01:30 root] (utils.py 283): INFO Epoch: [12] [1140/2502] eta: 1:06:49 lr: 0.000004 loss_cls: 2.8060 (2.7058) grad_norm: 1.6002 (1.6408) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 13:01:59 root] (utils.py 283): INFO Epoch: [12] [1150/2502] eta: 1:06:19 lr: 0.000004 loss_cls: 2.7835 (2.7063) grad_norm: 1.5722 (1.6399) time: 2.9334 data: 0.0003 max mem: 29202 +[2024-12-11 13:02:28 root] (utils.py 283): INFO Epoch: [12] [1160/2502] eta: 1:05:49 lr: 0.000004 loss_cls: 2.7821 (2.7055) grad_norm: 1.5813 (1.6398) time: 2.9337 data: 0.0003 max mem: 29202 +[2024-12-11 13:02:58 root] (utils.py 283): INFO Epoch: [12] [1170/2502] eta: 1:05:20 lr: 0.000004 loss_cls: 2.5166 (2.7042) grad_norm: 1.5813 (1.6399) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 13:03:27 root] (utils.py 283): INFO Epoch: [12] [1180/2502] eta: 1:04:50 lr: 0.000004 loss_cls: 2.4616 (2.7034) grad_norm: 1.4528 (1.6386) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 13:03:56 root] (utils.py 283): INFO Epoch: [12] [1190/2502] eta: 1:04:21 lr: 0.000004 loss_cls: 2.6012 (2.7028) grad_norm: 1.4792 (1.6388) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 13:04:26 root] (utils.py 283): INFO Epoch: [12] [1200/2502] eta: 1:03:51 lr: 0.000004 loss_cls: 2.6012 (2.7020) grad_norm: 1.6104 (1.6399) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-11 13:04:55 root] (utils.py 283): INFO Epoch: [12] [1210/2502] eta: 1:03:22 lr: 0.000004 loss_cls: 2.8194 (2.7030) grad_norm: 1.5475 (1.6388) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 13:05:25 root] (utils.py 283): INFO Epoch: [12] [1220/2502] eta: 1:02:53 lr: 0.000004 loss_cls: 2.9384 (2.7042) grad_norm: 1.5559 (1.6398) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-11 13:05:54 root] (utils.py 283): INFO Epoch: [12] [1230/2502] eta: 1:02:23 lr: 0.000004 loss_cls: 2.9042 (2.7043) grad_norm: 1.6010 (1.6399) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-11 13:06:23 root] (utils.py 283): INFO Epoch: [12] [1240/2502] eta: 1:01:54 lr: 0.000004 loss_cls: 2.9260 (2.7059) grad_norm: 1.6010 (1.6404) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 13:06:53 root] (utils.py 283): INFO Epoch: [12] [1250/2502] eta: 1:01:24 lr: 0.000004 loss_cls: 2.8448 (2.7049) grad_norm: 1.5166 (1.6399) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 13:07:22 root] (utils.py 283): INFO Epoch: [12] [1260/2502] eta: 1:00:55 lr: 0.000004 loss_cls: 2.7180 (2.7057) grad_norm: 1.5532 (1.6408) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 13:07:52 root] (utils.py 283): INFO Epoch: [12] [1270/2502] eta: 1:00:25 lr: 0.000004 loss_cls: 2.7521 (2.7041) grad_norm: 1.7382 (1.6415) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 13:08:21 root] (utils.py 283): INFO Epoch: [12] [1280/2502] eta: 0:59:56 lr: 0.000004 loss_cls: 2.5331 (2.7023) grad_norm: 1.5988 (1.6408) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 13:08:50 root] (utils.py 283): INFO Epoch: [12] [1290/2502] eta: 0:59:26 lr: 0.000004 loss_cls: 2.7194 (2.7031) grad_norm: 1.5396 (1.6412) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 13:09:20 root] (utils.py 283): INFO Epoch: [12] [1300/2502] eta: 0:58:57 lr: 0.000004 loss_cls: 2.6843 (2.7011) grad_norm: 1.6529 (1.6415) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 13:09:50 root] (utils.py 283): INFO Epoch: [12] [1310/2502] eta: 0:58:28 lr: 0.000004 loss_cls: 2.7735 (2.7028) grad_norm: 1.6277 (1.6416) time: 2.9593 data: 0.0003 max mem: 29202 +[2024-12-11 13:10:19 root] (utils.py 283): INFO Epoch: [12] [1320/2502] eta: 0:57:58 lr: 0.000004 loss_cls: 2.8818 (2.7034) grad_norm: 1.4993 (1.6417) time: 2.9535 data: 0.0003 max mem: 29202 +[2024-12-11 13:10:48 root] (utils.py 283): INFO Epoch: [12] [1330/2502] eta: 0:57:29 lr: 0.000004 loss_cls: 2.7826 (2.7029) grad_norm: 1.5283 (1.6406) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 13:11:18 root] (utils.py 283): INFO Epoch: [12] [1340/2502] eta: 0:56:59 lr: 0.000004 loss_cls: 2.7516 (2.7025) grad_norm: 1.5340 (1.6411) time: 2.9353 data: 0.0003 max mem: 29202 +[2024-12-11 13:11:47 root] (utils.py 283): INFO Epoch: [12] [1350/2502] eta: 0:56:30 lr: 0.000004 loss_cls: 2.7516 (2.7024) grad_norm: 1.5397 (1.6407) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 13:12:17 root] (utils.py 283): INFO Epoch: [12] [1360/2502] eta: 0:56:00 lr: 0.000004 loss_cls: 2.7703 (2.7018) grad_norm: 1.5536 (1.6400) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 13:12:46 root] (utils.py 283): INFO Epoch: [12] [1370/2502] eta: 0:55:31 lr: 0.000004 loss_cls: 2.8652 (2.7026) grad_norm: 1.5153 (1.6396) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 13:13:15 root] (utils.py 283): INFO Epoch: [12] [1380/2502] eta: 0:55:01 lr: 0.000004 loss_cls: 2.7599 (2.7013) grad_norm: 1.4438 (1.6385) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-11 13:13:45 root] (utils.py 283): INFO Epoch: [12] [1390/2502] eta: 0:54:32 lr: 0.000004 loss_cls: 2.6780 (2.7015) grad_norm: 1.5656 (1.6382) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-11 13:14:14 root] (utils.py 283): INFO Epoch: [12] [1400/2502] eta: 0:54:02 lr: 0.000004 loss_cls: 2.8405 (2.7031) grad_norm: 1.6281 (1.6389) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 13:14:43 root] (utils.py 283): INFO Epoch: [12] [1410/2502] eta: 0:53:33 lr: 0.000004 loss_cls: 2.9856 (2.7036) grad_norm: 1.5664 (1.6385) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-11 13:15:13 root] (utils.py 283): INFO Epoch: [12] [1420/2502] eta: 0:53:04 lr: 0.000004 loss_cls: 2.9594 (2.7035) grad_norm: 1.5684 (1.6389) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 13:15:42 root] (utils.py 283): INFO Epoch: [12] [1430/2502] eta: 0:52:34 lr: 0.000004 loss_cls: 2.7674 (2.7033) grad_norm: 1.5975 (1.6421) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-11 13:16:12 root] (utils.py 283): INFO Epoch: [12] [1440/2502] eta: 0:52:05 lr: 0.000004 loss_cls: 2.7674 (2.7042) grad_norm: 1.5685 (1.6419) time: 2.9520 data: 0.0003 max mem: 29202 +[2024-12-11 13:16:42 root] (utils.py 283): INFO Epoch: [12] [1450/2502] eta: 0:51:36 lr: 0.000004 loss_cls: 2.8634 (2.7044) grad_norm: 1.6051 (1.6431) time: 2.9534 data: 0.0003 max mem: 29202 +[2024-12-11 13:17:11 root] (utils.py 283): INFO Epoch: [12] [1460/2502] eta: 0:51:06 lr: 0.000004 loss_cls: 2.7957 (2.7044) grad_norm: 1.6171 (1.6431) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-11 13:17:40 root] (utils.py 283): INFO Epoch: [12] [1470/2502] eta: 0:50:37 lr: 0.000004 loss_cls: 2.7730 (2.7043) grad_norm: 1.5491 (1.6418) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 13:18:10 root] (utils.py 283): INFO Epoch: [12] [1480/2502] eta: 0:50:07 lr: 0.000004 loss_cls: 2.8557 (2.7045) grad_norm: 1.5100 (1.6419) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 13:18:39 root] (utils.py 283): INFO Epoch: [12] [1490/2502] eta: 0:49:38 lr: 0.000004 loss_cls: 2.6743 (2.7037) grad_norm: 1.4979 (1.6428) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 13:19:08 root] (utils.py 283): INFO Epoch: [12] [1500/2502] eta: 0:49:08 lr: 0.000004 loss_cls: 2.6741 (2.7044) grad_norm: 1.4570 (1.6433) time: 2.9354 data: 0.0003 max mem: 29202 +[2024-12-11 13:19:38 root] (utils.py 283): INFO Epoch: [12] [1510/2502] eta: 0:48:39 lr: 0.000004 loss_cls: 2.7296 (2.7042) grad_norm: 1.4979 (1.6427) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 13:20:07 root] (utils.py 283): INFO Epoch: [12] [1520/2502] eta: 0:48:09 lr: 0.000004 loss_cls: 2.7270 (2.7025) grad_norm: 1.5318 (1.6418) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 13:20:37 root] (utils.py 283): INFO Epoch: [12] [1530/2502] eta: 0:47:40 lr: 0.000004 loss_cls: 2.2429 (2.7013) grad_norm: 1.5392 (1.6411) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 13:21:06 root] (utils.py 283): INFO Epoch: [12] [1540/2502] eta: 0:47:10 lr: 0.000004 loss_cls: 2.6913 (2.7010) grad_norm: 1.5025 (1.6408) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 13:21:35 root] (utils.py 283): INFO Epoch: [12] [1550/2502] eta: 0:46:41 lr: 0.000004 loss_cls: 2.8684 (2.7017) grad_norm: 1.5572 (1.6411) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 13:22:05 root] (utils.py 283): INFO Epoch: [12] [1560/2502] eta: 0:46:12 lr: 0.000004 loss_cls: 2.8685 (2.7022) grad_norm: 1.5605 (1.6406) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 13:22:34 root] (utils.py 283): INFO Epoch: [12] [1570/2502] eta: 0:45:42 lr: 0.000004 loss_cls: 2.8990 (2.7032) grad_norm: 1.6116 (1.6402) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 13:23:04 root] (utils.py 283): INFO Epoch: [12] [1580/2502] eta: 0:45:13 lr: 0.000004 loss_cls: 2.6890 (2.7015) grad_norm: 1.5415 (1.6395) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 13:23:33 root] (utils.py 283): INFO Epoch: [12] [1590/2502] eta: 0:44:43 lr: 0.000004 loss_cls: 2.5314 (2.7013) grad_norm: 1.5735 (1.6396) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 13:24:02 root] (utils.py 283): INFO Epoch: [12] [1600/2502] eta: 0:44:14 lr: 0.000004 loss_cls: 2.6046 (2.7003) grad_norm: 1.5893 (1.6390) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 13:24:32 root] (utils.py 283): INFO Epoch: [12] [1610/2502] eta: 0:43:44 lr: 0.000004 loss_cls: 2.6249 (2.6999) grad_norm: 1.4837 (1.6391) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 13:25:01 root] (utils.py 283): INFO Epoch: [12] [1620/2502] eta: 0:43:15 lr: 0.000004 loss_cls: 2.8204 (2.7000) grad_norm: 1.5619 (1.6395) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 13:25:31 root] (utils.py 283): INFO Epoch: [12] [1630/2502] eta: 0:42:46 lr: 0.000004 loss_cls: 2.8802 (2.7006) grad_norm: 1.5619 (1.6397) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 13:26:00 root] (utils.py 283): INFO Epoch: [12] [1640/2502] eta: 0:42:16 lr: 0.000004 loss_cls: 2.8475 (2.7011) grad_norm: 1.7275 (1.6404) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 13:26:29 root] (utils.py 283): INFO Epoch: [12] [1650/2502] eta: 0:41:47 lr: 0.000004 loss_cls: 2.8687 (2.7017) grad_norm: 1.5759 (1.6400) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 13:26:59 root] (utils.py 283): INFO Epoch: [12] [1660/2502] eta: 0:41:17 lr: 0.000004 loss_cls: 2.9337 (2.7029) grad_norm: 1.5560 (1.6400) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-11 13:27:29 root] (utils.py 283): INFO Epoch: [12] [1670/2502] eta: 0:40:48 lr: 0.000004 loss_cls: 2.8575 (2.7018) grad_norm: 1.6433 (1.6406) time: 2.9531 data: 0.0003 max mem: 29202 +[2024-12-11 13:27:58 root] (utils.py 283): INFO Epoch: [12] [1680/2502] eta: 0:40:18 lr: 0.000004 loss_cls: 2.7136 (2.7027) grad_norm: 1.7044 (1.6408) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 13:28:27 root] (utils.py 283): INFO Epoch: [12] [1690/2502] eta: 0:39:49 lr: 0.000004 loss_cls: 2.6995 (2.7025) grad_norm: 1.5674 (1.6405) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 13:28:57 root] (utils.py 283): INFO Epoch: [12] [1700/2502] eta: 0:39:20 lr: 0.000004 loss_cls: 2.6995 (2.7024) grad_norm: 1.6126 (1.6407) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 13:29:26 root] (utils.py 283): INFO Epoch: [12] [1710/2502] eta: 0:38:50 lr: 0.000004 loss_cls: 2.7908 (2.7008) grad_norm: 1.6365 (1.6402) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 13:29:56 root] (utils.py 283): INFO Epoch: [12] [1720/2502] eta: 0:38:21 lr: 0.000004 loss_cls: 2.5982 (2.7006) grad_norm: 1.5512 (1.6400) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 13:30:25 root] (utils.py 283): INFO Epoch: [12] [1730/2502] eta: 0:37:51 lr: 0.000004 loss_cls: 2.6947 (2.7012) grad_norm: 1.5110 (1.6397) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 13:30:54 root] (utils.py 283): INFO Epoch: [12] [1740/2502] eta: 0:37:22 lr: 0.000004 loss_cls: 2.8907 (2.7018) grad_norm: 1.5227 (1.6390) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 13:31:24 root] (utils.py 283): INFO Epoch: [12] [1750/2502] eta: 0:36:52 lr: 0.000004 loss_cls: 2.9387 (2.7026) grad_norm: 1.5669 (1.6386) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 13:31:53 root] (utils.py 283): INFO Epoch: [12] [1760/2502] eta: 0:36:23 lr: 0.000004 loss_cls: 2.9503 (2.7035) grad_norm: 1.6133 (1.6390) time: 2.9456 data: 0.0003 max mem: 29202 +[2024-12-11 13:32:23 root] (utils.py 283): INFO Epoch: [12] [1770/2502] eta: 0:35:54 lr: 0.000004 loss_cls: 2.9026 (2.7040) grad_norm: 1.6237 (1.6389) time: 2.9510 data: 0.0003 max mem: 29202 +[2024-12-11 13:32:52 root] (utils.py 283): INFO Epoch: [12] [1780/2502] eta: 0:35:24 lr: 0.000004 loss_cls: 2.8257 (2.7041) grad_norm: 1.5764 (1.6387) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 13:33:22 root] (utils.py 283): INFO Epoch: [12] [1790/2502] eta: 0:34:55 lr: 0.000004 loss_cls: 2.8119 (2.7038) grad_norm: 1.6002 (1.6383) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 13:33:51 root] (utils.py 283): INFO Epoch: [12] [1800/2502] eta: 0:34:25 lr: 0.000004 loss_cls: 2.7547 (2.7035) grad_norm: 1.6002 (1.6379) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 13:34:20 root] (utils.py 283): INFO Epoch: [12] [1810/2502] eta: 0:33:56 lr: 0.000004 loss_cls: 2.6916 (2.7036) grad_norm: 1.5642 (1.6381) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 13:34:50 root] (utils.py 283): INFO Epoch: [12] [1820/2502] eta: 0:33:26 lr: 0.000004 loss_cls: 2.6500 (2.7029) grad_norm: 1.5642 (1.6374) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 13:35:19 root] (utils.py 283): INFO Epoch: [12] [1830/2502] eta: 0:32:57 lr: 0.000004 loss_cls: 2.3861 (2.7016) grad_norm: 1.5794 (1.6376) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 13:35:49 root] (utils.py 283): INFO Epoch: [12] [1840/2502] eta: 0:32:28 lr: 0.000004 loss_cls: 2.5483 (2.7016) grad_norm: 1.5758 (1.6374) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 13:36:18 root] (utils.py 283): INFO Epoch: [12] [1850/2502] eta: 0:31:58 lr: 0.000004 loss_cls: 2.7749 (2.7014) grad_norm: 1.5500 (1.6372) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 13:36:47 root] (utils.py 283): INFO Epoch: [12] [1860/2502] eta: 0:31:29 lr: 0.000004 loss_cls: 2.6050 (2.7008) grad_norm: 1.5387 (1.6373) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 13:37:17 root] (utils.py 283): INFO Epoch: [12] [1870/2502] eta: 0:30:59 lr: 0.000004 loss_cls: 2.5196 (2.7002) grad_norm: 1.5678 (1.6371) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 13:37:46 root] (utils.py 283): INFO Epoch: [12] [1880/2502] eta: 0:30:30 lr: 0.000004 loss_cls: 2.5196 (2.6994) grad_norm: 1.6349 (1.6387) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 13:38:16 root] (utils.py 283): INFO Epoch: [12] [1890/2502] eta: 0:30:00 lr: 0.000004 loss_cls: 2.8674 (2.6998) grad_norm: 1.5627 (1.6383) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 13:38:45 root] (utils.py 283): INFO Epoch: [12] [1900/2502] eta: 0:29:31 lr: 0.000004 loss_cls: 2.9179 (2.7004) grad_norm: 1.5385 (1.6385) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 13:39:14 root] (utils.py 283): INFO Epoch: [12] [1910/2502] eta: 0:29:01 lr: 0.000004 loss_cls: 2.9167 (2.7007) grad_norm: 1.6102 (1.6390) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 13:39:44 root] (utils.py 283): INFO Epoch: [12] [1920/2502] eta: 0:28:32 lr: 0.000004 loss_cls: 2.8818 (2.7013) grad_norm: 1.5276 (1.6385) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 13:40:13 root] (utils.py 283): INFO Epoch: [12] [1930/2502] eta: 0:28:03 lr: 0.000004 loss_cls: 2.8818 (2.7019) grad_norm: 1.5256 (1.6382) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 13:40:42 root] (utils.py 283): INFO Epoch: [12] [1940/2502] eta: 0:27:33 lr: 0.000004 loss_cls: 2.8459 (2.7013) grad_norm: 1.5506 (1.6386) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 13:41:12 root] (utils.py 283): INFO Epoch: [12] [1950/2502] eta: 0:27:04 lr: 0.000004 loss_cls: 2.7401 (2.7010) grad_norm: 1.6181 (1.6384) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 13:41:41 root] (utils.py 283): INFO Epoch: [12] [1960/2502] eta: 0:26:34 lr: 0.000004 loss_cls: 2.4969 (2.6996) grad_norm: 1.6016 (1.6381) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-11 13:42:11 root] (utils.py 283): INFO Epoch: [12] [1970/2502] eta: 0:26:05 lr: 0.000004 loss_cls: 2.4969 (2.6997) grad_norm: 1.5262 (1.6377) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 13:42:40 root] (utils.py 283): INFO Epoch: [12] [1980/2502] eta: 0:25:35 lr: 0.000004 loss_cls: 2.8209 (2.6997) grad_norm: 1.5758 (1.6378) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 13:43:10 root] (utils.py 283): INFO Epoch: [12] [1990/2502] eta: 0:25:06 lr: 0.000004 loss_cls: 2.7104 (2.6985) grad_norm: 1.5758 (1.6371) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-11 13:43:39 root] (utils.py 283): INFO Epoch: [12] [2000/2502] eta: 0:24:37 lr: 0.000004 loss_cls: 2.7649 (2.6990) grad_norm: 1.4610 (1.6370) time: 2.9465 data: 0.0003 max mem: 29202 +[2024-12-11 13:44:09 root] (utils.py 283): INFO Epoch: [12] [2010/2502] eta: 0:24:07 lr: 0.000004 loss_cls: 2.7663 (2.6989) grad_norm: 1.6015 (1.6373) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 13:44:38 root] (utils.py 283): INFO Epoch: [12] [2020/2502] eta: 0:23:38 lr: 0.000004 loss_cls: 2.5777 (2.6981) grad_norm: 1.5740 (1.6373) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 13:45:07 root] (utils.py 283): INFO Epoch: [12] [2030/2502] eta: 0:23:08 lr: 0.000004 loss_cls: 2.6385 (2.6978) grad_norm: 1.5855 (1.6373) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 13:45:37 root] (utils.py 283): INFO Epoch: [12] [2040/2502] eta: 0:22:39 lr: 0.000004 loss_cls: 2.7392 (2.6984) grad_norm: 1.5855 (1.6377) time: 2.9551 data: 0.0003 max mem: 29202 +[2024-12-11 13:46:07 root] (utils.py 283): INFO Epoch: [12] [2050/2502] eta: 0:22:10 lr: 0.000004 loss_cls: 2.7594 (2.6988) grad_norm: 1.5896 (1.6376) time: 2.9566 data: 0.0003 max mem: 29202 +[2024-12-11 13:46:36 root] (utils.py 283): INFO Epoch: [12] [2060/2502] eta: 0:21:40 lr: 0.000004 loss_cls: 2.7594 (2.6990) grad_norm: 1.6006 (1.6380) time: 2.9397 data: 0.0002 max mem: 29202 +[2024-12-11 13:47:05 root] (utils.py 283): INFO Epoch: [12] [2070/2502] eta: 0:21:11 lr: 0.000004 loss_cls: 2.8934 (2.6990) grad_norm: 1.5549 (1.6377) time: 2.9369 data: 0.0002 max mem: 29202 +[2024-12-11 13:47:35 root] (utils.py 283): INFO Epoch: [12] [2080/2502] eta: 0:20:41 lr: 0.000004 loss_cls: 2.8337 (2.6993) grad_norm: 1.5157 (1.6372) time: 2.9371 data: 0.0002 max mem: 29202 +[2024-12-11 13:48:04 root] (utils.py 283): INFO Epoch: [12] [2090/2502] eta: 0:20:12 lr: 0.000004 loss_cls: 2.7671 (2.6999) grad_norm: 1.5210 (1.6371) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 13:48:33 root] (utils.py 283): INFO Epoch: [12] [2100/2502] eta: 0:19:42 lr: 0.000004 loss_cls: 2.9425 (2.7012) grad_norm: 1.5154 (1.6366) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 13:49:03 root] (utils.py 283): INFO Epoch: [12] [2110/2502] eta: 0:19:13 lr: 0.000004 loss_cls: 2.8503 (2.7009) grad_norm: 1.5154 (1.6364) time: 2.9314 data: 0.0003 max mem: 29202 +[2024-12-11 13:49:32 root] (utils.py 283): INFO Epoch: [12] [2120/2502] eta: 0:18:44 lr: 0.000004 loss_cls: 2.7189 (2.7012) grad_norm: 1.6053 (1.6369) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 13:50:02 root] (utils.py 283): INFO Epoch: [12] [2130/2502] eta: 0:18:14 lr: 0.000004 loss_cls: 2.8724 (2.7018) grad_norm: 1.5299 (1.6363) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-11 13:50:31 root] (utils.py 283): INFO Epoch: [12] [2140/2502] eta: 0:17:45 lr: 0.000004 loss_cls: 2.8718 (2.7021) grad_norm: 1.5299 (1.6364) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 13:51:00 root] (utils.py 283): INFO Epoch: [12] [2150/2502] eta: 0:17:15 lr: 0.000004 loss_cls: 2.8767 (2.7025) grad_norm: 1.5695 (1.6363) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 13:51:30 root] (utils.py 283): INFO Epoch: [12] [2160/2502] eta: 0:16:46 lr: 0.000004 loss_cls: 2.8811 (2.7029) grad_norm: 1.5695 (1.6364) time: 2.9337 data: 0.0003 max mem: 29202 +[2024-12-11 13:51:59 root] (utils.py 283): INFO Epoch: [12] [2170/2502] eta: 0:16:16 lr: 0.000004 loss_cls: 2.7865 (2.7031) grad_norm: 1.5763 (1.6363) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 13:52:28 root] (utils.py 283): INFO Epoch: [12] [2180/2502] eta: 0:15:47 lr: 0.000004 loss_cls: 2.7843 (2.7029) grad_norm: 1.6808 (1.6365) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 13:52:58 root] (utils.py 283): INFO Epoch: [12] [2190/2502] eta: 0:15:17 lr: 0.000004 loss_cls: 2.7753 (2.7033) grad_norm: 1.6495 (1.6364) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 13:53:27 root] (utils.py 283): INFO Epoch: [12] [2200/2502] eta: 0:14:48 lr: 0.000004 loss_cls: 2.8096 (2.7031) grad_norm: 1.6273 (1.6362) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 13:53:57 root] (utils.py 283): INFO Epoch: [12] [2210/2502] eta: 0:14:19 lr: 0.000004 loss_cls: 2.5761 (2.7022) grad_norm: 1.6057 (1.6366) time: 2.9498 data: 0.0003 max mem: 29202 +[2024-12-11 13:54:26 root] (utils.py 283): INFO Epoch: [12] [2220/2502] eta: 0:13:49 lr: 0.000004 loss_cls: 2.7863 (2.7030) grad_norm: 1.5292 (1.6363) time: 2.9468 data: 0.0004 max mem: 29202 +[2024-12-11 13:54:55 root] (utils.py 283): INFO Epoch: [12] [2230/2502] eta: 0:13:20 lr: 0.000004 loss_cls: 2.7965 (2.7025) grad_norm: 1.5019 (1.6357) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 13:55:25 root] (utils.py 283): INFO Epoch: [12] [2240/2502] eta: 0:12:50 lr: 0.000004 loss_cls: 2.7508 (2.7025) grad_norm: 1.5221 (1.6361) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 13:55:54 root] (utils.py 283): INFO Epoch: [12] [2250/2502] eta: 0:12:21 lr: 0.000004 loss_cls: 2.6969 (2.7022) grad_norm: 1.5870 (1.6367) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 13:56:24 root] (utils.py 283): INFO Epoch: [12] [2260/2502] eta: 0:11:52 lr: 0.000004 loss_cls: 2.7191 (2.7021) grad_norm: 1.5999 (1.6370) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 13:56:53 root] (utils.py 283): INFO Epoch: [12] [2270/2502] eta: 0:11:22 lr: 0.000004 loss_cls: 2.5946 (2.7012) grad_norm: 1.6353 (1.6371) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 13:57:22 root] (utils.py 283): INFO Epoch: [12] [2280/2502] eta: 0:10:53 lr: 0.000004 loss_cls: 2.7448 (2.7021) grad_norm: 1.6588 (1.6376) time: 2.9333 data: 0.0003 max mem: 29202 +[2024-12-11 13:57:52 root] (utils.py 283): INFO Epoch: [12] [2290/2502] eta: 0:10:23 lr: 0.000004 loss_cls: 2.9308 (2.7027) grad_norm: 1.6588 (1.6379) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 13:58:21 root] (utils.py 283): INFO Epoch: [12] [2300/2502] eta: 0:09:54 lr: 0.000004 loss_cls: 2.8384 (2.7032) grad_norm: 1.6571 (1.6382) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 13:58:50 root] (utils.py 283): INFO Epoch: [12] [2310/2502] eta: 0:09:24 lr: 0.000004 loss_cls: 2.8374 (2.7035) grad_norm: 1.6025 (1.6382) time: 2.9336 data: 0.0003 max mem: 29202 +[2024-12-11 13:59:20 root] (utils.py 283): INFO Epoch: [12] [2320/2502] eta: 0:08:55 lr: 0.000004 loss_cls: 2.8393 (2.7043) grad_norm: 1.5201 (1.6381) time: 2.9338 data: 0.0003 max mem: 29202 +[2024-12-11 13:59:49 root] (utils.py 283): INFO Epoch: [12] [2330/2502] eta: 0:08:26 lr: 0.000004 loss_cls: 2.7762 (2.7039) grad_norm: 1.5441 (1.6378) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 14:00:18 root] (utils.py 283): INFO Epoch: [12] [2340/2502] eta: 0:07:56 lr: 0.000004 loss_cls: 2.6201 (2.7032) grad_norm: 1.5506 (1.6379) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 14:00:48 root] (utils.py 283): INFO Epoch: [12] [2350/2502] eta: 0:07:27 lr: 0.000004 loss_cls: 2.5617 (2.7025) grad_norm: 1.6212 (1.6380) time: 2.9321 data: 0.0003 max mem: 29202 +[2024-12-11 14:01:17 root] (utils.py 283): INFO Epoch: [12] [2360/2502] eta: 0:06:57 lr: 0.000004 loss_cls: 2.7294 (2.7028) grad_norm: 1.6212 (1.6380) time: 2.9307 data: 0.0003 max mem: 29202 +[2024-12-11 14:01:46 root] (utils.py 283): INFO Epoch: [12] [2370/2502] eta: 0:06:28 lr: 0.000004 loss_cls: 2.8208 (2.7026) grad_norm: 1.6028 (1.6380) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 14:02:16 root] (utils.py 283): INFO Epoch: [12] [2380/2502] eta: 0:05:58 lr: 0.000004 loss_cls: 2.8169 (2.7024) grad_norm: 1.6028 (1.6378) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 14:02:45 root] (utils.py 283): INFO Epoch: [12] [2390/2502] eta: 0:05:29 lr: 0.000004 loss_cls: 2.7512 (2.7021) grad_norm: 1.6110 (1.6375) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 14:03:14 root] (utils.py 283): INFO Epoch: [12] [2400/2502] eta: 0:05:00 lr: 0.000004 loss_cls: 2.8073 (2.7027) grad_norm: 1.5363 (1.6372) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 14:03:44 root] (utils.py 283): INFO Epoch: [12] [2410/2502] eta: 0:04:30 lr: 0.000004 loss_cls: 2.8611 (2.7022) grad_norm: 1.5261 (1.6372) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 14:04:13 root] (utils.py 283): INFO Epoch: [12] [2420/2502] eta: 0:04:01 lr: 0.000004 loss_cls: 2.8284 (2.7027) grad_norm: 1.5325 (1.6372) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-11 14:04:43 root] (utils.py 283): INFO Epoch: [12] [2430/2502] eta: 0:03:31 lr: 0.000004 loss_cls: 2.7364 (2.7019) grad_norm: 1.6417 (1.6380) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 14:05:12 root] (utils.py 283): INFO Epoch: [12] [2440/2502] eta: 0:03:02 lr: 0.000004 loss_cls: 2.6152 (2.7022) grad_norm: 1.6324 (1.6378) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 14:05:42 root] (utils.py 283): INFO Epoch: [12] [2450/2502] eta: 0:02:32 lr: 0.000004 loss_cls: 2.8679 (2.7022) grad_norm: 1.5926 (1.6377) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 14:06:11 root] (utils.py 283): INFO Epoch: [12] [2460/2502] eta: 0:02:03 lr: 0.000004 loss_cls: 2.7309 (2.7017) grad_norm: 1.5926 (1.6375) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 14:06:41 root] (utils.py 283): INFO Epoch: [12] [2470/2502] eta: 0:01:34 lr: 0.000004 loss_cls: 2.6599 (2.7015) grad_norm: 1.5252 (1.6372) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-11 14:07:10 root] (utils.py 283): INFO Epoch: [12] [2480/2502] eta: 0:01:04 lr: 0.000004 loss_cls: 2.6599 (2.7009) grad_norm: 1.5232 (1.6367) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-11 14:07:40 root] (utils.py 283): INFO Epoch: [12] [2490/2502] eta: 0:00:35 lr: 0.000004 loss_cls: 2.5677 (2.7003) grad_norm: 1.5809 (1.6367) time: 2.9593 data: 0.0240 max mem: 29202 +[2024-12-11 14:08:09 root] (utils.py 283): INFO Epoch: [12] [2500/2502] eta: 0:00:05 lr: 0.000004 loss_cls: 2.6756 (2.7004) grad_norm: 1.6004 (1.6366) time: 2.9568 data: 0.0240 max mem: 29202 +[2024-12-11 14:08:12 root] (utils.py 283): INFO Epoch: [12] [2501/2502] eta: 0:00:02 lr: 0.000004 loss_cls: 2.6756 (2.7005) grad_norm: 1.6004 (1.6365) time: 2.9571 data: 0.0240 max mem: 29202 +[2024-12-11 14:08:12 root] (utils.py 297): INFO Epoch: [12] Total time: 2:02:41 (2.9421 s / it) +[2024-12-11 14:08:12 root] (engine.py 179): INFO Averaged stats:lr: 0.000004 loss_cls: 2.6756 (2.6935) grad_norm: 1.6004 (1.6365) +[2024-12-11 14:08:16 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2899 (0.2899) acc1: 90.6250 (90.6250) acc3: 98.4375 (98.4375) acc5: 98.4375 (98.4375) time: 0.5642 data: 0.0005 max mem: 29202 +[2024-12-11 14:08:22 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5491 (0.5383) acc1: 85.1562 (86.7898) acc3: 97.6562 (97.0170) acc5: 98.4375 (98.1534) time: 0.5527 data: 0.0004 max mem: 29202 +[2024-12-11 14:08:27 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5491 (0.5921) acc1: 85.9375 (85.8259) acc3: 96.8750 (96.4286) acc5: 97.6562 (97.6935) time: 0.5511 data: 0.0004 max mem: 29202 +[2024-12-11 14:08:33 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5824 (0.6202) acc1: 85.9375 (85.3831) acc3: 96.0938 (96.0433) acc5: 97.6562 (97.5554) time: 0.5510 data: 0.0004 max mem: 29202 +[2024-12-11 14:08:38 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6082 (0.6220) acc1: 85.9375 (85.4802) acc3: 96.0938 (95.9985) acc5: 97.6562 (97.5991) time: 0.5515 data: 0.0004 max mem: 29202 +[2024-12-11 14:08:44 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7989 (0.7026) acc1: 78.9062 (83.7163) acc3: 92.9688 (94.8836) acc5: 96.0938 (96.7984) time: 0.5528 data: 0.0004 max mem: 29202 +[2024-12-11 14:08:49 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9030 (0.7367) acc1: 77.3438 (83.1455) acc3: 90.6250 (94.1855) acc5: 93.7500 (96.3371) time: 0.5534 data: 0.0004 max mem: 29202 +[2024-12-11 14:08:55 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9397 (0.7686) acc1: 79.6875 (82.4054) acc3: 91.4062 (93.8600) acc5: 93.7500 (96.0827) time: 0.5529 data: 0.0004 max mem: 29202 +[2024-12-11 14:09:00 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9270 (0.7938) acc1: 78.9062 (81.9155) acc3: 91.4062 (93.4606) acc5: 93.7500 (95.7079) time: 0.5536 data: 0.0007 max mem: 29202 +[2024-12-11 14:09:06 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9404 (0.8216) acc1: 76.5625 (81.1641) acc3: 91.4062 (93.1490) acc5: 92.9688 (95.4756) time: 0.5538 data: 0.0006 max mem: 29202 +[2024-12-11 14:09:10 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9375 (0.8203) acc1: 76.5625 (81.1360) acc3: 91.4062 (93.1840) acc5: 93.7500 (95.5440) time: 0.5443 data: 0.0005 max mem: 29202 +[2024-12-11 14:09:10 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5510 s / it) +[2024-12-11 14:09:10 root] (engine.py 264): INFO * Acc@1 81.256 Acc@3 93.088 Acc@5 95.408 loss 0.817 flops 13.207 layer_flops 13.109 +[2024-12-11 14:09:10 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.3% +[2024-12-11 14:09:10 root] (main.py 576): INFO Max accuracy: 81.27% +[2024-12-11 14:09:12 root] (utils.py 283): INFO Epoch: [13] [ 0/2502] eta: 2:00:26 lr: 0.000003 loss_cls: 2.7428 (2.7428) grad_norm: 1.6125 (1.6125) time: 2.8885 data: 0.0002 max mem: 29202 +[2024-12-11 14:09:42 root] (utils.py 283): INFO Epoch: [13] [ 10/2502] eta: 2:01:43 lr: 0.000003 loss_cls: 2.7273 (2.6198) grad_norm: 1.6140 (1.6985) time: 2.9306 data: 0.0004 max mem: 29202 +[2024-12-11 14:10:11 root] (utils.py 283): INFO Epoch: [13] [ 20/2502] eta: 2:01:20 lr: 0.000003 loss_cls: 2.7273 (2.6185) grad_norm: 1.6061 (1.6106) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 14:10:41 root] (utils.py 283): INFO Epoch: [13] [ 30/2502] eta: 2:00:55 lr: 0.000003 loss_cls: 2.7292 (2.6058) grad_norm: 1.4947 (1.5978) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 14:11:10 root] (utils.py 283): INFO Epoch: [13] [ 40/2502] eta: 2:00:29 lr: 0.000003 loss_cls: 2.9099 (2.6807) grad_norm: 1.5157 (1.5926) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 14:11:39 root] (utils.py 283): INFO Epoch: [13] [ 50/2502] eta: 2:00:02 lr: 0.000003 loss_cls: 2.9417 (2.6859) grad_norm: 1.5713 (1.6206) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 14:12:09 root] (utils.py 283): INFO Epoch: [13] [ 60/2502] eta: 1:59:32 lr: 0.000003 loss_cls: 2.7852 (2.6905) grad_norm: 1.5323 (1.6096) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 14:12:38 root] (utils.py 283): INFO Epoch: [13] [ 70/2502] eta: 1:59:04 lr: 0.000003 loss_cls: 2.8133 (2.6809) grad_norm: 1.5239 (1.5947) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 14:13:08 root] (utils.py 283): INFO Epoch: [13] [ 80/2502] eta: 1:58:36 lr: 0.000003 loss_cls: 2.7125 (2.6784) grad_norm: 1.5185 (1.5960) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 14:13:37 root] (utils.py 283): INFO Epoch: [13] [ 90/2502] eta: 1:58:06 lr: 0.000003 loss_cls: 2.5170 (2.6667) grad_norm: 1.5429 (1.5949) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 14:14:06 root] (utils.py 283): INFO Epoch: [13] [ 100/2502] eta: 1:57:37 lr: 0.000003 loss_cls: 2.4875 (2.6517) grad_norm: 1.5171 (1.6131) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 14:14:36 root] (utils.py 283): INFO Epoch: [13] [ 110/2502] eta: 1:57:11 lr: 0.000003 loss_cls: 2.7544 (2.6689) grad_norm: 1.5029 (1.5988) time: 2.9470 data: 0.0003 max mem: 29202 +[2024-12-11 14:15:05 root] (utils.py 283): INFO Epoch: [13] [ 120/2502] eta: 1:56:39 lr: 0.000003 loss_cls: 2.9680 (2.6760) grad_norm: 1.5187 (1.6255) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 14:15:35 root] (utils.py 283): INFO Epoch: [13] [ 130/2502] eta: 1:56:11 lr: 0.000003 loss_cls: 2.8453 (2.6732) grad_norm: 1.6020 (1.6280) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-11 14:16:04 root] (utils.py 283): INFO Epoch: [13] [ 140/2502] eta: 1:55:41 lr: 0.000003 loss_cls: 2.7947 (2.6701) grad_norm: 1.5180 (1.6223) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 14:16:33 root] (utils.py 283): INFO Epoch: [13] [ 150/2502] eta: 1:55:12 lr: 0.000003 loss_cls: 2.9286 (2.6792) grad_norm: 1.5142 (1.6184) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 14:17:03 root] (utils.py 283): INFO Epoch: [13] [ 160/2502] eta: 1:54:43 lr: 0.000003 loss_cls: 2.9545 (2.6842) grad_norm: 1.5659 (1.6209) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 14:17:32 root] (utils.py 283): INFO Epoch: [13] [ 170/2502] eta: 1:54:14 lr: 0.000003 loss_cls: 2.6156 (2.6782) grad_norm: 1.5659 (1.6176) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 14:18:02 root] (utils.py 283): INFO Epoch: [13] [ 180/2502] eta: 1:53:44 lr: 0.000003 loss_cls: 2.5141 (2.6670) grad_norm: 1.5350 (1.6195) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 14:18:31 root] (utils.py 283): INFO Epoch: [13] [ 190/2502] eta: 1:53:15 lr: 0.000003 loss_cls: 2.7985 (2.6771) grad_norm: 1.6145 (1.6201) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 14:19:00 root] (utils.py 283): INFO Epoch: [13] [ 200/2502] eta: 1:52:45 lr: 0.000003 loss_cls: 2.8552 (2.6838) grad_norm: 1.6052 (1.6166) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 14:19:30 root] (utils.py 283): INFO Epoch: [13] [ 210/2502] eta: 1:52:16 lr: 0.000003 loss_cls: 2.8796 (2.6920) grad_norm: 1.6033 (1.6206) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 14:19:59 root] (utils.py 283): INFO Epoch: [13] [ 220/2502] eta: 1:51:46 lr: 0.000003 loss_cls: 2.7661 (2.6880) grad_norm: 1.5323 (1.6153) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-11 14:20:28 root] (utils.py 283): INFO Epoch: [13] [ 230/2502] eta: 1:51:17 lr: 0.000003 loss_cls: 2.6129 (2.6863) grad_norm: 1.5098 (1.6131) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 14:20:58 root] (utils.py 283): INFO Epoch: [13] [ 240/2502] eta: 1:50:49 lr: 0.000003 loss_cls: 2.7084 (2.6880) grad_norm: 1.4947 (1.6083) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 14:21:27 root] (utils.py 283): INFO Epoch: [13] [ 250/2502] eta: 1:50:20 lr: 0.000003 loss_cls: 2.8741 (2.6930) grad_norm: 1.4956 (1.6101) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 14:21:57 root] (utils.py 283): INFO Epoch: [13] [ 260/2502] eta: 1:49:50 lr: 0.000003 loss_cls: 2.7510 (2.6927) grad_norm: 1.5570 (1.6087) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 14:22:26 root] (utils.py 283): INFO Epoch: [13] [ 270/2502] eta: 1:49:21 lr: 0.000003 loss_cls: 2.6792 (2.6937) grad_norm: 1.6166 (1.6331) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 14:22:56 root] (utils.py 283): INFO Epoch: [13] [ 280/2502] eta: 1:48:52 lr: 0.000003 loss_cls: 2.6438 (2.6850) grad_norm: 1.6166 (1.6287) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 14:23:25 root] (utils.py 283): INFO Epoch: [13] [ 290/2502] eta: 1:48:22 lr: 0.000003 loss_cls: 2.7008 (2.6891) grad_norm: 1.5194 (1.6264) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 14:23:54 root] (utils.py 283): INFO Epoch: [13] [ 300/2502] eta: 1:47:53 lr: 0.000003 loss_cls: 2.8597 (2.6963) grad_norm: 1.5660 (1.6264) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 14:24:24 root] (utils.py 283): INFO Epoch: [13] [ 310/2502] eta: 1:47:24 lr: 0.000003 loss_cls: 2.8597 (2.6998) grad_norm: 1.5490 (1.6238) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 14:24:53 root] (utils.py 283): INFO Epoch: [13] [ 320/2502] eta: 1:46:56 lr: 0.000003 loss_cls: 2.7870 (2.6982) grad_norm: 1.5490 (1.6226) time: 2.9495 data: 0.0003 max mem: 29202 +[2024-12-11 14:25:23 root] (utils.py 283): INFO Epoch: [13] [ 330/2502] eta: 1:46:26 lr: 0.000003 loss_cls: 2.9137 (2.7062) grad_norm: 1.5707 (1.6204) time: 2.9484 data: 0.0003 max mem: 29202 +[2024-12-11 14:25:52 root] (utils.py 283): INFO Epoch: [13] [ 340/2502] eta: 1:45:57 lr: 0.000003 loss_cls: 2.9922 (2.7083) grad_norm: 1.4661 (1.6180) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 14:26:22 root] (utils.py 283): INFO Epoch: [13] [ 350/2502] eta: 1:45:28 lr: 0.000003 loss_cls: 2.7542 (2.7018) grad_norm: 1.4949 (1.6156) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 14:26:51 root] (utils.py 283): INFO Epoch: [13] [ 360/2502] eta: 1:44:59 lr: 0.000003 loss_cls: 2.7052 (2.6988) grad_norm: 1.6144 (1.6175) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 14:27:21 root] (utils.py 283): INFO Epoch: [13] [ 370/2502] eta: 1:44:29 lr: 0.000003 loss_cls: 2.6775 (2.6931) grad_norm: 1.6144 (1.6171) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 14:27:50 root] (utils.py 283): INFO Epoch: [13] [ 380/2502] eta: 1:44:00 lr: 0.000003 loss_cls: 2.6264 (2.6927) grad_norm: 1.5575 (1.6163) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 14:28:20 root] (utils.py 283): INFO Epoch: [13] [ 390/2502] eta: 1:43:31 lr: 0.000003 loss_cls: 2.8076 (2.6951) grad_norm: 1.4691 (1.6151) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-11 14:28:49 root] (utils.py 283): INFO Epoch: [13] [ 400/2502] eta: 1:43:02 lr: 0.000003 loss_cls: 2.8054 (2.6933) grad_norm: 1.4380 (1.6129) time: 2.9507 data: 0.0003 max mem: 29202 +[2024-12-11 14:29:18 root] (utils.py 283): INFO Epoch: [13] [ 410/2502] eta: 1:42:33 lr: 0.000003 loss_cls: 2.8054 (2.6952) grad_norm: 1.6166 (1.6164) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 14:29:48 root] (utils.py 283): INFO Epoch: [13] [ 420/2502] eta: 1:42:04 lr: 0.000003 loss_cls: 2.8156 (2.6949) grad_norm: 1.6590 (1.6207) time: 2.9506 data: 0.0003 max mem: 29202 +[2024-12-11 14:30:18 root] (utils.py 283): INFO Epoch: [13] [ 430/2502] eta: 1:41:35 lr: 0.000003 loss_cls: 2.6802 (2.6917) grad_norm: 1.5296 (1.6198) time: 2.9538 data: 0.0003 max mem: 29202 +[2024-12-11 14:30:47 root] (utils.py 283): INFO Epoch: [13] [ 440/2502] eta: 1:41:05 lr: 0.000003 loss_cls: 2.5463 (2.6909) grad_norm: 1.5439 (1.6204) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 14:31:16 root] (utils.py 283): INFO Epoch: [13] [ 450/2502] eta: 1:40:36 lr: 0.000003 loss_cls: 2.6076 (2.6895) grad_norm: 1.4995 (1.6190) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 14:31:46 root] (utils.py 283): INFO Epoch: [13] [ 460/2502] eta: 1:40:07 lr: 0.000003 loss_cls: 2.5250 (2.6821) grad_norm: 1.4995 (1.6182) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 14:32:15 root] (utils.py 283): INFO Epoch: [13] [ 470/2502] eta: 1:39:38 lr: 0.000003 loss_cls: 2.6079 (2.6822) grad_norm: 1.5040 (1.6158) time: 2.9537 data: 0.0003 max mem: 29202 +[2024-12-11 14:32:45 root] (utils.py 283): INFO Epoch: [13] [ 480/2502] eta: 1:39:09 lr: 0.000003 loss_cls: 2.7940 (2.6829) grad_norm: 1.4977 (1.6157) time: 2.9563 data: 0.0003 max mem: 29202 +[2024-12-11 14:33:14 root] (utils.py 283): INFO Epoch: [13] [ 490/2502] eta: 1:38:40 lr: 0.000003 loss_cls: 2.6050 (2.6845) grad_norm: 1.5677 (1.6170) time: 2.9529 data: 0.0003 max mem: 29202 +[2024-12-11 14:33:44 root] (utils.py 283): INFO Epoch: [13] [ 500/2502] eta: 1:38:11 lr: 0.000003 loss_cls: 2.8671 (2.6883) grad_norm: 1.6094 (1.6197) time: 2.9524 data: 0.0003 max mem: 29202 +[2024-12-11 14:34:13 root] (utils.py 283): INFO Epoch: [13] [ 510/2502] eta: 1:37:41 lr: 0.000003 loss_cls: 2.8681 (2.6886) grad_norm: 1.6199 (1.6207) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 14:34:43 root] (utils.py 283): INFO Epoch: [13] [ 520/2502] eta: 1:37:12 lr: 0.000003 loss_cls: 2.8725 (2.6924) grad_norm: 1.5565 (1.6198) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 14:35:12 root] (utils.py 283): INFO Epoch: [13] [ 530/2502] eta: 1:36:43 lr: 0.000003 loss_cls: 2.8988 (2.6955) grad_norm: 1.5565 (1.6190) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-11 14:35:41 root] (utils.py 283): INFO Epoch: [13] [ 540/2502] eta: 1:36:13 lr: 0.000003 loss_cls: 2.8988 (2.6972) grad_norm: 1.5539 (1.6196) time: 2.9338 data: 0.0003 max mem: 29202 +[2024-12-11 14:36:11 root] (utils.py 283): INFO Epoch: [13] [ 550/2502] eta: 1:35:43 lr: 0.000003 loss_cls: 2.8629 (2.6959) grad_norm: 1.5608 (1.6189) time: 2.9258 data: 0.0003 max mem: 29202 +[2024-12-11 14:36:40 root] (utils.py 283): INFO Epoch: [13] [ 560/2502] eta: 1:35:13 lr: 0.000003 loss_cls: 2.7255 (2.6975) grad_norm: 1.4617 (1.6161) time: 2.9271 data: 0.0003 max mem: 29202 +[2024-12-11 14:37:09 root] (utils.py 283): INFO Epoch: [13] [ 570/2502] eta: 1:34:43 lr: 0.000003 loss_cls: 2.8077 (2.6974) grad_norm: 1.4469 (1.6140) time: 2.9269 data: 0.0003 max mem: 29202 +[2024-12-11 14:37:39 root] (utils.py 283): INFO Epoch: [13] [ 580/2502] eta: 1:34:13 lr: 0.000003 loss_cls: 2.8722 (2.6975) grad_norm: 1.4803 (1.6129) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 14:38:08 root] (utils.py 283): INFO Epoch: [13] [ 590/2502] eta: 1:33:44 lr: 0.000003 loss_cls: 2.8830 (2.6998) grad_norm: 1.5068 (1.6126) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 14:38:38 root] (utils.py 283): INFO Epoch: [13] [ 600/2502] eta: 1:33:15 lr: 0.000003 loss_cls: 2.8624 (2.7010) grad_norm: 1.5454 (1.6112) time: 2.9489 data: 0.0003 max mem: 29202 +[2024-12-11 14:39:07 root] (utils.py 283): INFO Epoch: [13] [ 610/2502] eta: 1:32:46 lr: 0.000003 loss_cls: 2.7959 (2.7003) grad_norm: 1.5387 (1.6113) time: 2.9518 data: 0.0003 max mem: 29202 +[2024-12-11 14:39:37 root] (utils.py 283): INFO Epoch: [13] [ 620/2502] eta: 1:32:16 lr: 0.000003 loss_cls: 2.5995 (2.6942) grad_norm: 1.5387 (1.6107) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-11 14:40:06 root] (utils.py 283): INFO Epoch: [13] [ 630/2502] eta: 1:31:47 lr: 0.000003 loss_cls: 2.6051 (2.6944) grad_norm: 1.5764 (1.6107) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 14:40:35 root] (utils.py 283): INFO Epoch: [13] [ 640/2502] eta: 1:31:18 lr: 0.000003 loss_cls: 2.7162 (2.6948) grad_norm: 1.6326 (1.6109) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 14:41:05 root] (utils.py 283): INFO Epoch: [13] [ 650/2502] eta: 1:30:48 lr: 0.000003 loss_cls: 2.7162 (2.6940) grad_norm: 1.5619 (1.6113) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 14:41:34 root] (utils.py 283): INFO Epoch: [13] [ 660/2502] eta: 1:30:19 lr: 0.000003 loss_cls: 2.7356 (2.6936) grad_norm: 1.5592 (1.6109) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 14:42:04 root] (utils.py 283): INFO Epoch: [13] [ 670/2502] eta: 1:29:49 lr: 0.000003 loss_cls: 2.7569 (2.6949) grad_norm: 1.5225 (1.6107) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 14:42:33 root] (utils.py 283): INFO Epoch: [13] [ 680/2502] eta: 1:29:20 lr: 0.000003 loss_cls: 2.7569 (2.6958) grad_norm: 1.5164 (1.6106) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 14:43:03 root] (utils.py 283): INFO Epoch: [13] [ 690/2502] eta: 1:28:51 lr: 0.000003 loss_cls: 2.7274 (2.6930) grad_norm: 1.5021 (1.6087) time: 2.9436 data: 0.0003 max mem: 29202 +[2024-12-11 14:43:32 root] (utils.py 283): INFO Epoch: [13] [ 700/2502] eta: 1:28:22 lr: 0.000003 loss_cls: 2.5772 (2.6910) grad_norm: 1.4553 (1.6104) time: 2.9580 data: 0.0003 max mem: 29202 +[2024-12-11 14:44:02 root] (utils.py 283): INFO Epoch: [13] [ 710/2502] eta: 1:27:53 lr: 0.000003 loss_cls: 2.6759 (2.6916) grad_norm: 1.5394 (1.6100) time: 2.9652 data: 0.0003 max mem: 29202 +[2024-12-11 14:44:31 root] (utils.py 283): INFO Epoch: [13] [ 720/2502] eta: 1:27:23 lr: 0.000003 loss_cls: 2.8803 (2.6938) grad_norm: 1.5535 (1.6102) time: 2.9517 data: 0.0003 max mem: 29202 +[2024-12-11 14:45:01 root] (utils.py 283): INFO Epoch: [13] [ 730/2502] eta: 1:26:54 lr: 0.000003 loss_cls: 2.7077 (2.6903) grad_norm: 1.6212 (1.6113) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 14:45:30 root] (utils.py 283): INFO Epoch: [13] [ 740/2502] eta: 1:26:25 lr: 0.000003 loss_cls: 2.6056 (2.6924) grad_norm: 1.5757 (1.6126) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-11 14:46:00 root] (utils.py 283): INFO Epoch: [13] [ 750/2502] eta: 1:25:55 lr: 0.000003 loss_cls: 2.7306 (2.6907) grad_norm: 1.5591 (1.6127) time: 2.9469 data: 0.0003 max mem: 29202 +[2024-12-11 14:46:29 root] (utils.py 283): INFO Epoch: [13] [ 760/2502] eta: 1:25:26 lr: 0.000003 loss_cls: 2.7306 (2.6928) grad_norm: 1.5353 (1.6125) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 14:46:59 root] (utils.py 283): INFO Epoch: [13] [ 770/2502] eta: 1:24:56 lr: 0.000003 loss_cls: 2.7559 (2.6932) grad_norm: 1.5353 (1.6136) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 14:47:28 root] (utils.py 283): INFO Epoch: [13] [ 780/2502] eta: 1:24:27 lr: 0.000003 loss_cls: 2.7389 (2.6943) grad_norm: 1.5296 (1.6136) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 14:47:57 root] (utils.py 283): INFO Epoch: [13] [ 790/2502] eta: 1:23:58 lr: 0.000003 loss_cls: 2.7833 (2.6949) grad_norm: 1.6391 (1.6142) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 14:48:27 root] (utils.py 283): INFO Epoch: [13] [ 800/2502] eta: 1:23:28 lr: 0.000003 loss_cls: 2.7833 (2.6964) grad_norm: 1.5572 (1.6124) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 14:48:56 root] (utils.py 283): INFO Epoch: [13] [ 810/2502] eta: 1:22:59 lr: 0.000003 loss_cls: 2.7601 (2.6973) grad_norm: 1.4857 (1.6114) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 14:49:26 root] (utils.py 283): INFO Epoch: [13] [ 820/2502] eta: 1:22:29 lr: 0.000003 loss_cls: 2.7601 (2.6978) grad_norm: 1.5078 (1.6121) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 14:49:55 root] (utils.py 283): INFO Epoch: [13] [ 830/2502] eta: 1:22:00 lr: 0.000003 loss_cls: 2.7979 (2.6989) grad_norm: 1.6979 (1.6138) time: 2.9589 data: 0.0002 max mem: 29202 +[2024-12-11 14:50:25 root] (utils.py 283): INFO Epoch: [13] [ 840/2502] eta: 1:21:31 lr: 0.000003 loss_cls: 2.6086 (2.6959) grad_norm: 1.7427 (1.6152) time: 2.9514 data: 0.0003 max mem: 29202 +[2024-12-11 14:50:54 root] (utils.py 283): INFO Epoch: [13] [ 850/2502] eta: 1:21:01 lr: 0.000003 loss_cls: 2.7018 (2.6975) grad_norm: 1.6339 (1.6154) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 14:51:23 root] (utils.py 283): INFO Epoch: [13] [ 860/2502] eta: 1:20:32 lr: 0.000003 loss_cls: 2.8571 (2.6989) grad_norm: 1.5684 (1.6148) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 14:51:53 root] (utils.py 283): INFO Epoch: [13] [ 870/2502] eta: 1:20:02 lr: 0.000003 loss_cls: 2.6712 (2.6972) grad_norm: 1.5278 (1.6144) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 14:52:22 root] (utils.py 283): INFO Epoch: [13] [ 880/2502] eta: 1:19:33 lr: 0.000003 loss_cls: 2.5646 (2.6960) grad_norm: 1.4817 (1.6135) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 14:52:52 root] (utils.py 283): INFO Epoch: [13] [ 890/2502] eta: 1:19:03 lr: 0.000003 loss_cls: 2.5274 (2.6936) grad_norm: 1.4781 (1.6127) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 14:53:21 root] (utils.py 283): INFO Epoch: [13] [ 900/2502] eta: 1:18:34 lr: 0.000003 loss_cls: 2.8272 (2.6932) grad_norm: 1.5257 (1.6136) time: 2.9411 data: 0.0003 max mem: 29202 +[2024-12-11 14:53:50 root] (utils.py 283): INFO Epoch: [13] [ 910/2502] eta: 1:18:04 lr: 0.000003 loss_cls: 2.7261 (2.6904) grad_norm: 1.4932 (1.6124) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 14:54:20 root] (utils.py 283): INFO Epoch: [13] [ 920/2502] eta: 1:17:35 lr: 0.000003 loss_cls: 2.5723 (2.6906) grad_norm: 1.5029 (1.6114) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 14:54:49 root] (utils.py 283): INFO Epoch: [13] [ 930/2502] eta: 1:17:06 lr: 0.000003 loss_cls: 2.6240 (2.6901) grad_norm: 1.5125 (1.6124) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-11 14:55:19 root] (utils.py 283): INFO Epoch: [13] [ 940/2502] eta: 1:16:36 lr: 0.000003 loss_cls: 2.6901 (2.6911) grad_norm: 1.6092 (1.6132) time: 2.9514 data: 0.0003 max mem: 29202 +[2024-12-11 14:55:48 root] (utils.py 283): INFO Epoch: [13] [ 950/2502] eta: 1:16:07 lr: 0.000003 loss_cls: 2.8855 (2.6940) grad_norm: 1.6052 (1.6138) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-11 14:56:18 root] (utils.py 283): INFO Epoch: [13] [ 960/2502] eta: 1:15:38 lr: 0.000003 loss_cls: 2.9292 (2.6951) grad_norm: 1.5712 (1.6132) time: 2.9557 data: 0.0003 max mem: 29202 +[2024-12-11 14:56:48 root] (utils.py 283): INFO Epoch: [13] [ 970/2502] eta: 1:15:09 lr: 0.000003 loss_cls: 2.8021 (2.6948) grad_norm: 1.4830 (1.6122) time: 2.9536 data: 0.0003 max mem: 29202 +[2024-12-11 14:57:17 root] (utils.py 283): INFO Epoch: [13] [ 980/2502] eta: 1:14:39 lr: 0.000003 loss_cls: 2.8021 (2.6957) grad_norm: 1.4648 (1.6122) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-11 14:57:47 root] (utils.py 283): INFO Epoch: [13] [ 990/2502] eta: 1:14:10 lr: 0.000003 loss_cls: 2.8821 (2.6980) grad_norm: 1.4513 (1.6108) time: 2.9593 data: 0.0003 max mem: 29202 +[2024-12-11 14:58:16 root] (utils.py 283): INFO Epoch: [13] [1000/2502] eta: 1:13:41 lr: 0.000003 loss_cls: 2.8821 (2.6974) grad_norm: 1.4527 (1.6104) time: 2.9681 data: 0.0003 max mem: 29202 +[2024-12-11 14:58:46 root] (utils.py 283): INFO Epoch: [13] [1010/2502] eta: 1:13:12 lr: 0.000003 loss_cls: 2.8362 (2.6985) grad_norm: 1.4612 (1.6128) time: 2.9702 data: 0.0003 max mem: 29202 +[2024-12-11 14:59:16 root] (utils.py 283): INFO Epoch: [13] [1020/2502] eta: 1:12:43 lr: 0.000003 loss_cls: 2.8957 (2.6995) grad_norm: 1.5667 (1.6137) time: 2.9665 data: 0.0003 max mem: 29202 +[2024-12-11 14:59:45 root] (utils.py 283): INFO Epoch: [13] [1030/2502] eta: 1:12:13 lr: 0.000003 loss_cls: 2.8592 (2.6992) grad_norm: 1.5860 (1.6138) time: 2.9498 data: 0.0003 max mem: 29202 +[2024-12-11 15:00:15 root] (utils.py 283): INFO Epoch: [13] [1040/2502] eta: 1:11:44 lr: 0.000003 loss_cls: 2.7846 (2.6994) grad_norm: 1.5759 (1.6137) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 15:00:44 root] (utils.py 283): INFO Epoch: [13] [1050/2502] eta: 1:11:15 lr: 0.000003 loss_cls: 2.8656 (2.6998) grad_norm: 1.5628 (1.6129) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-11 15:01:14 root] (utils.py 283): INFO Epoch: [13] [1060/2502] eta: 1:10:45 lr: 0.000003 loss_cls: 2.7613 (2.6997) grad_norm: 1.5446 (1.6130) time: 2.9523 data: 0.0003 max mem: 29202 +[2024-12-11 15:01:43 root] (utils.py 283): INFO Epoch: [13] [1070/2502] eta: 1:10:16 lr: 0.000003 loss_cls: 2.8213 (2.7010) grad_norm: 1.5959 (1.6128) time: 2.9446 data: 0.0003 max mem: 29202 +[2024-12-11 15:02:12 root] (utils.py 283): INFO Epoch: [13] [1080/2502] eta: 1:09:46 lr: 0.000003 loss_cls: 2.8213 (2.7013) grad_norm: 1.5971 (1.6129) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 15:02:42 root] (utils.py 283): INFO Epoch: [13] [1090/2502] eta: 1:09:17 lr: 0.000003 loss_cls: 2.7833 (2.7033) grad_norm: 1.5971 (1.6132) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 15:03:11 root] (utils.py 283): INFO Epoch: [13] [1100/2502] eta: 1:08:47 lr: 0.000003 loss_cls: 2.8348 (2.7029) grad_norm: 1.6642 (1.6148) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 15:03:41 root] (utils.py 283): INFO Epoch: [13] [1110/2502] eta: 1:08:18 lr: 0.000003 loss_cls: 2.8187 (2.7022) grad_norm: 1.6190 (1.6143) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 15:04:10 root] (utils.py 283): INFO Epoch: [13] [1120/2502] eta: 1:07:48 lr: 0.000003 loss_cls: 2.7125 (2.6997) grad_norm: 1.5796 (1.6142) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 15:04:39 root] (utils.py 283): INFO Epoch: [13] [1130/2502] eta: 1:07:19 lr: 0.000003 loss_cls: 2.6648 (2.6988) grad_norm: 1.5938 (1.6155) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 15:05:09 root] (utils.py 283): INFO Epoch: [13] [1140/2502] eta: 1:06:49 lr: 0.000003 loss_cls: 2.6245 (2.6977) grad_norm: 1.4821 (1.6142) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 15:05:38 root] (utils.py 283): INFO Epoch: [13] [1150/2502] eta: 1:06:20 lr: 0.000003 loss_cls: 2.6245 (2.6975) grad_norm: 1.5026 (1.6138) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 15:06:08 root] (utils.py 283): INFO Epoch: [13] [1160/2502] eta: 1:05:50 lr: 0.000003 loss_cls: 2.6909 (2.6966) grad_norm: 1.5388 (1.6134) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 15:06:37 root] (utils.py 283): INFO Epoch: [13] [1170/2502] eta: 1:05:21 lr: 0.000003 loss_cls: 2.6593 (2.6963) grad_norm: 1.4599 (1.6130) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 15:07:07 root] (utils.py 283): INFO Epoch: [13] [1180/2502] eta: 1:04:52 lr: 0.000003 loss_cls: 2.6593 (2.6964) grad_norm: 1.4611 (1.6127) time: 2.9502 data: 0.0003 max mem: 29202 +[2024-12-11 15:07:36 root] (utils.py 283): INFO Epoch: [13] [1190/2502] eta: 1:04:22 lr: 0.000003 loss_cls: 2.8382 (2.6963) grad_norm: 1.5983 (1.6149) time: 2.9576 data: 0.0003 max mem: 29202 +[2024-12-11 15:08:05 root] (utils.py 283): INFO Epoch: [13] [1200/2502] eta: 1:03:53 lr: 0.000003 loss_cls: 2.4201 (2.6934) grad_norm: 1.5178 (1.6141) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 15:08:35 root] (utils.py 283): INFO Epoch: [13] [1210/2502] eta: 1:03:23 lr: 0.000003 loss_cls: 2.4201 (2.6930) grad_norm: 1.5741 (1.6149) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 15:09:04 root] (utils.py 283): INFO Epoch: [13] [1220/2502] eta: 1:02:54 lr: 0.000003 loss_cls: 2.8805 (2.6949) grad_norm: 1.5923 (1.6149) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 15:09:34 root] (utils.py 283): INFO Epoch: [13] [1230/2502] eta: 1:02:24 lr: 0.000003 loss_cls: 2.8372 (2.6936) grad_norm: 1.5007 (1.6144) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 15:10:03 root] (utils.py 283): INFO Epoch: [13] [1240/2502] eta: 1:01:55 lr: 0.000003 loss_cls: 2.5075 (2.6923) grad_norm: 1.5385 (1.6140) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 15:10:32 root] (utils.py 283): INFO Epoch: [13] [1250/2502] eta: 1:01:25 lr: 0.000003 loss_cls: 2.5841 (2.6914) grad_norm: 1.5620 (1.6144) time: 2.9409 data: 0.0003 max mem: 29202 +[2024-12-11 15:11:02 root] (utils.py 283): INFO Epoch: [13] [1260/2502] eta: 1:00:56 lr: 0.000003 loss_cls: 2.7146 (2.6896) grad_norm: 1.5399 (1.6143) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 15:11:31 root] (utils.py 283): INFO Epoch: [13] [1270/2502] eta: 1:00:26 lr: 0.000003 loss_cls: 2.7164 (2.6882) grad_norm: 1.5398 (1.6143) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 15:12:01 root] (utils.py 283): INFO Epoch: [13] [1280/2502] eta: 0:59:57 lr: 0.000003 loss_cls: 2.7917 (2.6908) grad_norm: 1.5474 (1.6149) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 15:12:30 root] (utils.py 283): INFO Epoch: [13] [1290/2502] eta: 0:59:27 lr: 0.000003 loss_cls: 2.9103 (2.6910) grad_norm: 1.6157 (1.6145) time: 2.9334 data: 0.0003 max mem: 29202 +[2024-12-11 15:12:59 root] (utils.py 283): INFO Epoch: [13] [1300/2502] eta: 0:58:58 lr: 0.000003 loss_cls: 2.9114 (2.6913) grad_norm: 1.5346 (1.6139) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 15:13:29 root] (utils.py 283): INFO Epoch: [13] [1310/2502] eta: 0:58:28 lr: 0.000003 loss_cls: 2.9492 (2.6915) grad_norm: 1.5008 (1.6131) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 15:13:58 root] (utils.py 283): INFO Epoch: [13] [1320/2502] eta: 0:57:59 lr: 0.000003 loss_cls: 2.9492 (2.6922) grad_norm: 1.5632 (1.6137) time: 2.9310 data: 0.0003 max mem: 29202 +[2024-12-11 15:14:27 root] (utils.py 283): INFO Epoch: [13] [1330/2502] eta: 0:57:29 lr: 0.000003 loss_cls: 2.9329 (2.6933) grad_norm: 1.5880 (1.6134) time: 2.9334 data: 0.0003 max mem: 29202 +[2024-12-11 15:14:57 root] (utils.py 283): INFO Epoch: [13] [1340/2502] eta: 0:57:00 lr: 0.000003 loss_cls: 2.8586 (2.6950) grad_norm: 1.5348 (1.6130) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 15:15:26 root] (utils.py 283): INFO Epoch: [13] [1350/2502] eta: 0:56:30 lr: 0.000003 loss_cls: 2.8404 (2.6959) grad_norm: 1.5384 (1.6128) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 15:15:55 root] (utils.py 283): INFO Epoch: [13] [1360/2502] eta: 0:56:01 lr: 0.000003 loss_cls: 2.7804 (2.6948) grad_norm: 1.5786 (1.6128) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 15:16:25 root] (utils.py 283): INFO Epoch: [13] [1370/2502] eta: 0:55:31 lr: 0.000003 loss_cls: 2.7340 (2.6955) grad_norm: 1.5164 (1.6118) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 15:16:54 root] (utils.py 283): INFO Epoch: [13] [1380/2502] eta: 0:55:02 lr: 0.000003 loss_cls: 2.7458 (2.6937) grad_norm: 1.4579 (1.6113) time: 2.9427 data: 0.0003 max mem: 29202 +[2024-12-11 15:17:24 root] (utils.py 283): INFO Epoch: [13] [1390/2502] eta: 0:54:32 lr: 0.000003 loss_cls: 2.5036 (2.6923) grad_norm: 1.5502 (1.6113) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 15:17:53 root] (utils.py 283): INFO Epoch: [13] [1400/2502] eta: 0:54:03 lr: 0.000003 loss_cls: 2.5255 (2.6919) grad_norm: 1.6039 (1.6123) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 15:18:22 root] (utils.py 283): INFO Epoch: [13] [1410/2502] eta: 0:53:33 lr: 0.000003 loss_cls: 2.7564 (2.6929) grad_norm: 1.4966 (1.6125) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 15:18:52 root] (utils.py 283): INFO Epoch: [13] [1420/2502] eta: 0:53:04 lr: 0.000003 loss_cls: 2.7561 (2.6914) grad_norm: 1.4861 (1.6117) time: 2.9423 data: 0.0003 max mem: 29202 +[2024-12-11 15:19:21 root] (utils.py 283): INFO Epoch: [13] [1430/2502] eta: 0:52:34 lr: 0.000003 loss_cls: 2.2689 (2.6879) grad_norm: 1.5664 (1.6120) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 15:19:51 root] (utils.py 283): INFO Epoch: [13] [1440/2502] eta: 0:52:05 lr: 0.000003 loss_cls: 2.4416 (2.6886) grad_norm: 1.5748 (1.6120) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 15:20:20 root] (utils.py 283): INFO Epoch: [13] [1450/2502] eta: 0:51:35 lr: 0.000003 loss_cls: 2.7069 (2.6887) grad_norm: 1.5154 (1.6122) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 15:20:49 root] (utils.py 283): INFO Epoch: [13] [1460/2502] eta: 0:51:06 lr: 0.000003 loss_cls: 2.7010 (2.6883) grad_norm: 1.5556 (1.6119) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 15:21:19 root] (utils.py 283): INFO Epoch: [13] [1470/2502] eta: 0:50:37 lr: 0.000003 loss_cls: 2.8487 (2.6875) grad_norm: 1.5556 (1.6117) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 15:21:48 root] (utils.py 283): INFO Epoch: [13] [1480/2502] eta: 0:50:07 lr: 0.000003 loss_cls: 2.8487 (2.6885) grad_norm: 1.6415 (1.6118) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 15:22:17 root] (utils.py 283): INFO Epoch: [13] [1490/2502] eta: 0:49:38 lr: 0.000003 loss_cls: 2.7738 (2.6881) grad_norm: 1.6592 (1.6120) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 15:22:47 root] (utils.py 283): INFO Epoch: [13] [1500/2502] eta: 0:49:08 lr: 0.000003 loss_cls: 2.6707 (2.6878) grad_norm: 1.5572 (1.6121) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-11 15:23:16 root] (utils.py 283): INFO Epoch: [13] [1510/2502] eta: 0:48:39 lr: 0.000003 loss_cls: 2.6059 (2.6872) grad_norm: 1.5236 (1.6117) time: 2.9354 data: 0.0003 max mem: 29202 +[2024-12-11 15:23:46 root] (utils.py 283): INFO Epoch: [13] [1520/2502] eta: 0:48:09 lr: 0.000003 loss_cls: 2.7643 (2.6872) grad_norm: 1.5285 (1.6116) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 15:24:15 root] (utils.py 283): INFO Epoch: [13] [1530/2502] eta: 0:47:40 lr: 0.000003 loss_cls: 2.7314 (2.6865) grad_norm: 1.5110 (1.6112) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 15:24:44 root] (utils.py 283): INFO Epoch: [13] [1540/2502] eta: 0:47:10 lr: 0.000003 loss_cls: 2.7108 (2.6874) grad_norm: 1.5280 (1.6115) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 15:25:14 root] (utils.py 283): INFO Epoch: [13] [1550/2502] eta: 0:46:41 lr: 0.000003 loss_cls: 2.8541 (2.6879) grad_norm: 1.5360 (1.6109) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-11 15:25:43 root] (utils.py 283): INFO Epoch: [13] [1560/2502] eta: 0:46:11 lr: 0.000003 loss_cls: 2.5313 (2.6862) grad_norm: 1.5004 (1.6105) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 15:26:12 root] (utils.py 283): INFO Epoch: [13] [1570/2502] eta: 0:45:42 lr: 0.000003 loss_cls: 2.6864 (2.6868) grad_norm: 1.5637 (1.6106) time: 2.9303 data: 0.0003 max mem: 29202 +[2024-12-11 15:26:42 root] (utils.py 283): INFO Epoch: [13] [1580/2502] eta: 0:45:12 lr: 0.000003 loss_cls: 2.7554 (2.6866) grad_norm: 1.5524 (1.6108) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 15:27:11 root] (utils.py 283): INFO Epoch: [13] [1590/2502] eta: 0:44:43 lr: 0.000003 loss_cls: 2.6316 (2.6858) grad_norm: 1.4826 (1.6107) time: 2.9358 data: 0.0003 max mem: 29202 +[2024-12-11 15:27:41 root] (utils.py 283): INFO Epoch: [13] [1600/2502] eta: 0:44:14 lr: 0.000003 loss_cls: 2.7541 (2.6866) grad_norm: 1.4722 (1.6103) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 15:28:10 root] (utils.py 283): INFO Epoch: [13] [1610/2502] eta: 0:43:44 lr: 0.000003 loss_cls: 2.8125 (2.6871) grad_norm: 1.5520 (1.6102) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 15:28:39 root] (utils.py 283): INFO Epoch: [13] [1620/2502] eta: 0:43:15 lr: 0.000003 loss_cls: 2.7224 (2.6867) grad_norm: 1.5657 (1.6100) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 15:29:09 root] (utils.py 283): INFO Epoch: [13] [1630/2502] eta: 0:42:45 lr: 0.000003 loss_cls: 2.7126 (2.6860) grad_norm: 1.5624 (1.6095) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 15:29:38 root] (utils.py 283): INFO Epoch: [13] [1640/2502] eta: 0:42:16 lr: 0.000003 loss_cls: 2.8119 (2.6872) grad_norm: 1.5311 (1.6093) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 15:30:07 root] (utils.py 283): INFO Epoch: [13] [1650/2502] eta: 0:41:46 lr: 0.000003 loss_cls: 2.7636 (2.6848) grad_norm: 1.5021 (1.6087) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 15:30:37 root] (utils.py 283): INFO Epoch: [13] [1660/2502] eta: 0:41:17 lr: 0.000003 loss_cls: 2.7161 (2.6859) grad_norm: 1.4755 (1.6085) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 15:31:06 root] (utils.py 283): INFO Epoch: [13] [1670/2502] eta: 0:40:47 lr: 0.000003 loss_cls: 2.7677 (2.6845) grad_norm: 1.5796 (1.6088) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 15:31:35 root] (utils.py 283): INFO Epoch: [13] [1680/2502] eta: 0:40:18 lr: 0.000003 loss_cls: 2.5476 (2.6835) grad_norm: 1.5724 (1.6085) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 15:32:05 root] (utils.py 283): INFO Epoch: [13] [1690/2502] eta: 0:39:48 lr: 0.000003 loss_cls: 2.4862 (2.6825) grad_norm: 1.5713 (1.6083) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 15:32:34 root] (utils.py 283): INFO Epoch: [13] [1700/2502] eta: 0:39:19 lr: 0.000003 loss_cls: 2.3726 (2.6805) grad_norm: 1.6040 (1.6081) time: 2.9480 data: 0.0004 max mem: 29202 +[2024-12-11 15:33:04 root] (utils.py 283): INFO Epoch: [13] [1710/2502] eta: 0:38:50 lr: 0.000003 loss_cls: 2.7029 (2.6810) grad_norm: 1.6058 (1.6082) time: 2.9505 data: 0.0003 max mem: 29202 +[2024-12-11 15:33:33 root] (utils.py 283): INFO Epoch: [13] [1720/2502] eta: 0:38:20 lr: 0.000003 loss_cls: 2.8459 (2.6821) grad_norm: 1.6250 (1.6083) time: 2.9476 data: 0.0003 max mem: 29202 +[2024-12-11 15:34:03 root] (utils.py 283): INFO Epoch: [13] [1730/2502] eta: 0:37:51 lr: 0.000003 loss_cls: 2.8698 (2.6814) grad_norm: 1.6371 (1.6092) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 15:34:32 root] (utils.py 283): INFO Epoch: [13] [1740/2502] eta: 0:37:21 lr: 0.000003 loss_cls: 2.6293 (2.6804) grad_norm: 1.5061 (1.6084) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 15:35:01 root] (utils.py 283): INFO Epoch: [13] [1750/2502] eta: 0:36:52 lr: 0.000003 loss_cls: 2.9394 (2.6819) grad_norm: 1.5130 (1.6088) time: 2.9325 data: 0.0003 max mem: 29202 +[2024-12-11 15:35:31 root] (utils.py 283): INFO Epoch: [13] [1760/2502] eta: 0:36:23 lr: 0.000003 loss_cls: 2.8683 (2.6822) grad_norm: 1.4871 (1.6076) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 15:36:00 root] (utils.py 283): INFO Epoch: [13] [1770/2502] eta: 0:35:53 lr: 0.000003 loss_cls: 2.6846 (2.6819) grad_norm: 1.4910 (1.6084) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 15:36:30 root] (utils.py 283): INFO Epoch: [13] [1780/2502] eta: 0:35:24 lr: 0.000003 loss_cls: 2.8115 (2.6831) grad_norm: 1.5666 (1.6085) time: 2.9318 data: 0.0003 max mem: 29202 +[2024-12-11 15:36:59 root] (utils.py 283): INFO Epoch: [13] [1790/2502] eta: 0:34:54 lr: 0.000003 loss_cls: 2.8333 (2.6829) grad_norm: 1.5661 (1.6083) time: 2.9331 data: 0.0003 max mem: 29202 +[2024-12-11 15:37:28 root] (utils.py 283): INFO Epoch: [13] [1800/2502] eta: 0:34:25 lr: 0.000003 loss_cls: 2.8598 (2.6838) grad_norm: 1.5182 (1.6076) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 15:37:58 root] (utils.py 283): INFO Epoch: [13] [1810/2502] eta: 0:33:55 lr: 0.000003 loss_cls: 2.9173 (2.6842) grad_norm: 1.4882 (1.6070) time: 2.9356 data: 0.0002 max mem: 29202 +[2024-12-11 15:38:27 root] (utils.py 283): INFO Epoch: [13] [1820/2502] eta: 0:33:26 lr: 0.000003 loss_cls: 2.4885 (2.6823) grad_norm: 1.5236 (1.6070) time: 2.9356 data: 0.0002 max mem: 29202 +[2024-12-11 15:38:56 root] (utils.py 283): INFO Epoch: [13] [1830/2502] eta: 0:32:56 lr: 0.000003 loss_cls: 2.5639 (2.6823) grad_norm: 1.5286 (1.6072) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 15:39:26 root] (utils.py 283): INFO Epoch: [13] [1840/2502] eta: 0:32:27 lr: 0.000003 loss_cls: 2.7630 (2.6831) grad_norm: 1.5173 (1.6070) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 15:39:55 root] (utils.py 283): INFO Epoch: [13] [1850/2502] eta: 0:31:58 lr: 0.000003 loss_cls: 2.9032 (2.6846) grad_norm: 1.5173 (1.6065) time: 2.9384 data: 0.0003 max mem: 29202 +[2024-12-11 15:40:24 root] (utils.py 283): INFO Epoch: [13] [1860/2502] eta: 0:31:28 lr: 0.000003 loss_cls: 2.8181 (2.6845) grad_norm: 1.4889 (1.6059) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 15:40:54 root] (utils.py 283): INFO Epoch: [13] [1870/2502] eta: 0:30:59 lr: 0.000003 loss_cls: 2.6853 (2.6848) grad_norm: 1.5430 (1.6065) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 15:41:23 root] (utils.py 283): INFO Epoch: [13] [1880/2502] eta: 0:30:29 lr: 0.000003 loss_cls: 2.7451 (2.6849) grad_norm: 1.6393 (1.6065) time: 2.9343 data: 0.0003 max mem: 29202 +[2024-12-11 15:41:52 root] (utils.py 283): INFO Epoch: [13] [1890/2502] eta: 0:30:00 lr: 0.000003 loss_cls: 2.7416 (2.6846) grad_norm: 1.5916 (1.6071) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 15:42:22 root] (utils.py 283): INFO Epoch: [13] [1900/2502] eta: 0:29:30 lr: 0.000003 loss_cls: 2.4909 (2.6832) grad_norm: 1.5916 (1.6072) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 15:42:51 root] (utils.py 283): INFO Epoch: [13] [1910/2502] eta: 0:29:01 lr: 0.000003 loss_cls: 2.4909 (2.6829) grad_norm: 1.6038 (1.6079) time: 2.9496 data: 0.0003 max mem: 29202 +[2024-12-11 15:43:21 root] (utils.py 283): INFO Epoch: [13] [1920/2502] eta: 0:28:32 lr: 0.000003 loss_cls: 2.7594 (2.6833) grad_norm: 1.4870 (1.6072) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-11 15:43:50 root] (utils.py 283): INFO Epoch: [13] [1930/2502] eta: 0:28:02 lr: 0.000003 loss_cls: 2.8135 (2.6835) grad_norm: 1.4811 (1.6072) time: 2.9314 data: 0.0003 max mem: 29202 +[2024-12-11 15:44:19 root] (utils.py 283): INFO Epoch: [13] [1940/2502] eta: 0:27:33 lr: 0.000003 loss_cls: 2.6575 (2.6832) grad_norm: 1.4990 (1.6067) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 15:44:49 root] (utils.py 283): INFO Epoch: [13] [1950/2502] eta: 0:27:03 lr: 0.000003 loss_cls: 2.6902 (2.6833) grad_norm: 1.4923 (1.6068) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 15:45:18 root] (utils.py 283): INFO Epoch: [13] [1960/2502] eta: 0:26:34 lr: 0.000003 loss_cls: 2.9426 (2.6845) grad_norm: 1.6480 (1.6074) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 15:45:48 root] (utils.py 283): INFO Epoch: [13] [1970/2502] eta: 0:26:04 lr: 0.000003 loss_cls: 2.9542 (2.6850) grad_norm: 1.6873 (1.6074) time: 2.9357 data: 0.0003 max mem: 29202 +[2024-12-11 15:46:17 root] (utils.py 283): INFO Epoch: [13] [1980/2502] eta: 0:25:35 lr: 0.000003 loss_cls: 2.7987 (2.6849) grad_norm: 1.6096 (1.6072) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 15:46:46 root] (utils.py 283): INFO Epoch: [13] [1990/2502] eta: 0:25:06 lr: 0.000003 loss_cls: 2.6996 (2.6849) grad_norm: 1.5823 (1.6074) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 15:47:16 root] (utils.py 283): INFO Epoch: [13] [2000/2502] eta: 0:24:36 lr: 0.000003 loss_cls: 2.6456 (2.6844) grad_norm: 1.5564 (1.6074) time: 2.9316 data: 0.0003 max mem: 29202 +[2024-12-11 15:47:45 root] (utils.py 283): INFO Epoch: [13] [2010/2502] eta: 0:24:07 lr: 0.000003 loss_cls: 2.5007 (2.6840) grad_norm: 1.5614 (1.6080) time: 2.9354 data: 0.0003 max mem: 29202 +[2024-12-11 15:48:14 root] (utils.py 283): INFO Epoch: [13] [2020/2502] eta: 0:23:37 lr: 0.000003 loss_cls: 2.6122 (2.6834) grad_norm: 1.5949 (1.6083) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 15:48:44 root] (utils.py 283): INFO Epoch: [13] [2030/2502] eta: 0:23:08 lr: 0.000003 loss_cls: 2.8536 (2.6836) grad_norm: 1.5795 (1.6085) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 15:49:13 root] (utils.py 283): INFO Epoch: [13] [2040/2502] eta: 0:22:38 lr: 0.000003 loss_cls: 2.7702 (2.6833) grad_norm: 1.5722 (1.6087) time: 2.9402 data: 0.0003 max mem: 29202 +[2024-12-11 15:49:43 root] (utils.py 283): INFO Epoch: [13] [2050/2502] eta: 0:22:09 lr: 0.000003 loss_cls: 2.6537 (2.6832) grad_norm: 1.5132 (1.6088) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-11 15:50:12 root] (utils.py 283): INFO Epoch: [13] [2060/2502] eta: 0:21:40 lr: 0.000003 loss_cls: 2.7289 (2.6834) grad_norm: 1.4696 (1.6086) time: 2.9501 data: 0.0003 max mem: 29202 +[2024-12-11 15:50:42 root] (utils.py 283): INFO Epoch: [13] [2070/2502] eta: 0:21:10 lr: 0.000003 loss_cls: 2.7285 (2.6826) grad_norm: 1.5553 (1.6090) time: 2.9434 data: 0.0003 max mem: 29202 +[2024-12-11 15:51:11 root] (utils.py 283): INFO Epoch: [13] [2080/2502] eta: 0:20:41 lr: 0.000003 loss_cls: 2.7017 (2.6827) grad_norm: 1.5553 (1.6091) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 15:51:40 root] (utils.py 283): INFO Epoch: [13] [2090/2502] eta: 0:20:11 lr: 0.000003 loss_cls: 2.7691 (2.6815) grad_norm: 1.6248 (1.6093) time: 2.9417 data: 0.0003 max mem: 29202 +[2024-12-11 15:52:10 root] (utils.py 283): INFO Epoch: [13] [2100/2502] eta: 0:19:42 lr: 0.000003 loss_cls: 2.6288 (2.6807) grad_norm: 1.6665 (1.6095) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 15:52:39 root] (utils.py 283): INFO Epoch: [13] [2110/2502] eta: 0:19:13 lr: 0.000003 loss_cls: 2.7511 (2.6809) grad_norm: 1.4789 (1.6087) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 15:53:09 root] (utils.py 283): INFO Epoch: [13] [2120/2502] eta: 0:18:43 lr: 0.000003 loss_cls: 2.6044 (2.6795) grad_norm: 1.4517 (1.6082) time: 2.9438 data: 0.0003 max mem: 29202 +[2024-12-11 15:53:38 root] (utils.py 283): INFO Epoch: [13] [2130/2502] eta: 0:18:14 lr: 0.000003 loss_cls: 2.5586 (2.6794) grad_norm: 1.5324 (1.6082) time: 2.9428 data: 0.0003 max mem: 29202 +[2024-12-11 15:54:07 root] (utils.py 283): INFO Epoch: [13] [2140/2502] eta: 0:17:44 lr: 0.000003 loss_cls: 2.7249 (2.6799) grad_norm: 1.5382 (1.6079) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 15:54:37 root] (utils.py 283): INFO Epoch: [13] [2150/2502] eta: 0:17:15 lr: 0.000003 loss_cls: 2.7675 (2.6797) grad_norm: 1.5382 (1.6081) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 15:55:06 root] (utils.py 283): INFO Epoch: [13] [2160/2502] eta: 0:16:45 lr: 0.000003 loss_cls: 2.8213 (2.6801) grad_norm: 1.5703 (1.6081) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 15:55:36 root] (utils.py 283): INFO Epoch: [13] [2170/2502] eta: 0:16:16 lr: 0.000003 loss_cls: 2.9582 (2.6815) grad_norm: 1.5385 (1.6085) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 15:56:05 root] (utils.py 283): INFO Epoch: [13] [2180/2502] eta: 0:15:47 lr: 0.000003 loss_cls: 2.9710 (2.6832) grad_norm: 1.6328 (1.6088) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 15:56:34 root] (utils.py 283): INFO Epoch: [13] [2190/2502] eta: 0:15:17 lr: 0.000003 loss_cls: 2.9277 (2.6838) grad_norm: 1.6863 (1.6090) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 15:57:04 root] (utils.py 283): INFO Epoch: [13] [2200/2502] eta: 0:14:48 lr: 0.000003 loss_cls: 2.8308 (2.6840) grad_norm: 1.5824 (1.6088) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 15:57:33 root] (utils.py 283): INFO Epoch: [13] [2210/2502] eta: 0:14:18 lr: 0.000003 loss_cls: 2.7746 (2.6836) grad_norm: 1.5824 (1.6088) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 15:58:03 root] (utils.py 283): INFO Epoch: [13] [2220/2502] eta: 0:13:49 lr: 0.000003 loss_cls: 2.6681 (2.6836) grad_norm: 1.5230 (1.6094) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 15:58:32 root] (utils.py 283): INFO Epoch: [13] [2230/2502] eta: 0:13:20 lr: 0.000003 loss_cls: 2.7817 (2.6844) grad_norm: 1.4750 (1.6092) time: 2.9393 data: 0.0003 max mem: 29202 +[2024-12-11 15:59:01 root] (utils.py 283): INFO Epoch: [13] [2240/2502] eta: 0:12:50 lr: 0.000003 loss_cls: 2.9156 (2.6846) grad_norm: 1.6117 (1.6094) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 15:59:31 root] (utils.py 283): INFO Epoch: [13] [2250/2502] eta: 0:12:21 lr: 0.000003 loss_cls: 2.8311 (2.6851) grad_norm: 1.5682 (1.6089) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 16:00:00 root] (utils.py 283): INFO Epoch: [13] [2260/2502] eta: 0:11:51 lr: 0.000003 loss_cls: 2.7300 (2.6847) grad_norm: 1.5478 (1.6093) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 16:00:30 root] (utils.py 283): INFO Epoch: [13] [2270/2502] eta: 0:11:22 lr: 0.000003 loss_cls: 2.5582 (2.6843) grad_norm: 1.5642 (1.6091) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-11 16:00:59 root] (utils.py 283): INFO Epoch: [13] [2280/2502] eta: 0:10:52 lr: 0.000003 loss_cls: 2.6322 (2.6836) grad_norm: 1.5589 (1.6090) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 16:01:28 root] (utils.py 283): INFO Epoch: [13] [2290/2502] eta: 0:10:23 lr: 0.000003 loss_cls: 2.7850 (2.6838) grad_norm: 1.6022 (1.6092) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 16:01:58 root] (utils.py 283): INFO Epoch: [13] [2300/2502] eta: 0:09:54 lr: 0.000003 loss_cls: 2.6935 (2.6835) grad_norm: 1.5772 (1.6088) time: 2.9466 data: 0.0003 max mem: 29202 +[2024-12-11 16:02:27 root] (utils.py 283): INFO Epoch: [13] [2310/2502] eta: 0:09:24 lr: 0.000003 loss_cls: 2.6935 (2.6844) grad_norm: 1.4586 (1.6088) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 16:02:57 root] (utils.py 283): INFO Epoch: [13] [2320/2502] eta: 0:08:55 lr: 0.000003 loss_cls: 2.8910 (2.6845) grad_norm: 1.4797 (1.6090) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 16:03:26 root] (utils.py 283): INFO Epoch: [13] [2330/2502] eta: 0:08:25 lr: 0.000003 loss_cls: 2.8910 (2.6854) grad_norm: 1.5277 (1.6089) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 16:03:55 root] (utils.py 283): INFO Epoch: [13] [2340/2502] eta: 0:07:56 lr: 0.000003 loss_cls: 2.8024 (2.6853) grad_norm: 1.5364 (1.6086) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 16:04:25 root] (utils.py 283): INFO Epoch: [13] [2350/2502] eta: 0:07:27 lr: 0.000003 loss_cls: 2.8176 (2.6862) grad_norm: 1.5109 (1.6085) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 16:04:54 root] (utils.py 283): INFO Epoch: [13] [2360/2502] eta: 0:06:57 lr: 0.000003 loss_cls: 2.8137 (2.6855) grad_norm: 1.5553 (1.6085) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 16:05:24 root] (utils.py 283): INFO Epoch: [13] [2370/2502] eta: 0:06:28 lr: 0.000003 loss_cls: 2.6444 (2.6852) grad_norm: 1.5207 (1.6084) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 16:05:53 root] (utils.py 283): INFO Epoch: [13] [2380/2502] eta: 0:05:58 lr: 0.000003 loss_cls: 2.7618 (2.6852) grad_norm: 1.5364 (1.6084) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 16:06:22 root] (utils.py 283): INFO Epoch: [13] [2390/2502] eta: 0:05:29 lr: 0.000003 loss_cls: 2.7636 (2.6858) grad_norm: 1.5850 (1.6086) time: 2.9408 data: 0.0003 max mem: 29202 +[2024-12-11 16:06:52 root] (utils.py 283): INFO Epoch: [13] [2400/2502] eta: 0:05:00 lr: 0.000003 loss_cls: 2.8098 (2.6859) grad_norm: 1.5241 (1.6087) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 16:07:21 root] (utils.py 283): INFO Epoch: [13] [2410/2502] eta: 0:04:30 lr: 0.000003 loss_cls: 2.7357 (2.6857) grad_norm: 1.5360 (1.6090) time: 2.9361 data: 0.0003 max mem: 29202 +[2024-12-11 16:07:51 root] (utils.py 283): INFO Epoch: [13] [2420/2502] eta: 0:04:01 lr: 0.000003 loss_cls: 2.8053 (2.6864) grad_norm: 1.5847 (1.6094) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 16:08:20 root] (utils.py 283): INFO Epoch: [13] [2430/2502] eta: 0:03:31 lr: 0.000003 loss_cls: 2.8719 (2.6872) grad_norm: 1.5536 (1.6112) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 16:08:49 root] (utils.py 283): INFO Epoch: [13] [2440/2502] eta: 0:03:02 lr: 0.000003 loss_cls: 2.8219 (2.6873) grad_norm: 1.5536 (1.6115) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 16:09:19 root] (utils.py 283): INFO Epoch: [13] [2450/2502] eta: 0:02:32 lr: 0.000003 loss_cls: 2.8219 (2.6877) grad_norm: 1.6035 (1.6115) time: 2.9365 data: 0.0003 max mem: 29202 +[2024-12-11 16:09:48 root] (utils.py 283): INFO Epoch: [13] [2460/2502] eta: 0:02:03 lr: 0.000003 loss_cls: 2.8446 (2.6867) grad_norm: 1.5695 (1.6113) time: 2.9351 data: 0.0003 max mem: 29202 +[2024-12-11 16:10:18 root] (utils.py 283): INFO Epoch: [13] [2470/2502] eta: 0:01:34 lr: 0.000003 loss_cls: 2.4850 (2.6867) grad_norm: 1.5695 (1.6115) time: 2.9448 data: 0.0003 max mem: 29202 +[2024-12-11 16:10:47 root] (utils.py 283): INFO Epoch: [13] [2480/2502] eta: 0:01:04 lr: 0.000003 loss_cls: 2.8182 (2.6869) grad_norm: 1.5879 (1.6112) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 16:11:17 root] (utils.py 283): INFO Epoch: [13] [2490/2502] eta: 0:00:35 lr: 0.000003 loss_cls: 2.7928 (2.6865) grad_norm: 1.5879 (1.6113) time: 2.9598 data: 0.0245 max mem: 29202 +[2024-12-11 16:11:46 root] (utils.py 283): INFO Epoch: [13] [2500/2502] eta: 0:00:05 lr: 0.000003 loss_cls: 2.6591 (2.6863) grad_norm: 1.5494 (1.6108) time: 2.9616 data: 0.0245 max mem: 29202 +[2024-12-11 16:11:49 root] (utils.py 283): INFO Epoch: [13] [2501/2502] eta: 0:00:02 lr: 0.000003 loss_cls: 2.4806 (2.6859) grad_norm: 1.5494 (1.6107) time: 2.9627 data: 0.0245 max mem: 29202 +[2024-12-11 16:11:49 root] (utils.py 297): INFO Epoch: [13] Total time: 2:02:39 (2.9415 s / it) +[2024-12-11 16:11:49 root] (engine.py 179): INFO Averaged stats:lr: 0.000003 loss_cls: 2.4806 (2.6833) grad_norm: 1.5494 (1.6107) +[2024-12-11 16:11:53 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2776 (0.2776) acc1: 91.4062 (91.4062) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5631 data: 0.0005 max mem: 29202 +[2024-12-11 16:11:58 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5570 (0.5378) acc1: 86.7188 (86.7898) acc3: 97.6562 (97.0170) acc5: 99.2188 (98.2955) time: 0.5511 data: 0.0003 max mem: 29202 +[2024-12-11 16:12:04 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:42 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5570 (0.5848) acc1: 85.1562 (85.9003) acc3: 96.8750 (96.3914) acc5: 97.6562 (97.8051) time: 0.5500 data: 0.0004 max mem: 29202 +[2024-12-11 16:12:09 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5841 (0.6122) acc1: 85.1562 (85.3075) acc3: 96.0938 (95.9929) acc5: 97.6562 (97.6310) time: 0.5507 data: 0.0004 max mem: 29202 +[2024-12-11 16:12:15 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:31 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6404 (0.6165) acc1: 85.1562 (85.3087) acc3: 96.0938 (96.0175) acc5: 97.6562 (97.5991) time: 0.5516 data: 0.0004 max mem: 29202 +[2024-12-11 16:12:21 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7698 (0.6938) acc1: 78.9062 (83.5631) acc3: 92.9688 (94.8836) acc5: 95.3125 (96.7065) time: 0.5516 data: 0.0004 max mem: 29202 +[2024-12-11 16:12:26 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:20 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8811 (0.7291) acc1: 77.3438 (83.0046) acc3: 90.6250 (94.2495) acc5: 93.7500 (96.2474) time: 0.5516 data: 0.0004 max mem: 29202 +[2024-12-11 16:12:32 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9555 (0.7620) acc1: 79.6875 (82.2403) acc3: 90.6250 (93.9591) acc5: 93.7500 (96.0277) time: 0.5520 data: 0.0004 max mem: 29202 +[2024-12-11 16:12:37 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9298 (0.7866) acc1: 78.1250 (81.8287) acc3: 91.4062 (93.5475) acc5: 93.7500 (95.6404) time: 0.5523 data: 0.0006 max mem: 29202 +[2024-12-11 16:12:43 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9697 (0.8146) acc1: 77.3438 (81.0955) acc3: 90.6250 (93.2263) acc5: 92.9688 (95.4327) time: 0.5522 data: 0.0006 max mem: 29202 +[2024-12-11 16:12:46 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9074 (0.8132) acc1: 77.3438 (81.0480) acc3: 91.4062 (93.2720) acc5: 94.5312 (95.5120) time: 0.5426 data: 0.0005 max mem: 29202 +[2024-12-11 16:12:46 root] (utils.py 297): INFO Test: Total time: 0:00:53 (0.5499 s / it) +[2024-12-11 16:12:46 root] (engine.py 264): INFO * Acc@1 81.292 Acc@3 93.096 Acc@5 95.426 loss 0.812 flops 13.207 layer_flops 13.109 +[2024-12-11 16:12:46 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.3% +[2024-12-11 16:12:49 root] (main.py 576): INFO Max accuracy: 81.29% +[2024-12-11 16:12:52 root] (utils.py 283): INFO Epoch: [14] [ 0/2502] eta: 2:00:17 lr: 0.000002 loss_cls: 2.9766 (2.9766) grad_norm: 1.5959 (1.5959) time: 2.8848 data: 0.0003 max mem: 29202 +[2024-12-11 16:13:21 root] (utils.py 283): INFO Epoch: [14] [ 10/2502] eta: 2:01:20 lr: 0.000002 loss_cls: 2.9575 (2.7951) grad_norm: 1.6112 (1.6610) time: 2.9215 data: 0.0003 max mem: 29202 +[2024-12-11 16:13:51 root] (utils.py 283): INFO Epoch: [14] [ 20/2502] eta: 2:01:07 lr: 0.000002 loss_cls: 2.5538 (2.6285) grad_norm: 1.6112 (1.6383) time: 2.9302 data: 0.0003 max mem: 29202 +[2024-12-11 16:14:20 root] (utils.py 283): INFO Epoch: [14] [ 30/2502] eta: 2:00:42 lr: 0.000002 loss_cls: 2.5538 (2.6447) grad_norm: 1.6256 (1.6341) time: 2.9342 data: 0.0003 max mem: 29202 +[2024-12-11 16:14:49 root] (utils.py 283): INFO Epoch: [14] [ 40/2502] eta: 2:00:12 lr: 0.000002 loss_cls: 2.6633 (2.6502) grad_norm: 1.5441 (1.6092) time: 2.9313 data: 0.0003 max mem: 29202 +[2024-12-11 16:15:18 root] (utils.py 283): INFO Epoch: [14] [ 50/2502] eta: 1:59:44 lr: 0.000002 loss_cls: 2.6720 (2.6062) grad_norm: 1.5077 (1.6241) time: 2.9301 data: 0.0003 max mem: 29202 +[2024-12-11 16:15:48 root] (utils.py 283): INFO Epoch: [14] [ 60/2502] eta: 1:59:16 lr: 0.000002 loss_cls: 2.7412 (2.6529) grad_norm: 1.5292 (1.6267) time: 2.9331 data: 0.0002 max mem: 29202 +[2024-12-11 16:16:17 root] (utils.py 283): INFO Epoch: [14] [ 70/2502] eta: 1:58:47 lr: 0.000002 loss_cls: 2.9359 (2.6589) grad_norm: 1.5189 (1.6168) time: 2.9334 data: 0.0002 max mem: 29202 +[2024-12-11 16:16:47 root] (utils.py 283): INFO Epoch: [14] [ 80/2502] eta: 1:58:20 lr: 0.000002 loss_cls: 2.7650 (2.6432) grad_norm: 1.4646 (1.5971) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 16:17:16 root] (utils.py 283): INFO Epoch: [14] [ 90/2502] eta: 1:57:55 lr: 0.000002 loss_cls: 2.7924 (2.6690) grad_norm: 1.4750 (1.5956) time: 2.9421 data: 0.0003 max mem: 29202 +[2024-12-11 16:17:45 root] (utils.py 283): INFO Epoch: [14] [ 100/2502] eta: 1:57:29 lr: 0.000002 loss_cls: 2.8582 (2.6889) grad_norm: 1.4983 (1.5929) time: 2.9483 data: 0.0003 max mem: 29202 +[2024-12-11 16:18:15 root] (utils.py 283): INFO Epoch: [14] [ 110/2502] eta: 1:57:03 lr: 0.000002 loss_cls: 2.9505 (2.7066) grad_norm: 1.6145 (1.6078) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-11 16:18:44 root] (utils.py 283): INFO Epoch: [14] [ 120/2502] eta: 1:56:34 lr: 0.000002 loss_cls: 2.8482 (2.7012) grad_norm: 1.5878 (1.6044) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 16:19:14 root] (utils.py 283): INFO Epoch: [14] [ 130/2502] eta: 1:56:04 lr: 0.000002 loss_cls: 2.8482 (2.7050) grad_norm: 1.5726 (1.6114) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 16:19:43 root] (utils.py 283): INFO Epoch: [14] [ 140/2502] eta: 1:55:35 lr: 0.000002 loss_cls: 2.8931 (2.7149) grad_norm: 1.5943 (1.6058) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 16:20:12 root] (utils.py 283): INFO Epoch: [14] [ 150/2502] eta: 1:55:05 lr: 0.000002 loss_cls: 2.8353 (2.7190) grad_norm: 1.5739 (1.6077) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 16:20:42 root] (utils.py 283): INFO Epoch: [14] [ 160/2502] eta: 1:54:36 lr: 0.000002 loss_cls: 2.9464 (2.7350) grad_norm: 1.5858 (1.6110) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 16:21:11 root] (utils.py 283): INFO Epoch: [14] [ 170/2502] eta: 1:54:07 lr: 0.000002 loss_cls: 2.9323 (2.7373) grad_norm: 1.5813 (1.6116) time: 2.9367 data: 0.0003 max mem: 29202 +[2024-12-11 16:21:41 root] (utils.py 283): INFO Epoch: [14] [ 180/2502] eta: 1:53:41 lr: 0.000002 loss_cls: 2.8234 (2.7422) grad_norm: 1.5595 (1.6066) time: 2.9503 data: 0.0002 max mem: 29202 +[2024-12-11 16:22:10 root] (utils.py 283): INFO Epoch: [14] [ 190/2502] eta: 1:53:14 lr: 0.000002 loss_cls: 2.8461 (2.7425) grad_norm: 1.5521 (1.6055) time: 2.9616 data: 0.0002 max mem: 29202 +[2024-12-11 16:22:40 root] (utils.py 283): INFO Epoch: [14] [ 200/2502] eta: 1:52:45 lr: 0.000002 loss_cls: 2.7238 (2.7427) grad_norm: 1.5520 (1.6043) time: 2.9508 data: 0.0003 max mem: 29202 +[2024-12-11 16:23:09 root] (utils.py 283): INFO Epoch: [14] [ 210/2502] eta: 1:52:16 lr: 0.000002 loss_cls: 2.6670 (2.7392) grad_norm: 1.5166 (1.5993) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 16:23:39 root] (utils.py 283): INFO Epoch: [14] [ 220/2502] eta: 1:51:46 lr: 0.000002 loss_cls: 2.8659 (2.7524) grad_norm: 1.5166 (1.6011) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 16:24:08 root] (utils.py 283): INFO Epoch: [14] [ 230/2502] eta: 1:51:16 lr: 0.000002 loss_cls: 2.9116 (2.7554) grad_norm: 1.6023 (1.6010) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 16:24:37 root] (utils.py 283): INFO Epoch: [14] [ 240/2502] eta: 1:50:47 lr: 0.000002 loss_cls: 2.8706 (2.7502) grad_norm: 1.5467 (1.6030) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 16:25:07 root] (utils.py 283): INFO Epoch: [14] [ 250/2502] eta: 1:50:17 lr: 0.000002 loss_cls: 2.6734 (2.7390) grad_norm: 1.6376 (1.6066) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 16:25:36 root] (utils.py 283): INFO Epoch: [14] [ 260/2502] eta: 1:49:49 lr: 0.000002 loss_cls: 2.5194 (2.7274) grad_norm: 1.5316 (1.6039) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 16:26:06 root] (utils.py 283): INFO Epoch: [14] [ 270/2502] eta: 1:49:20 lr: 0.000002 loss_cls: 2.5805 (2.7258) grad_norm: 1.5040 (1.6025) time: 2.9493 data: 0.0003 max mem: 29202 +[2024-12-11 16:26:35 root] (utils.py 283): INFO Epoch: [14] [ 280/2502] eta: 1:48:51 lr: 0.000002 loss_cls: 2.7593 (2.7242) grad_norm: 1.5335 (1.6026) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 16:27:04 root] (utils.py 283): INFO Epoch: [14] [ 290/2502] eta: 1:48:22 lr: 0.000002 loss_cls: 2.5921 (2.7192) grad_norm: 1.5280 (1.6024) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 16:27:34 root] (utils.py 283): INFO Epoch: [14] [ 300/2502] eta: 1:47:52 lr: 0.000002 loss_cls: 2.5937 (2.7161) grad_norm: 1.5178 (1.6026) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 16:28:03 root] (utils.py 283): INFO Epoch: [14] [ 310/2502] eta: 1:47:23 lr: 0.000002 loss_cls: 2.8281 (2.7149) grad_norm: 1.5474 (1.6072) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 16:28:33 root] (utils.py 283): INFO Epoch: [14] [ 320/2502] eta: 1:46:54 lr: 0.000002 loss_cls: 2.5473 (2.7116) grad_norm: 1.5474 (1.6094) time: 2.9481 data: 0.0003 max mem: 29202 +[2024-12-11 16:29:02 root] (utils.py 283): INFO Epoch: [14] [ 330/2502] eta: 1:46:25 lr: 0.000002 loss_cls: 2.5339 (2.7068) grad_norm: 1.6014 (1.6107) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-11 16:29:32 root] (utils.py 283): INFO Epoch: [14] [ 340/2502] eta: 1:45:56 lr: 0.000002 loss_cls: 2.7249 (2.7072) grad_norm: 1.5062 (1.6110) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 16:30:01 root] (utils.py 283): INFO Epoch: [14] [ 350/2502] eta: 1:45:26 lr: 0.000002 loss_cls: 2.6338 (2.6985) grad_norm: 1.4794 (1.6076) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 16:30:30 root] (utils.py 283): INFO Epoch: [14] [ 360/2502] eta: 1:44:57 lr: 0.000002 loss_cls: 2.6338 (2.6999) grad_norm: 1.4983 (1.6078) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 16:31:00 root] (utils.py 283): INFO Epoch: [14] [ 370/2502] eta: 1:44:28 lr: 0.000002 loss_cls: 2.5607 (2.6882) grad_norm: 1.5705 (1.6076) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 16:31:29 root] (utils.py 283): INFO Epoch: [14] [ 380/2502] eta: 1:43:58 lr: 0.000002 loss_cls: 2.2727 (2.6821) grad_norm: 1.4488 (1.6066) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 16:31:59 root] (utils.py 283): INFO Epoch: [14] [ 390/2502] eta: 1:43:28 lr: 0.000002 loss_cls: 2.6794 (2.6847) grad_norm: 1.5278 (1.6078) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 16:32:28 root] (utils.py 283): INFO Epoch: [14] [ 400/2502] eta: 1:42:59 lr: 0.000002 loss_cls: 2.7123 (2.6847) grad_norm: 1.5549 (1.6068) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 16:32:57 root] (utils.py 283): INFO Epoch: [14] [ 410/2502] eta: 1:42:29 lr: 0.000002 loss_cls: 2.7123 (2.6864) grad_norm: 1.5496 (1.6063) time: 2.9358 data: 0.0003 max mem: 29202 +[2024-12-11 16:33:27 root] (utils.py 283): INFO Epoch: [14] [ 420/2502] eta: 1:41:59 lr: 0.000002 loss_cls: 2.6716 (2.6843) grad_norm: 1.6434 (1.6106) time: 2.9311 data: 0.0003 max mem: 29202 +[2024-12-11 16:33:56 root] (utils.py 283): INFO Epoch: [14] [ 430/2502] eta: 1:41:30 lr: 0.000002 loss_cls: 2.6111 (2.6832) grad_norm: 1.6615 (1.6100) time: 2.9391 data: 0.0003 max mem: 29202 +[2024-12-11 16:34:25 root] (utils.py 283): INFO Epoch: [14] [ 440/2502] eta: 1:41:01 lr: 0.000002 loss_cls: 2.8506 (2.6879) grad_norm: 1.4854 (1.6080) time: 2.9451 data: 0.0003 max mem: 29202 +[2024-12-11 16:34:55 root] (utils.py 283): INFO Epoch: [14] [ 450/2502] eta: 1:40:31 lr: 0.000002 loss_cls: 2.8506 (2.6898) grad_norm: 1.5564 (1.6167) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 16:35:24 root] (utils.py 283): INFO Epoch: [14] [ 460/2502] eta: 1:40:02 lr: 0.000002 loss_cls: 2.8000 (2.6900) grad_norm: 1.6127 (1.6157) time: 2.9370 data: 0.0003 max mem: 29202 +[2024-12-11 16:35:54 root] (utils.py 283): INFO Epoch: [14] [ 470/2502] eta: 1:39:32 lr: 0.000002 loss_cls: 2.8477 (2.6893) grad_norm: 1.5421 (1.6134) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 16:36:23 root] (utils.py 283): INFO Epoch: [14] [ 480/2502] eta: 1:39:03 lr: 0.000002 loss_cls: 2.7034 (2.6884) grad_norm: 1.4378 (1.6119) time: 2.9335 data: 0.0003 max mem: 29202 +[2024-12-11 16:36:52 root] (utils.py 283): INFO Epoch: [14] [ 490/2502] eta: 1:38:33 lr: 0.000002 loss_cls: 2.9372 (2.6905) grad_norm: 1.6650 (1.6131) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 16:37:22 root] (utils.py 283): INFO Epoch: [14] [ 500/2502] eta: 1:38:03 lr: 0.000002 loss_cls: 2.9248 (2.6891) grad_norm: 1.6846 (1.6192) time: 2.9340 data: 0.0003 max mem: 29202 +[2024-12-11 16:37:51 root] (utils.py 283): INFO Epoch: [14] [ 510/2502] eta: 1:37:34 lr: 0.000002 loss_cls: 2.4657 (2.6844) grad_norm: 1.5454 (1.6169) time: 2.9345 data: 0.0003 max mem: 29202 +[2024-12-11 16:38:20 root] (utils.py 283): INFO Epoch: [14] [ 520/2502] eta: 1:37:05 lr: 0.000002 loss_cls: 2.6504 (2.6854) grad_norm: 1.5069 (1.6169) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 16:38:50 root] (utils.py 283): INFO Epoch: [14] [ 530/2502] eta: 1:36:35 lr: 0.000002 loss_cls: 2.6869 (2.6846) grad_norm: 1.5477 (1.6195) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 16:39:19 root] (utils.py 283): INFO Epoch: [14] [ 540/2502] eta: 1:36:06 lr: 0.000002 loss_cls: 2.7512 (2.6857) grad_norm: 1.5641 (1.6204) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 16:39:49 root] (utils.py 283): INFO Epoch: [14] [ 550/2502] eta: 1:35:37 lr: 0.000002 loss_cls: 2.8609 (2.6872) grad_norm: 1.5813 (1.6228) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 16:40:18 root] (utils.py 283): INFO Epoch: [14] [ 560/2502] eta: 1:35:07 lr: 0.000002 loss_cls: 2.8609 (2.6872) grad_norm: 1.5501 (1.6209) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 16:40:47 root] (utils.py 283): INFO Epoch: [14] [ 570/2502] eta: 1:34:38 lr: 0.000002 loss_cls: 2.9978 (2.6898) grad_norm: 1.5302 (1.6211) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 16:41:17 root] (utils.py 283): INFO Epoch: [14] [ 580/2502] eta: 1:34:09 lr: 0.000002 loss_cls: 2.9436 (2.6950) grad_norm: 1.4745 (1.6180) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 16:41:46 root] (utils.py 283): INFO Epoch: [14] [ 590/2502] eta: 1:33:39 lr: 0.000002 loss_cls: 2.9295 (2.6949) grad_norm: 1.4491 (1.6172) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 16:42:16 root] (utils.py 283): INFO Epoch: [14] [ 600/2502] eta: 1:33:10 lr: 0.000002 loss_cls: 2.8818 (2.7002) grad_norm: 1.5747 (1.6184) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 16:42:45 root] (utils.py 283): INFO Epoch: [14] [ 610/2502] eta: 1:32:41 lr: 0.000002 loss_cls: 2.8818 (2.6995) grad_norm: 1.4914 (1.6157) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 16:43:15 root] (utils.py 283): INFO Epoch: [14] [ 620/2502] eta: 1:32:12 lr: 0.000002 loss_cls: 2.5778 (2.6944) grad_norm: 1.4930 (1.6142) time: 2.9440 data: 0.0003 max mem: 29202 +[2024-12-11 16:43:44 root] (utils.py 283): INFO Epoch: [14] [ 630/2502] eta: 1:31:42 lr: 0.000002 loss_cls: 2.7623 (2.6973) grad_norm: 1.4934 (1.6125) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-11 16:44:13 root] (utils.py 283): INFO Epoch: [14] [ 640/2502] eta: 1:31:13 lr: 0.000002 loss_cls: 2.6805 (2.6931) grad_norm: 1.4313 (1.6101) time: 2.9477 data: 0.0003 max mem: 29202 +[2024-12-11 16:44:43 root] (utils.py 283): INFO Epoch: [14] [ 650/2502] eta: 1:30:44 lr: 0.000002 loss_cls: 2.3693 (2.6913) grad_norm: 1.4128 (1.6133) time: 2.9437 data: 0.0003 max mem: 29202 +[2024-12-11 16:45:12 root] (utils.py 283): INFO Epoch: [14] [ 660/2502] eta: 1:30:14 lr: 0.000002 loss_cls: 2.8272 (2.6896) grad_norm: 1.4278 (1.6128) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 16:45:42 root] (utils.py 283): INFO Epoch: [14] [ 670/2502] eta: 1:29:45 lr: 0.000002 loss_cls: 2.8272 (2.6904) grad_norm: 1.4525 (1.6123) time: 2.9353 data: 0.0002 max mem: 29202 +[2024-12-11 16:46:11 root] (utils.py 283): INFO Epoch: [14] [ 680/2502] eta: 1:29:15 lr: 0.000002 loss_cls: 2.8303 (2.6894) grad_norm: 1.4596 (1.6104) time: 2.9342 data: 0.0002 max mem: 29202 +[2024-12-11 16:46:40 root] (utils.py 283): INFO Epoch: [14] [ 690/2502] eta: 1:28:46 lr: 0.000002 loss_cls: 2.4045 (2.6833) grad_norm: 1.4778 (1.6096) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 16:47:10 root] (utils.py 283): INFO Epoch: [14] [ 700/2502] eta: 1:28:17 lr: 0.000002 loss_cls: 2.5000 (2.6857) grad_norm: 1.5181 (1.6079) time: 2.9431 data: 0.0003 max mem: 29202 +[2024-12-11 16:47:39 root] (utils.py 283): INFO Epoch: [14] [ 710/2502] eta: 1:27:47 lr: 0.000002 loss_cls: 2.7728 (2.6835) grad_norm: 1.5056 (1.6067) time: 2.9396 data: 0.0003 max mem: 29202 +[2024-12-11 16:48:09 root] (utils.py 283): INFO Epoch: [14] [ 720/2502] eta: 1:27:18 lr: 0.000002 loss_cls: 2.8232 (2.6851) grad_norm: 1.5240 (1.6050) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 16:48:38 root] (utils.py 283): INFO Epoch: [14] [ 730/2502] eta: 1:26:49 lr: 0.000002 loss_cls: 2.8409 (2.6864) grad_norm: 1.5367 (1.6049) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 16:49:07 root] (utils.py 283): INFO Epoch: [14] [ 740/2502] eta: 1:26:19 lr: 0.000002 loss_cls: 2.6302 (2.6808) grad_norm: 1.4988 (1.6035) time: 2.9358 data: 0.0003 max mem: 29202 +[2024-12-11 16:49:37 root] (utils.py 283): INFO Epoch: [14] [ 750/2502] eta: 1:25:50 lr: 0.000002 loss_cls: 2.6018 (2.6802) grad_norm: 1.4611 (1.6016) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 16:50:06 root] (utils.py 283): INFO Epoch: [14] [ 760/2502] eta: 1:25:20 lr: 0.000002 loss_cls: 2.8317 (2.6821) grad_norm: 1.5099 (1.6026) time: 2.9366 data: 0.0003 max mem: 29202 +[2024-12-11 16:50:35 root] (utils.py 283): INFO Epoch: [14] [ 770/2502] eta: 1:24:51 lr: 0.000002 loss_cls: 2.9173 (2.6847) grad_norm: 1.5814 (1.6032) time: 2.9374 data: 0.0003 max mem: 29202 +[2024-12-11 16:51:05 root] (utils.py 283): INFO Epoch: [14] [ 780/2502] eta: 1:24:21 lr: 0.000002 loss_cls: 2.8982 (2.6833) grad_norm: 1.5814 (1.6050) time: 2.9356 data: 0.0002 max mem: 29202 +[2024-12-11 16:51:34 root] (utils.py 283): INFO Epoch: [14] [ 790/2502] eta: 1:23:52 lr: 0.000002 loss_cls: 2.5101 (2.6821) grad_norm: 1.5320 (1.6040) time: 2.9344 data: 0.0003 max mem: 29202 +[2024-12-11 16:52:04 root] (utils.py 283): INFO Epoch: [14] [ 800/2502] eta: 1:23:22 lr: 0.000002 loss_cls: 2.7019 (2.6831) grad_norm: 1.5017 (1.6059) time: 2.9341 data: 0.0003 max mem: 29202 +[2024-12-11 16:52:33 root] (utils.py 283): INFO Epoch: [14] [ 810/2502] eta: 1:22:53 lr: 0.000002 loss_cls: 2.7319 (2.6839) grad_norm: 1.4992 (1.6050) time: 2.9359 data: 0.0003 max mem: 29202 +[2024-12-11 16:53:02 root] (utils.py 283): INFO Epoch: [14] [ 820/2502] eta: 1:22:23 lr: 0.000002 loss_cls: 2.9170 (2.6847) grad_norm: 1.4491 (1.6053) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 16:53:32 root] (utils.py 283): INFO Epoch: [14] [ 830/2502] eta: 1:21:54 lr: 0.000002 loss_cls: 2.6878 (2.6826) grad_norm: 1.5426 (1.6051) time: 2.9356 data: 0.0003 max mem: 29202 +[2024-12-11 16:54:01 root] (utils.py 283): INFO Epoch: [14] [ 840/2502] eta: 1:21:24 lr: 0.000002 loss_cls: 2.4849 (2.6813) grad_norm: 1.5610 (1.6047) time: 2.9344 data: 0.0002 max mem: 29202 +[2024-12-11 16:54:30 root] (utils.py 283): INFO Epoch: [14] [ 850/2502] eta: 1:20:55 lr: 0.000002 loss_cls: 2.6158 (2.6819) grad_norm: 1.5677 (1.6051) time: 2.9362 data: 0.0002 max mem: 29202 +[2024-12-11 16:55:00 root] (utils.py 283): INFO Epoch: [14] [ 860/2502] eta: 1:20:25 lr: 0.000002 loss_cls: 2.5808 (2.6788) grad_norm: 1.4493 (1.6048) time: 2.9390 data: 0.0002 max mem: 29202 +[2024-12-11 16:55:29 root] (utils.py 283): INFO Epoch: [14] [ 870/2502] eta: 1:19:56 lr: 0.000002 loss_cls: 2.4349 (2.6758) grad_norm: 1.4493 (1.6040) time: 2.9379 data: 0.0002 max mem: 29202 +[2024-12-11 16:55:58 root] (utils.py 283): INFO Epoch: [14] [ 880/2502] eta: 1:19:27 lr: 0.000002 loss_cls: 2.8739 (2.6780) grad_norm: 1.5554 (1.6048) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 16:56:28 root] (utils.py 283): INFO Epoch: [14] [ 890/2502] eta: 1:18:57 lr: 0.000002 loss_cls: 2.7735 (2.6766) grad_norm: 1.5958 (1.6048) time: 2.9355 data: 0.0003 max mem: 29202 +[2024-12-11 16:56:57 root] (utils.py 283): INFO Epoch: [14] [ 900/2502] eta: 1:18:28 lr: 0.000002 loss_cls: 2.7278 (2.6766) grad_norm: 1.5374 (1.6044) time: 2.9385 data: 0.0003 max mem: 29202 +[2024-12-11 16:57:27 root] (utils.py 283): INFO Epoch: [14] [ 910/2502] eta: 1:17:59 lr: 0.000002 loss_cls: 2.8342 (2.6769) grad_norm: 1.5091 (1.6036) time: 2.9544 data: 0.0003 max mem: 29202 +[2024-12-11 16:57:56 root] (utils.py 283): INFO Epoch: [14] [ 920/2502] eta: 1:17:29 lr: 0.000002 loss_cls: 2.7157 (2.6776) grad_norm: 1.4997 (1.6024) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-11 16:58:26 root] (utils.py 283): INFO Epoch: [14] [ 930/2502] eta: 1:17:00 lr: 0.000002 loss_cls: 2.7614 (2.6788) grad_norm: 1.5423 (1.6023) time: 2.9324 data: 0.0003 max mem: 29202 +[2024-12-11 16:58:55 root] (utils.py 283): INFO Epoch: [14] [ 940/2502] eta: 1:16:30 lr: 0.000002 loss_cls: 2.8395 (2.6771) grad_norm: 1.6242 (1.6022) time: 2.9321 data: 0.0003 max mem: 29202 +[2024-12-11 16:59:24 root] (utils.py 283): INFO Epoch: [14] [ 950/2502] eta: 1:16:01 lr: 0.000002 loss_cls: 2.4290 (2.6743) grad_norm: 1.5346 (1.6017) time: 2.9332 data: 0.0003 max mem: 29202 +[2024-12-11 16:59:54 root] (utils.py 283): INFO Epoch: [14] [ 960/2502] eta: 1:15:31 lr: 0.000002 loss_cls: 2.5606 (2.6728) grad_norm: 1.4931 (1.6008) time: 2.9317 data: 0.0003 max mem: 29202 +[2024-12-11 17:00:23 root] (utils.py 283): INFO Epoch: [14] [ 970/2502] eta: 1:15:02 lr: 0.000002 loss_cls: 2.5994 (2.6723) grad_norm: 1.4821 (1.5993) time: 2.9311 data: 0.0003 max mem: 29202 +[2024-12-11 17:00:52 root] (utils.py 283): INFO Epoch: [14] [ 980/2502] eta: 1:14:32 lr: 0.000002 loss_cls: 2.6990 (2.6710) grad_norm: 1.4383 (1.5997) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 17:01:22 root] (utils.py 283): INFO Epoch: [14] [ 990/2502] eta: 1:14:03 lr: 0.000002 loss_cls: 2.7021 (2.6718) grad_norm: 1.4405 (1.5983) time: 2.9348 data: 0.0003 max mem: 29202 +[2024-12-11 17:01:51 root] (utils.py 283): INFO Epoch: [14] [1000/2502] eta: 1:13:34 lr: 0.000002 loss_cls: 2.7714 (2.6716) grad_norm: 1.5285 (1.5992) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 17:02:20 root] (utils.py 283): INFO Epoch: [14] [1010/2502] eta: 1:13:04 lr: 0.000002 loss_cls: 2.7114 (2.6712) grad_norm: 1.4882 (1.5982) time: 2.9350 data: 0.0003 max mem: 29202 +[2024-12-11 17:02:50 root] (utils.py 283): INFO Epoch: [14] [1020/2502] eta: 1:12:35 lr: 0.000002 loss_cls: 2.7292 (2.6723) grad_norm: 1.4788 (1.5987) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 17:03:19 root] (utils.py 283): INFO Epoch: [14] [1030/2502] eta: 1:12:06 lr: 0.000002 loss_cls: 2.7729 (2.6716) grad_norm: 1.5807 (1.5983) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 17:03:49 root] (utils.py 283): INFO Epoch: [14] [1040/2502] eta: 1:11:36 lr: 0.000002 loss_cls: 2.7166 (2.6717) grad_norm: 1.5608 (1.5981) time: 2.9541 data: 0.0003 max mem: 29202 +[2024-12-11 17:04:18 root] (utils.py 283): INFO Epoch: [14] [1050/2502] eta: 1:11:07 lr: 0.000002 loss_cls: 2.6999 (2.6702) grad_norm: 1.5608 (1.5986) time: 2.9492 data: 0.0003 max mem: 29202 +[2024-12-11 17:04:48 root] (utils.py 283): INFO Epoch: [14] [1060/2502] eta: 1:10:38 lr: 0.000002 loss_cls: 2.6752 (2.6700) grad_norm: 1.5970 (1.5997) time: 2.9415 data: 0.0003 max mem: 29202 +[2024-12-11 17:05:17 root] (utils.py 283): INFO Epoch: [14] [1070/2502] eta: 1:10:08 lr: 0.000002 loss_cls: 2.5150 (2.6686) grad_norm: 1.6496 (1.6006) time: 2.9468 data: 0.0003 max mem: 29202 +[2024-12-11 17:05:46 root] (utils.py 283): INFO Epoch: [14] [1080/2502] eta: 1:09:39 lr: 0.000002 loss_cls: 2.4893 (2.6686) grad_norm: 1.6496 (1.6014) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 17:06:16 root] (utils.py 283): INFO Epoch: [14] [1090/2502] eta: 1:09:10 lr: 0.000002 loss_cls: 2.6070 (2.6668) grad_norm: 1.6237 (1.6013) time: 2.9383 data: 0.0003 max mem: 29202 +[2024-12-11 17:06:45 root] (utils.py 283): INFO Epoch: [14] [1100/2502] eta: 1:08:40 lr: 0.000002 loss_cls: 2.6047 (2.6663) grad_norm: 1.5409 (1.6017) time: 2.9339 data: 0.0003 max mem: 29202 +[2024-12-11 17:07:15 root] (utils.py 283): INFO Epoch: [14] [1110/2502] eta: 1:08:11 lr: 0.000002 loss_cls: 2.6931 (2.6656) grad_norm: 1.5166 (1.6029) time: 2.9327 data: 0.0003 max mem: 29202 +[2024-12-11 17:07:44 root] (utils.py 283): INFO Epoch: [14] [1120/2502] eta: 1:07:41 lr: 0.000002 loss_cls: 2.6545 (2.6653) grad_norm: 1.5919 (1.6037) time: 2.9349 data: 0.0003 max mem: 29202 +[2024-12-11 17:08:13 root] (utils.py 283): INFO Epoch: [14] [1130/2502] eta: 1:07:12 lr: 0.000002 loss_cls: 2.7422 (2.6672) grad_norm: 1.5919 (1.6037) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 17:08:43 root] (utils.py 283): INFO Epoch: [14] [1140/2502] eta: 1:06:43 lr: 0.000002 loss_cls: 2.8828 (2.6676) grad_norm: 1.5312 (1.6044) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 17:09:12 root] (utils.py 283): INFO Epoch: [14] [1150/2502] eta: 1:06:13 lr: 0.000002 loss_cls: 2.8229 (2.6685) grad_norm: 1.4836 (1.6044) time: 2.9475 data: 0.0003 max mem: 29202 +[2024-12-11 17:09:42 root] (utils.py 283): INFO Epoch: [14] [1160/2502] eta: 1:05:44 lr: 0.000002 loss_cls: 2.7122 (2.6686) grad_norm: 1.5535 (1.6042) time: 2.9463 data: 0.0003 max mem: 29202 +[2024-12-11 17:10:11 root] (utils.py 283): INFO Epoch: [14] [1170/2502] eta: 1:05:15 lr: 0.000002 loss_cls: 2.6894 (2.6680) grad_norm: 1.5884 (1.6046) time: 2.9405 data: 0.0003 max mem: 29202 +[2024-12-11 17:10:41 root] (utils.py 283): INFO Epoch: [14] [1180/2502] eta: 1:04:45 lr: 0.000002 loss_cls: 2.6727 (2.6682) grad_norm: 1.5094 (1.6035) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 17:11:10 root] (utils.py 283): INFO Epoch: [14] [1190/2502] eta: 1:04:16 lr: 0.000002 loss_cls: 2.7241 (2.6679) grad_norm: 1.5079 (1.6035) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 17:11:39 root] (utils.py 283): INFO Epoch: [14] [1200/2502] eta: 1:03:46 lr: 0.000002 loss_cls: 2.7527 (2.6679) grad_norm: 1.5566 (1.6027) time: 2.9369 data: 0.0003 max mem: 29202 +[2024-12-11 17:12:09 root] (utils.py 283): INFO Epoch: [14] [1210/2502] eta: 1:03:17 lr: 0.000002 loss_cls: 2.7527 (2.6679) grad_norm: 1.5408 (1.6041) time: 2.9368 data: 0.0003 max mem: 29202 +[2024-12-11 17:12:38 root] (utils.py 283): INFO Epoch: [14] [1220/2502] eta: 1:02:48 lr: 0.000002 loss_cls: 2.7590 (2.6682) grad_norm: 1.5504 (1.6044) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 17:13:07 root] (utils.py 283): INFO Epoch: [14] [1230/2502] eta: 1:02:18 lr: 0.000002 loss_cls: 2.7594 (2.6685) grad_norm: 1.5397 (1.6042) time: 2.9379 data: 0.0003 max mem: 29202 +[2024-12-11 17:13:37 root] (utils.py 283): INFO Epoch: [14] [1240/2502] eta: 1:01:49 lr: 0.000002 loss_cls: 2.8590 (2.6703) grad_norm: 1.6028 (1.6045) time: 2.9375 data: 0.0003 max mem: 29202 +[2024-12-11 17:14:06 root] (utils.py 283): INFO Epoch: [14] [1250/2502] eta: 1:01:20 lr: 0.000002 loss_cls: 2.9740 (2.6720) grad_norm: 1.5867 (1.6045) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 17:14:36 root] (utils.py 283): INFO Epoch: [14] [1260/2502] eta: 1:00:50 lr: 0.000002 loss_cls: 2.8579 (2.6731) grad_norm: 1.5867 (1.6055) time: 2.9511 data: 0.0003 max mem: 29202 +[2024-12-11 17:15:05 root] (utils.py 283): INFO Epoch: [14] [1270/2502] eta: 1:00:21 lr: 0.000002 loss_cls: 2.7865 (2.6715) grad_norm: 1.6538 (1.6066) time: 2.9482 data: 0.0003 max mem: 29202 +[2024-12-11 17:15:35 root] (utils.py 283): INFO Epoch: [14] [1280/2502] eta: 0:59:52 lr: 0.000002 loss_cls: 2.7880 (2.6719) grad_norm: 1.6286 (1.6069) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 17:16:04 root] (utils.py 283): INFO Epoch: [14] [1290/2502] eta: 0:59:22 lr: 0.000002 loss_cls: 2.5306 (2.6705) grad_norm: 1.4991 (1.6063) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 17:16:33 root] (utils.py 283): INFO Epoch: [14] [1300/2502] eta: 0:58:53 lr: 0.000002 loss_cls: 2.5306 (2.6711) grad_norm: 1.5012 (1.6059) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 17:17:03 root] (utils.py 283): INFO Epoch: [14] [1310/2502] eta: 0:58:23 lr: 0.000002 loss_cls: 2.3087 (2.6679) grad_norm: 1.4870 (1.6049) time: 2.9362 data: 0.0003 max mem: 29202 +[2024-12-11 17:17:32 root] (utils.py 283): INFO Epoch: [14] [1320/2502] eta: 0:57:54 lr: 0.000002 loss_cls: 2.3432 (2.6678) grad_norm: 1.5086 (1.6046) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 17:18:02 root] (utils.py 283): INFO Epoch: [14] [1330/2502] eta: 0:57:24 lr: 0.000002 loss_cls: 2.7440 (2.6680) grad_norm: 1.5888 (1.6051) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 17:18:31 root] (utils.py 283): INFO Epoch: [14] [1340/2502] eta: 0:56:55 lr: 0.000002 loss_cls: 2.8032 (2.6684) grad_norm: 1.5862 (1.6068) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 17:19:01 root] (utils.py 283): INFO Epoch: [14] [1350/2502] eta: 0:56:26 lr: 0.000002 loss_cls: 2.8463 (2.6683) grad_norm: 1.6294 (1.6076) time: 2.9478 data: 0.0003 max mem: 29202 +[2024-12-11 17:19:30 root] (utils.py 283): INFO Epoch: [14] [1360/2502] eta: 0:55:57 lr: 0.000002 loss_cls: 2.6768 (2.6674) grad_norm: 1.6321 (1.6073) time: 2.9525 data: 0.0003 max mem: 29202 +[2024-12-11 17:19:59 root] (utils.py 283): INFO Epoch: [14] [1370/2502] eta: 0:55:27 lr: 0.000002 loss_cls: 2.7971 (2.6689) grad_norm: 1.6090 (1.6074) time: 2.9435 data: 0.0003 max mem: 29202 +[2024-12-11 17:20:29 root] (utils.py 283): INFO Epoch: [14] [1380/2502] eta: 0:54:58 lr: 0.000002 loss_cls: 2.8886 (2.6693) grad_norm: 1.6485 (1.6082) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 17:20:58 root] (utils.py 283): INFO Epoch: [14] [1390/2502] eta: 0:54:28 lr: 0.000002 loss_cls: 2.7860 (2.6677) grad_norm: 1.7013 (1.6096) time: 2.9384 data: 0.0002 max mem: 29202 +[2024-12-11 17:21:28 root] (utils.py 283): INFO Epoch: [14] [1400/2502] eta: 0:53:59 lr: 0.000002 loss_cls: 2.6663 (2.6676) grad_norm: 1.5024 (1.6087) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 17:21:57 root] (utils.py 283): INFO Epoch: [14] [1410/2502] eta: 0:53:30 lr: 0.000002 loss_cls: 2.9433 (2.6702) grad_norm: 1.5051 (1.6089) time: 2.9441 data: 0.0003 max mem: 29202 +[2024-12-11 17:22:26 root] (utils.py 283): INFO Epoch: [14] [1420/2502] eta: 0:53:00 lr: 0.000002 loss_cls: 2.9993 (2.6714) grad_norm: 1.5648 (1.6086) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-11 17:22:56 root] (utils.py 283): INFO Epoch: [14] [1430/2502] eta: 0:52:31 lr: 0.000002 loss_cls: 2.8333 (2.6714) grad_norm: 1.4756 (1.6078) time: 2.9386 data: 0.0003 max mem: 29202 +[2024-12-11 17:23:25 root] (utils.py 283): INFO Epoch: [14] [1440/2502] eta: 0:52:02 lr: 0.000002 loss_cls: 2.6466 (2.6702) grad_norm: 1.5671 (1.6076) time: 2.9461 data: 0.0003 max mem: 29202 +[2024-12-11 17:23:55 root] (utils.py 283): INFO Epoch: [14] [1450/2502] eta: 0:51:32 lr: 0.000002 loss_cls: 2.6063 (2.6687) grad_norm: 1.5439 (1.6072) time: 2.9522 data: 0.0003 max mem: 29202 +[2024-12-11 17:24:24 root] (utils.py 283): INFO Epoch: [14] [1460/2502] eta: 0:51:03 lr: 0.000002 loss_cls: 2.7040 (2.6687) grad_norm: 1.5551 (1.6076) time: 2.9454 data: 0.0003 max mem: 29202 +[2024-12-11 17:24:54 root] (utils.py 283): INFO Epoch: [14] [1470/2502] eta: 0:50:33 lr: 0.000002 loss_cls: 2.7314 (2.6694) grad_norm: 1.6217 (1.6076) time: 2.9399 data: 0.0003 max mem: 29202 +[2024-12-11 17:25:23 root] (utils.py 283): INFO Epoch: [14] [1480/2502] eta: 0:50:04 lr: 0.000002 loss_cls: 2.8350 (2.6697) grad_norm: 1.4922 (1.6063) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 17:25:52 root] (utils.py 283): INFO Epoch: [14] [1490/2502] eta: 0:49:35 lr: 0.000002 loss_cls: 2.7481 (2.6697) grad_norm: 1.5301 (1.6074) time: 2.9401 data: 0.0003 max mem: 29202 +[2024-12-11 17:26:22 root] (utils.py 283): INFO Epoch: [14] [1500/2502] eta: 0:49:05 lr: 0.000002 loss_cls: 2.6666 (2.6695) grad_norm: 1.6309 (1.6075) time: 2.9424 data: 0.0003 max mem: 29202 +[2024-12-11 17:26:51 root] (utils.py 283): INFO Epoch: [14] [1510/2502] eta: 0:48:36 lr: 0.000002 loss_cls: 2.6181 (2.6690) grad_norm: 1.5538 (1.6068) time: 2.9432 data: 0.0003 max mem: 29202 +[2024-12-11 17:27:21 root] (utils.py 283): INFO Epoch: [14] [1520/2502] eta: 0:48:06 lr: 0.000002 loss_cls: 2.7386 (2.6699) grad_norm: 1.5438 (1.6067) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 17:27:50 root] (utils.py 283): INFO Epoch: [14] [1530/2502] eta: 0:47:37 lr: 0.000002 loss_cls: 2.7944 (2.6703) grad_norm: 1.6057 (1.6072) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 17:28:20 root] (utils.py 283): INFO Epoch: [14] [1540/2502] eta: 0:47:08 lr: 0.000002 loss_cls: 2.7222 (2.6691) grad_norm: 1.6112 (1.6074) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 17:28:49 root] (utils.py 283): INFO Epoch: [14] [1550/2502] eta: 0:46:38 lr: 0.000002 loss_cls: 2.7065 (2.6691) grad_norm: 1.6048 (1.6070) time: 2.9455 data: 0.0003 max mem: 29202 +[2024-12-11 17:29:18 root] (utils.py 283): INFO Epoch: [14] [1560/2502] eta: 0:46:09 lr: 0.000002 loss_cls: 2.9398 (2.6704) grad_norm: 1.5798 (1.6069) time: 2.9462 data: 0.0003 max mem: 29202 +[2024-12-11 17:29:48 root] (utils.py 283): INFO Epoch: [14] [1570/2502] eta: 0:45:40 lr: 0.000002 loss_cls: 2.8496 (2.6707) grad_norm: 1.5709 (1.6063) time: 2.9458 data: 0.0003 max mem: 29202 +[2024-12-11 17:30:17 root] (utils.py 283): INFO Epoch: [14] [1580/2502] eta: 0:45:10 lr: 0.000002 loss_cls: 2.8374 (2.6715) grad_norm: 1.5314 (1.6086) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 17:30:47 root] (utils.py 283): INFO Epoch: [14] [1590/2502] eta: 0:44:41 lr: 0.000002 loss_cls: 2.8180 (2.6716) grad_norm: 1.5800 (1.6084) time: 2.9392 data: 0.0002 max mem: 29202 +[2024-12-11 17:31:16 root] (utils.py 283): INFO Epoch: [14] [1600/2502] eta: 0:44:11 lr: 0.000002 loss_cls: 2.7177 (2.6704) grad_norm: 1.5458 (1.6082) time: 2.9373 data: 0.0003 max mem: 29202 +[2024-12-11 17:31:46 root] (utils.py 283): INFO Epoch: [14] [1610/2502] eta: 0:43:42 lr: 0.000002 loss_cls: 2.6358 (2.6702) grad_norm: 1.6222 (1.6092) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 17:32:15 root] (utils.py 283): INFO Epoch: [14] [1620/2502] eta: 0:43:13 lr: 0.000002 loss_cls: 2.7235 (2.6695) grad_norm: 1.6153 (1.6090) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 17:32:44 root] (utils.py 283): INFO Epoch: [14] [1630/2502] eta: 0:42:43 lr: 0.000002 loss_cls: 2.9424 (2.6714) grad_norm: 1.5327 (1.6095) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 17:33:14 root] (utils.py 283): INFO Epoch: [14] [1640/2502] eta: 0:42:14 lr: 0.000002 loss_cls: 2.8856 (2.6721) grad_norm: 1.5625 (1.6097) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 17:33:43 root] (utils.py 283): INFO Epoch: [14] [1650/2502] eta: 0:41:44 lr: 0.000002 loss_cls: 2.7958 (2.6710) grad_norm: 1.4891 (1.6092) time: 2.9376 data: 0.0003 max mem: 29202 +[2024-12-11 17:34:13 root] (utils.py 283): INFO Epoch: [14] [1660/2502] eta: 0:41:15 lr: 0.000002 loss_cls: 2.7083 (2.6707) grad_norm: 1.4829 (1.6098) time: 2.9410 data: 0.0003 max mem: 29202 +[2024-12-11 17:34:42 root] (utils.py 283): INFO Epoch: [14] [1670/2502] eta: 0:40:46 lr: 0.000002 loss_cls: 2.6215 (2.6700) grad_norm: 1.6036 (1.6095) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-11 17:35:11 root] (utils.py 283): INFO Epoch: [14] [1680/2502] eta: 0:40:16 lr: 0.000002 loss_cls: 2.6491 (2.6701) grad_norm: 1.5925 (1.6092) time: 2.9425 data: 0.0003 max mem: 29202 +[2024-12-11 17:35:41 root] (utils.py 283): INFO Epoch: [14] [1690/2502] eta: 0:39:47 lr: 0.000002 loss_cls: 2.8338 (2.6699) grad_norm: 1.5699 (1.6094) time: 2.9407 data: 0.0003 max mem: 29202 +[2024-12-11 17:36:10 root] (utils.py 283): INFO Epoch: [14] [1700/2502] eta: 0:39:17 lr: 0.000002 loss_cls: 2.8106 (2.6702) grad_norm: 1.5699 (1.6093) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 17:36:40 root] (utils.py 283): INFO Epoch: [14] [1710/2502] eta: 0:38:48 lr: 0.000002 loss_cls: 2.8006 (2.6691) grad_norm: 1.5344 (1.6091) time: 2.9377 data: 0.0003 max mem: 29202 +[2024-12-11 17:37:09 root] (utils.py 283): INFO Epoch: [14] [1720/2502] eta: 0:38:19 lr: 0.000002 loss_cls: 2.7503 (2.6693) grad_norm: 1.5344 (1.6087) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 17:37:38 root] (utils.py 283): INFO Epoch: [14] [1730/2502] eta: 0:37:49 lr: 0.000002 loss_cls: 2.8861 (2.6708) grad_norm: 1.5988 (1.6090) time: 2.9413 data: 0.0002 max mem: 29202 +[2024-12-11 17:38:08 root] (utils.py 283): INFO Epoch: [14] [1740/2502] eta: 0:37:20 lr: 0.000002 loss_cls: 2.8951 (2.6714) grad_norm: 1.5778 (1.6086) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 17:38:37 root] (utils.py 283): INFO Epoch: [14] [1750/2502] eta: 0:36:50 lr: 0.000002 loss_cls: 2.7964 (2.6712) grad_norm: 1.5068 (1.6090) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 17:39:07 root] (utils.py 283): INFO Epoch: [14] [1760/2502] eta: 0:36:21 lr: 0.000002 loss_cls: 2.5207 (2.6705) grad_norm: 1.5631 (1.6091) time: 2.9504 data: 0.0003 max mem: 29202 +[2024-12-11 17:39:36 root] (utils.py 283): INFO Epoch: [14] [1770/2502] eta: 0:35:52 lr: 0.000002 loss_cls: 2.5207 (2.6695) grad_norm: 1.6059 (1.6099) time: 2.9487 data: 0.0003 max mem: 29202 +[2024-12-11 17:40:06 root] (utils.py 283): INFO Epoch: [14] [1780/2502] eta: 0:35:22 lr: 0.000002 loss_cls: 2.5630 (2.6692) grad_norm: 1.6059 (1.6098) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 17:40:35 root] (utils.py 283): INFO Epoch: [14] [1790/2502] eta: 0:34:53 lr: 0.000002 loss_cls: 2.5630 (2.6675) grad_norm: 1.5704 (1.6093) time: 2.9447 data: 0.0003 max mem: 29202 +[2024-12-11 17:41:05 root] (utils.py 283): INFO Epoch: [14] [1800/2502] eta: 0:34:23 lr: 0.000002 loss_cls: 2.7542 (2.6689) grad_norm: 1.5704 (1.6098) time: 2.9444 data: 0.0003 max mem: 29202 +[2024-12-11 17:41:34 root] (utils.py 283): INFO Epoch: [14] [1810/2502] eta: 0:33:54 lr: 0.000002 loss_cls: 2.8219 (2.6676) grad_norm: 1.5174 (1.6094) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 17:42:03 root] (utils.py 283): INFO Epoch: [14] [1820/2502] eta: 0:33:25 lr: 0.000002 loss_cls: 2.7738 (2.6682) grad_norm: 1.5174 (1.6094) time: 2.9390 data: 0.0002 max mem: 29202 +[2024-12-11 17:42:33 root] (utils.py 283): INFO Epoch: [14] [1830/2502] eta: 0:32:55 lr: 0.000002 loss_cls: 2.8526 (2.6683) grad_norm: 1.5407 (1.6088) time: 2.9372 data: 0.0003 max mem: 29202 +[2024-12-11 17:43:02 root] (utils.py 283): INFO Epoch: [14] [1840/2502] eta: 0:32:26 lr: 0.000002 loss_cls: 2.8474 (2.6688) grad_norm: 1.5839 (1.6089) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 17:43:31 root] (utils.py 283): INFO Epoch: [14] [1850/2502] eta: 0:31:56 lr: 0.000002 loss_cls: 2.6160 (2.6686) grad_norm: 1.6133 (1.6088) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 17:44:01 root] (utils.py 283): INFO Epoch: [14] [1860/2502] eta: 0:31:27 lr: 0.000002 loss_cls: 2.8119 (2.6693) grad_norm: 1.6530 (1.6094) time: 2.9453 data: 0.0003 max mem: 29202 +[2024-12-11 17:44:30 root] (utils.py 283): INFO Epoch: [14] [1870/2502] eta: 0:30:58 lr: 0.000002 loss_cls: 2.8265 (2.6692) grad_norm: 1.5994 (1.6091) time: 2.9460 data: 0.0003 max mem: 29202 +[2024-12-11 17:45:00 root] (utils.py 283): INFO Epoch: [14] [1880/2502] eta: 0:30:28 lr: 0.000002 loss_cls: 2.8656 (2.6689) grad_norm: 1.5088 (1.6086) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 17:45:29 root] (utils.py 283): INFO Epoch: [14] [1890/2502] eta: 0:29:59 lr: 0.000002 loss_cls: 2.9001 (2.6696) grad_norm: 1.4914 (1.6088) time: 2.9416 data: 0.0003 max mem: 29202 +[2024-12-11 17:45:59 root] (utils.py 283): INFO Epoch: [14] [1900/2502] eta: 0:29:30 lr: 0.000002 loss_cls: 2.7515 (2.6687) grad_norm: 1.4283 (1.6080) time: 2.9442 data: 0.0003 max mem: 29202 +[2024-12-11 17:46:28 root] (utils.py 283): INFO Epoch: [14] [1910/2502] eta: 0:29:00 lr: 0.000002 loss_cls: 2.6717 (2.6698) grad_norm: 1.5351 (1.6089) time: 2.9403 data: 0.0002 max mem: 29202 +[2024-12-11 17:46:57 root] (utils.py 283): INFO Epoch: [14] [1920/2502] eta: 0:28:31 lr: 0.000002 loss_cls: 2.7828 (2.6688) grad_norm: 1.5967 (1.6085) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 17:47:27 root] (utils.py 283): INFO Epoch: [14] [1930/2502] eta: 0:28:01 lr: 0.000002 loss_cls: 2.6965 (2.6695) grad_norm: 1.5326 (1.6087) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 17:47:56 root] (utils.py 283): INFO Epoch: [14] [1940/2502] eta: 0:27:32 lr: 0.000002 loss_cls: 2.8482 (2.6706) grad_norm: 1.5706 (1.6089) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 17:48:26 root] (utils.py 283): INFO Epoch: [14] [1950/2502] eta: 0:27:03 lr: 0.000002 loss_cls: 2.7839 (2.6706) grad_norm: 1.5198 (1.6085) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 17:48:55 root] (utils.py 283): INFO Epoch: [14] [1960/2502] eta: 0:26:33 lr: 0.000002 loss_cls: 2.7053 (2.6712) grad_norm: 1.5122 (1.6082) time: 2.9473 data: 0.0003 max mem: 29202 +[2024-12-11 17:49:25 root] (utils.py 283): INFO Epoch: [14] [1970/2502] eta: 0:26:04 lr: 0.000002 loss_cls: 2.9130 (2.6721) grad_norm: 1.5802 (1.6093) time: 2.9397 data: 0.0003 max mem: 29202 +[2024-12-11 17:49:54 root] (utils.py 283): INFO Epoch: [14] [1980/2502] eta: 0:25:34 lr: 0.000002 loss_cls: 2.8859 (2.6724) grad_norm: 1.5219 (1.6096) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 17:50:23 root] (utils.py 283): INFO Epoch: [14] [1990/2502] eta: 0:25:05 lr: 0.000002 loss_cls: 2.6874 (2.6714) grad_norm: 1.4992 (1.6091) time: 2.9426 data: 0.0002 max mem: 29202 +[2024-12-11 17:50:53 root] (utils.py 283): INFO Epoch: [14] [2000/2502] eta: 0:24:36 lr: 0.000002 loss_cls: 2.5339 (2.6706) grad_norm: 1.5001 (1.6089) time: 2.9418 data: 0.0003 max mem: 29202 +[2024-12-11 17:51:22 root] (utils.py 283): INFO Epoch: [14] [2010/2502] eta: 0:24:06 lr: 0.000002 loss_cls: 2.8010 (2.6713) grad_norm: 1.5484 (1.6088) time: 2.9414 data: 0.0003 max mem: 29202 +[2024-12-11 17:51:52 root] (utils.py 283): INFO Epoch: [14] [2020/2502] eta: 0:23:37 lr: 0.000002 loss_cls: 2.5342 (2.6703) grad_norm: 1.5484 (1.6085) time: 2.9404 data: 0.0003 max mem: 29202 +[2024-12-11 17:52:21 root] (utils.py 283): INFO Epoch: [14] [2030/2502] eta: 0:23:07 lr: 0.000002 loss_cls: 2.6269 (2.6707) grad_norm: 1.5661 (1.6085) time: 2.9426 data: 0.0003 max mem: 29202 +[2024-12-11 17:52:51 root] (utils.py 283): INFO Epoch: [14] [2040/2502] eta: 0:22:38 lr: 0.000002 loss_cls: 2.8869 (2.6700) grad_norm: 1.5600 (1.6086) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 17:53:20 root] (utils.py 283): INFO Epoch: [14] [2050/2502] eta: 0:22:09 lr: 0.000002 loss_cls: 2.7895 (2.6703) grad_norm: 1.5040 (1.6082) time: 2.9392 data: 0.0003 max mem: 29202 +[2024-12-11 17:53:49 root] (utils.py 283): INFO Epoch: [14] [2060/2502] eta: 0:21:39 lr: 0.000002 loss_cls: 2.6453 (2.6694) grad_norm: 1.4789 (1.6073) time: 2.9387 data: 0.0003 max mem: 29202 +[2024-12-11 17:54:19 root] (utils.py 283): INFO Epoch: [14] [2070/2502] eta: 0:21:10 lr: 0.000002 loss_cls: 2.5247 (2.6694) grad_norm: 1.4789 (1.6072) time: 2.9406 data: 0.0003 max mem: 29202 +[2024-12-11 17:54:48 root] (utils.py 283): INFO Epoch: [14] [2080/2502] eta: 0:20:40 lr: 0.000002 loss_cls: 2.6460 (2.6697) grad_norm: 1.6174 (1.6079) time: 2.9433 data: 0.0003 max mem: 29202 +[2024-12-11 17:55:18 root] (utils.py 283): INFO Epoch: [14] [2090/2502] eta: 0:20:11 lr: 0.000002 loss_cls: 2.7437 (2.6697) grad_norm: 1.6361 (1.6078) time: 2.9445 data: 0.0003 max mem: 29202 +[2024-12-11 17:55:47 root] (utils.py 283): INFO Epoch: [14] [2100/2502] eta: 0:19:42 lr: 0.000002 loss_cls: 2.7051 (2.6692) grad_norm: 1.5483 (1.6080) time: 2.9450 data: 0.0003 max mem: 29202 +[2024-12-11 17:56:17 root] (utils.py 283): INFO Epoch: [14] [2110/2502] eta: 0:19:12 lr: 0.000002 loss_cls: 2.7843 (2.6697) grad_norm: 1.6466 (1.6085) time: 2.9471 data: 0.0003 max mem: 29202 +[2024-12-11 17:56:46 root] (utils.py 283): INFO Epoch: [14] [2120/2502] eta: 0:18:43 lr: 0.000002 loss_cls: 2.7660 (2.6694) grad_norm: 1.5597 (1.6080) time: 2.9484 data: 0.0002 max mem: 29202 +[2024-12-11 17:57:15 root] (utils.py 283): INFO Epoch: [14] [2130/2502] eta: 0:18:13 lr: 0.000002 loss_cls: 2.7605 (2.6700) grad_norm: 1.5427 (1.6080) time: 2.9419 data: 0.0002 max mem: 29202 +[2024-12-11 17:57:45 root] (utils.py 283): INFO Epoch: [14] [2140/2502] eta: 0:17:44 lr: 0.000002 loss_cls: 2.7840 (2.6695) grad_norm: 1.6008 (1.6083) time: 2.9430 data: 0.0002 max mem: 29202 +[2024-12-11 17:58:14 root] (utils.py 283): INFO Epoch: [14] [2150/2502] eta: 0:17:15 lr: 0.000002 loss_cls: 2.7840 (2.6708) grad_norm: 1.6240 (1.6084) time: 2.9422 data: 0.0003 max mem: 29202 +[2024-12-11 17:58:44 root] (utils.py 283): INFO Epoch: [14] [2160/2502] eta: 0:16:45 lr: 0.000002 loss_cls: 2.9487 (2.6707) grad_norm: 1.6008 (1.6082) time: 2.9371 data: 0.0003 max mem: 29202 +[2024-12-11 17:59:13 root] (utils.py 283): INFO Epoch: [14] [2170/2502] eta: 0:16:16 lr: 0.000002 loss_cls: 2.5458 (2.6697) grad_norm: 1.4443 (1.6078) time: 2.9389 data: 0.0003 max mem: 29202 +[2024-12-11 17:59:42 root] (utils.py 283): INFO Epoch: [14] [2180/2502] eta: 0:15:46 lr: 0.000002 loss_cls: 2.4613 (2.6688) grad_norm: 1.4425 (1.6074) time: 2.9388 data: 0.0003 max mem: 29202 +[2024-12-11 18:00:12 root] (utils.py 283): INFO Epoch: [14] [2190/2502] eta: 0:15:17 lr: 0.000002 loss_cls: 2.7628 (2.6694) grad_norm: 1.4171 (1.6069) time: 2.9382 data: 0.0003 max mem: 29202 +[2024-12-11 18:00:41 root] (utils.py 283): INFO Epoch: [14] [2200/2502] eta: 0:14:48 lr: 0.000002 loss_cls: 2.8116 (2.6695) grad_norm: 1.4852 (1.6073) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 18:01:11 root] (utils.py 283): INFO Epoch: [14] [2210/2502] eta: 0:14:18 lr: 0.000002 loss_cls: 2.7723 (2.6694) grad_norm: 1.4793 (1.6068) time: 2.9364 data: 0.0003 max mem: 29202 +[2024-12-11 18:01:40 root] (utils.py 283): INFO Epoch: [14] [2220/2502] eta: 0:13:49 lr: 0.000002 loss_cls: 2.5794 (2.6686) grad_norm: 1.4793 (1.6068) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 18:02:09 root] (utils.py 283): INFO Epoch: [14] [2230/2502] eta: 0:13:19 lr: 0.000002 loss_cls: 2.4913 (2.6686) grad_norm: 1.5339 (1.6067) time: 2.9360 data: 0.0003 max mem: 29202 +[2024-12-11 18:02:39 root] (utils.py 283): INFO Epoch: [14] [2240/2502] eta: 0:12:50 lr: 0.000002 loss_cls: 2.7263 (2.6688) grad_norm: 1.5159 (1.6066) time: 2.9429 data: 0.0003 max mem: 29202 +[2024-12-11 18:03:08 root] (utils.py 283): INFO Epoch: [14] [2250/2502] eta: 0:12:20 lr: 0.000002 loss_cls: 2.7657 (2.6692) grad_norm: 1.4364 (1.6060) time: 2.9443 data: 0.0003 max mem: 29202 +[2024-12-11 18:03:38 root] (utils.py 283): INFO Epoch: [14] [2260/2502] eta: 0:11:51 lr: 0.000002 loss_cls: 2.9323 (2.6700) grad_norm: 1.4694 (1.6058) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 18:04:07 root] (utils.py 283): INFO Epoch: [14] [2270/2502] eta: 0:11:22 lr: 0.000002 loss_cls: 2.8887 (2.6703) grad_norm: 1.5672 (1.6057) time: 2.9420 data: 0.0003 max mem: 29202 +[2024-12-11 18:04:36 root] (utils.py 283): INFO Epoch: [14] [2280/2502] eta: 0:10:52 lr: 0.000002 loss_cls: 2.8003 (2.6706) grad_norm: 1.5985 (1.6063) time: 2.9411 data: 0.0002 max mem: 29202 +[2024-12-11 18:05:06 root] (utils.py 283): INFO Epoch: [14] [2290/2502] eta: 0:10:23 lr: 0.000002 loss_cls: 2.7893 (2.6710) grad_norm: 1.5469 (1.6065) time: 2.9394 data: 0.0003 max mem: 29202 +[2024-12-11 18:05:35 root] (utils.py 283): INFO Epoch: [14] [2300/2502] eta: 0:09:53 lr: 0.000002 loss_cls: 2.7861 (2.6713) grad_norm: 1.5276 (1.6064) time: 2.9398 data: 0.0003 max mem: 29202 +[2024-12-11 18:06:05 root] (utils.py 283): INFO Epoch: [14] [2310/2502] eta: 0:09:24 lr: 0.000002 loss_cls: 2.6590 (2.6707) grad_norm: 1.5528 (1.6065) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 18:06:34 root] (utils.py 283): INFO Epoch: [14] [2320/2502] eta: 0:08:55 lr: 0.000002 loss_cls: 2.7437 (2.6714) grad_norm: 1.5342 (1.6070) time: 2.9403 data: 0.0003 max mem: 29202 +[2024-12-11 18:07:03 root] (utils.py 283): INFO Epoch: [14] [2330/2502] eta: 0:08:25 lr: 0.000002 loss_cls: 2.8791 (2.6715) grad_norm: 1.5342 (1.6065) time: 2.9395 data: 0.0003 max mem: 29202 +[2024-12-11 18:07:33 root] (utils.py 283): INFO Epoch: [14] [2340/2502] eta: 0:07:56 lr: 0.000002 loss_cls: 2.7153 (2.6713) grad_norm: 1.5021 (1.6063) time: 2.9413 data: 0.0003 max mem: 29202 +[2024-12-11 18:08:02 root] (utils.py 283): INFO Epoch: [14] [2350/2502] eta: 0:07:26 lr: 0.000002 loss_cls: 2.4800 (2.6706) grad_norm: 1.5068 (1.6062) time: 2.9412 data: 0.0003 max mem: 29202 +[2024-12-11 18:08:32 root] (utils.py 283): INFO Epoch: [14] [2360/2502] eta: 0:06:57 lr: 0.000002 loss_cls: 2.7447 (2.6712) grad_norm: 1.6243 (1.6071) time: 2.9400 data: 0.0003 max mem: 29202 +[2024-12-11 18:09:01 root] (utils.py 283): INFO Epoch: [14] [2370/2502] eta: 0:06:28 lr: 0.000002 loss_cls: 2.8399 (2.6719) grad_norm: 1.6348 (1.6073) time: 2.9380 data: 0.0003 max mem: 29202 +[2024-12-11 18:09:30 root] (utils.py 283): INFO Epoch: [14] [2380/2502] eta: 0:05:58 lr: 0.000002 loss_cls: 2.7692 (2.6713) grad_norm: 1.5556 (1.6069) time: 2.9363 data: 0.0003 max mem: 29202 +[2024-12-11 18:10:00 root] (utils.py 283): INFO Epoch: [14] [2390/2502] eta: 0:05:29 lr: 0.000002 loss_cls: 2.5762 (2.6713) grad_norm: 1.4321 (1.6062) time: 2.9378 data: 0.0003 max mem: 29202 +[2024-12-11 18:10:29 root] (utils.py 283): INFO Epoch: [14] [2400/2502] eta: 0:04:59 lr: 0.000002 loss_cls: 2.8019 (2.6716) grad_norm: 1.4421 (1.6063) time: 2.9390 data: 0.0003 max mem: 29202 +[2024-12-11 18:10:58 root] (utils.py 283): INFO Epoch: [14] [2410/2502] eta: 0:04:30 lr: 0.000002 loss_cls: 2.7257 (2.6709) grad_norm: 1.5132 (1.6064) time: 2.9381 data: 0.0003 max mem: 29202 +[2024-12-11 18:11:28 root] (utils.py 283): INFO Epoch: [14] [2420/2502] eta: 0:04:01 lr: 0.000002 loss_cls: 2.5383 (2.6711) grad_norm: 1.4422 (1.6060) time: 2.9359 data: 0.0002 max mem: 29202 +[2024-12-11 18:11:57 root] (utils.py 283): INFO Epoch: [14] [2430/2502] eta: 0:03:31 lr: 0.000002 loss_cls: 2.8718 (2.6710) grad_norm: 1.5092 (1.6062) time: 2.9346 data: 0.0003 max mem: 29202 +[2024-12-11 18:12:27 root] (utils.py 283): INFO Epoch: [14] [2440/2502] eta: 0:03:02 lr: 0.000002 loss_cls: 2.7922 (2.6712) grad_norm: 1.5518 (1.6061) time: 2.9452 data: 0.0003 max mem: 29202 +[2024-12-11 18:12:56 root] (utils.py 283): INFO Epoch: [14] [2450/2502] eta: 0:02:32 lr: 0.000002 loss_cls: 2.7252 (2.6705) grad_norm: 1.5370 (1.6060) time: 2.9537 data: 0.0003 max mem: 29202 +[2024-12-11 18:13:26 root] (utils.py 283): INFO Epoch: [14] [2460/2502] eta: 0:02:03 lr: 0.000002 loss_cls: 2.6830 (2.6712) grad_norm: 1.5580 (1.6062) time: 2.9437 data: 0.0002 max mem: 29202 +[2024-12-11 18:13:55 root] (utils.py 283): INFO Epoch: [14] [2470/2502] eta: 0:01:34 lr: 0.000002 loss_cls: 2.6427 (2.6702) grad_norm: 1.5219 (1.6063) time: 2.9458 data: 0.0002 max mem: 29202 +[2024-12-11 18:14:25 root] (utils.py 283): INFO Epoch: [14] [2480/2502] eta: 0:01:04 lr: 0.000002 loss_cls: 2.6063 (2.6701) grad_norm: 1.5473 (1.6064) time: 2.9445 data: 0.0002 max mem: 29202 +[2024-12-11 18:14:54 root] (utils.py 283): INFO Epoch: [14] [2490/2502] eta: 0:00:35 lr: 0.000002 loss_cls: 2.6352 (2.6698) grad_norm: 1.6685 (1.6069) time: 2.9544 data: 0.0210 max mem: 29202 +[2024-12-11 18:15:24 root] (utils.py 283): INFO Epoch: [14] [2500/2502] eta: 0:00:05 lr: 0.000002 loss_cls: 2.7069 (2.6699) grad_norm: 1.5979 (1.6067) time: 2.9601 data: 0.0210 max mem: 29202 +[2024-12-11 18:15:27 root] (utils.py 283): INFO Epoch: [14] [2501/2502] eta: 0:00:02 lr: 0.000002 loss_cls: 2.7069 (2.6701) grad_norm: 1.5626 (1.6067) time: 2.9612 data: 0.0210 max mem: 29202 +[2024-12-11 18:15:27 root] (utils.py 297): INFO Epoch: [14] Total time: 2:02:37 (2.9407 s / it) +[2024-12-11 18:15:27 root] (engine.py 179): INFO Averaged stats:lr: 0.000002 loss_cls: 2.7069 (2.6735) grad_norm: 1.5626 (1.6067) +[2024-12-11 18:15:30 root] (utils.py 283): INFO Test: [ 0/98] eta: 0:00:55 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.2761 (0.2761) acc1: 92.1875 (92.1875) acc3: 98.4375 (98.4375) acc5: 99.2188 (99.2188) time: 0.5681 data: 0.0004 max mem: 29202 +[2024-12-11 18:15:36 root] (utils.py 283): INFO Test: [10/98] eta: 0:00:48 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5587 (0.5380) acc1: 86.7188 (87.0028) acc3: 96.8750 (96.9460) acc5: 99.2188 (98.3665) time: 0.5529 data: 0.0006 max mem: 29202 +[2024-12-11 18:15:41 root] (utils.py 283): INFO Test: [20/98] eta: 0:00:43 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.5587 (0.5871) acc1: 85.9375 (86.0119) acc3: 96.8750 (96.2054) acc5: 98.4375 (97.7679) time: 0.5520 data: 0.0005 max mem: 29202 +[2024-12-11 18:15:47 root] (utils.py 283): INFO Test: [30/98] eta: 0:00:37 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6121 (0.6166) acc1: 85.1562 (85.3831) acc3: 95.3125 (95.8165) acc5: 97.6562 (97.5554) time: 0.5523 data: 0.0005 max mem: 29202 +[2024-12-11 18:15:52 root] (utils.py 283): INFO Test: [40/98] eta: 0:00:32 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.6105 (0.6195) acc1: 85.1562 (85.3087) acc3: 96.0938 (95.8460) acc5: 97.6562 (97.5610) time: 0.5523 data: 0.0005 max mem: 29202 +[2024-12-11 18:15:58 root] (utils.py 283): INFO Test: [50/98] eta: 0:00:26 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.7645 (0.6973) acc1: 78.9062 (83.5631) acc3: 92.9688 (94.7304) acc5: 96.0938 (96.6759) time: 0.5526 data: 0.0005 max mem: 29202 +[2024-12-11 18:16:03 root] (utils.py 283): INFO Test: [60/98] eta: 0:00:21 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8830 (0.7276) acc1: 77.3438 (83.0174) acc3: 89.8438 (94.0574) acc5: 93.7500 (96.2602) time: 0.5534 data: 0.0005 max mem: 29202 +[2024-12-11 18:16:09 root] (utils.py 283): INFO Test: [70/98] eta: 0:00:15 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9273 (0.7591) acc1: 79.6875 (82.2513) acc3: 90.6250 (93.8050) acc5: 94.5312 (96.0497) time: 0.5537 data: 0.0004 max mem: 29202 +[2024-12-11 18:16:15 root] (utils.py 283): INFO Test: [80/98] eta: 0:00:09 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9052 (0.7830) acc1: 78.1250 (81.8673) acc3: 92.1875 (93.3931) acc5: 93.7500 (95.6694) time: 0.5535 data: 0.0007 max mem: 29202 +[2024-12-11 18:16:20 root] (utils.py 283): INFO Test: [90/98] eta: 0:00:04 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.9406 (0.8112) acc1: 77.3438 (81.1470) acc3: 90.6250 (93.0632) acc5: 92.9688 (95.4069) time: 0.5540 data: 0.0007 max mem: 29202 +[2024-12-11 18:16:24 root] (utils.py 283): INFO Test: [97/98] eta: 0:00:00 flops: 13.2070 (13.2070) layer_flops: 13.1094 (13.1094) loss: 0.8766 (0.8094) acc1: 76.5625 (81.0960) acc3: 90.6250 (93.1040) acc5: 93.7500 (95.4960) time: 0.5445 data: 0.0006 max mem: 29202 +[2024-12-11 18:16:24 root] (utils.py 297): INFO Test: Total time: 0:00:54 (0.5514 s / it) +[2024-12-11 18:16:24 root] (engine.py 264): INFO * Acc@1 81.264 Acc@3 93.088 Acc@5 95.422 loss 0.809 flops 13.207 layer_flops 13.109 +[2024-12-11 18:16:24 root] (main.py 572): INFO Accuracy of the network on the 50000 test images: 81.3% +[2024-12-11 18:16:24 root] (main.py 576): INFO Max accuracy: 81.29% +[2024-12-11 18:16:24 root] (main.py 589): INFO Finetune time 1 day, 6:55:08